local-deep-research 0.4.4__py3-none-any.whl → 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220) hide show
  1. local_deep_research/__init__.py +7 -0
  2. local_deep_research/__version__.py +1 -1
  3. local_deep_research/advanced_search_system/answer_decoding/__init__.py +5 -0
  4. local_deep_research/advanced_search_system/answer_decoding/browsecomp_answer_decoder.py +421 -0
  5. local_deep_research/advanced_search_system/candidate_exploration/README.md +219 -0
  6. local_deep_research/advanced_search_system/candidate_exploration/__init__.py +25 -0
  7. local_deep_research/advanced_search_system/candidate_exploration/adaptive_explorer.py +329 -0
  8. local_deep_research/advanced_search_system/candidate_exploration/base_explorer.py +341 -0
  9. local_deep_research/advanced_search_system/candidate_exploration/constraint_guided_explorer.py +436 -0
  10. local_deep_research/advanced_search_system/candidate_exploration/diversity_explorer.py +457 -0
  11. local_deep_research/advanced_search_system/candidate_exploration/parallel_explorer.py +250 -0
  12. local_deep_research/advanced_search_system/candidate_exploration/progressive_explorer.py +255 -0
  13. local_deep_research/advanced_search_system/candidates/__init__.py +5 -0
  14. local_deep_research/advanced_search_system/candidates/base_candidate.py +59 -0
  15. local_deep_research/advanced_search_system/constraint_checking/README.md +150 -0
  16. local_deep_research/advanced_search_system/constraint_checking/__init__.py +35 -0
  17. local_deep_research/advanced_search_system/constraint_checking/base_constraint_checker.py +122 -0
  18. local_deep_research/advanced_search_system/constraint_checking/constraint_checker.py +223 -0
  19. local_deep_research/advanced_search_system/constraint_checking/constraint_satisfaction_tracker.py +387 -0
  20. local_deep_research/advanced_search_system/constraint_checking/dual_confidence_checker.py +424 -0
  21. local_deep_research/advanced_search_system/constraint_checking/evidence_analyzer.py +174 -0
  22. local_deep_research/advanced_search_system/constraint_checking/intelligent_constraint_relaxer.py +503 -0
  23. local_deep_research/advanced_search_system/constraint_checking/rejection_engine.py +143 -0
  24. local_deep_research/advanced_search_system/constraint_checking/strict_checker.py +259 -0
  25. local_deep_research/advanced_search_system/constraint_checking/threshold_checker.py +213 -0
  26. local_deep_research/advanced_search_system/constraints/__init__.py +6 -0
  27. local_deep_research/advanced_search_system/constraints/base_constraint.py +58 -0
  28. local_deep_research/advanced_search_system/constraints/constraint_analyzer.py +143 -0
  29. local_deep_research/advanced_search_system/evidence/__init__.py +12 -0
  30. local_deep_research/advanced_search_system/evidence/base_evidence.py +57 -0
  31. local_deep_research/advanced_search_system/evidence/evaluator.py +159 -0
  32. local_deep_research/advanced_search_system/evidence/requirements.py +122 -0
  33. local_deep_research/advanced_search_system/filters/base_filter.py +3 -1
  34. local_deep_research/advanced_search_system/filters/cross_engine_filter.py +8 -2
  35. local_deep_research/advanced_search_system/filters/journal_reputation_filter.py +43 -29
  36. local_deep_research/advanced_search_system/findings/repository.py +54 -17
  37. local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +3 -1
  38. local_deep_research/advanced_search_system/query_generation/adaptive_query_generator.py +405 -0
  39. local_deep_research/advanced_search_system/questions/__init__.py +16 -0
  40. local_deep_research/advanced_search_system/questions/atomic_fact_question.py +171 -0
  41. local_deep_research/advanced_search_system/questions/browsecomp_question.py +287 -0
  42. local_deep_research/advanced_search_system/questions/decomposition_question.py +13 -4
  43. local_deep_research/advanced_search_system/questions/entity_aware_question.py +184 -0
  44. local_deep_research/advanced_search_system/questions/standard_question.py +9 -3
  45. local_deep_research/advanced_search_system/search_optimization/cross_constraint_manager.py +624 -0
  46. local_deep_research/advanced_search_system/source_management/diversity_manager.py +613 -0
  47. local_deep_research/advanced_search_system/strategies/__init__.py +42 -0
  48. local_deep_research/advanced_search_system/strategies/adaptive_decomposition_strategy.py +564 -0
  49. local_deep_research/advanced_search_system/strategies/base_strategy.py +4 -4
  50. local_deep_research/advanced_search_system/strategies/browsecomp_entity_strategy.py +1031 -0
  51. local_deep_research/advanced_search_system/strategies/browsecomp_optimized_strategy.py +778 -0
  52. local_deep_research/advanced_search_system/strategies/concurrent_dual_confidence_strategy.py +446 -0
  53. local_deep_research/advanced_search_system/strategies/constrained_search_strategy.py +1348 -0
  54. local_deep_research/advanced_search_system/strategies/constraint_parallel_strategy.py +522 -0
  55. local_deep_research/advanced_search_system/strategies/direct_search_strategy.py +217 -0
  56. local_deep_research/advanced_search_system/strategies/dual_confidence_strategy.py +320 -0
  57. local_deep_research/advanced_search_system/strategies/dual_confidence_with_rejection.py +219 -0
  58. local_deep_research/advanced_search_system/strategies/early_stop_constrained_strategy.py +369 -0
  59. local_deep_research/advanced_search_system/strategies/entity_aware_source_strategy.py +140 -0
  60. local_deep_research/advanced_search_system/strategies/evidence_based_strategy.py +1248 -0
  61. local_deep_research/advanced_search_system/strategies/evidence_based_strategy_v2.py +1337 -0
  62. local_deep_research/advanced_search_system/strategies/focused_iteration_strategy.py +537 -0
  63. local_deep_research/advanced_search_system/strategies/improved_evidence_based_strategy.py +782 -0
  64. local_deep_research/advanced_search_system/strategies/iterative_reasoning_strategy.py +760 -0
  65. local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +55 -21
  66. local_deep_research/advanced_search_system/strategies/llm_driven_modular_strategy.py +865 -0
  67. local_deep_research/advanced_search_system/strategies/modular_strategy.py +1142 -0
  68. local_deep_research/advanced_search_system/strategies/parallel_constrained_strategy.py +506 -0
  69. local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +34 -16
  70. local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +29 -9
  71. local_deep_research/advanced_search_system/strategies/recursive_decomposition_strategy.py +492 -0
  72. local_deep_research/advanced_search_system/strategies/smart_decomposition_strategy.py +284 -0
  73. local_deep_research/advanced_search_system/strategies/smart_query_strategy.py +515 -0
  74. local_deep_research/advanced_search_system/strategies/source_based_strategy.py +48 -24
  75. local_deep_research/advanced_search_system/strategies/standard_strategy.py +34 -14
  76. local_deep_research/advanced_search_system/tools/base_tool.py +7 -2
  77. local_deep_research/api/benchmark_functions.py +6 -2
  78. local_deep_research/api/research_functions.py +10 -4
  79. local_deep_research/benchmarks/__init__.py +9 -7
  80. local_deep_research/benchmarks/benchmark_functions.py +6 -2
  81. local_deep_research/benchmarks/cli/benchmark_commands.py +27 -10
  82. local_deep_research/benchmarks/cli.py +38 -13
  83. local_deep_research/benchmarks/comparison/__init__.py +4 -2
  84. local_deep_research/benchmarks/comparison/evaluator.py +316 -239
  85. local_deep_research/benchmarks/datasets/__init__.py +1 -1
  86. local_deep_research/benchmarks/datasets/base.py +91 -72
  87. local_deep_research/benchmarks/datasets/browsecomp.py +54 -33
  88. local_deep_research/benchmarks/datasets/custom_dataset_template.py +19 -19
  89. local_deep_research/benchmarks/datasets/simpleqa.py +14 -14
  90. local_deep_research/benchmarks/datasets/utils.py +48 -29
  91. local_deep_research/benchmarks/datasets.py +4 -11
  92. local_deep_research/benchmarks/efficiency/__init__.py +8 -4
  93. local_deep_research/benchmarks/efficiency/resource_monitor.py +223 -171
  94. local_deep_research/benchmarks/efficiency/speed_profiler.py +62 -48
  95. local_deep_research/benchmarks/evaluators/browsecomp.py +3 -1
  96. local_deep_research/benchmarks/evaluators/composite.py +6 -2
  97. local_deep_research/benchmarks/evaluators/simpleqa.py +36 -13
  98. local_deep_research/benchmarks/graders.py +32 -10
  99. local_deep_research/benchmarks/metrics/README.md +1 -1
  100. local_deep_research/benchmarks/metrics/calculation.py +25 -10
  101. local_deep_research/benchmarks/metrics/reporting.py +7 -3
  102. local_deep_research/benchmarks/metrics/visualization.py +42 -23
  103. local_deep_research/benchmarks/metrics.py +1 -1
  104. local_deep_research/benchmarks/optimization/__init__.py +3 -1
  105. local_deep_research/benchmarks/optimization/api.py +7 -1
  106. local_deep_research/benchmarks/optimization/optuna_optimizer.py +75 -26
  107. local_deep_research/benchmarks/runners.py +48 -15
  108. local_deep_research/citation_handler.py +65 -92
  109. local_deep_research/citation_handlers/__init__.py +15 -0
  110. local_deep_research/citation_handlers/base_citation_handler.py +70 -0
  111. local_deep_research/citation_handlers/forced_answer_citation_handler.py +179 -0
  112. local_deep_research/citation_handlers/precision_extraction_handler.py +550 -0
  113. local_deep_research/citation_handlers/standard_citation_handler.py +80 -0
  114. local_deep_research/config/llm_config.py +271 -169
  115. local_deep_research/config/search_config.py +14 -5
  116. local_deep_research/defaults/__init__.py +0 -1
  117. local_deep_research/metrics/__init__.py +13 -0
  118. local_deep_research/metrics/database.py +58 -0
  119. local_deep_research/metrics/db_models.py +115 -0
  120. local_deep_research/metrics/migrate_add_provider_to_token_usage.py +148 -0
  121. local_deep_research/metrics/migrate_call_stack_tracking.py +105 -0
  122. local_deep_research/metrics/migrate_enhanced_tracking.py +75 -0
  123. local_deep_research/metrics/migrate_research_ratings.py +31 -0
  124. local_deep_research/metrics/models.py +61 -0
  125. local_deep_research/metrics/pricing/__init__.py +12 -0
  126. local_deep_research/metrics/pricing/cost_calculator.py +237 -0
  127. local_deep_research/metrics/pricing/pricing_cache.py +143 -0
  128. local_deep_research/metrics/pricing/pricing_fetcher.py +240 -0
  129. local_deep_research/metrics/query_utils.py +51 -0
  130. local_deep_research/metrics/search_tracker.py +380 -0
  131. local_deep_research/metrics/token_counter.py +1078 -0
  132. local_deep_research/migrate_db.py +3 -1
  133. local_deep_research/report_generator.py +22 -8
  134. local_deep_research/search_system.py +390 -9
  135. local_deep_research/test_migration.py +15 -5
  136. local_deep_research/utilities/db_utils.py +7 -4
  137. local_deep_research/utilities/es_utils.py +115 -104
  138. local_deep_research/utilities/llm_utils.py +15 -5
  139. local_deep_research/utilities/log_utils.py +151 -0
  140. local_deep_research/utilities/search_cache.py +387 -0
  141. local_deep_research/utilities/search_utilities.py +14 -6
  142. local_deep_research/utilities/threading_utils.py +92 -0
  143. local_deep_research/utilities/url_utils.py +6 -0
  144. local_deep_research/web/api.py +347 -0
  145. local_deep_research/web/app.py +13 -17
  146. local_deep_research/web/app_factory.py +71 -66
  147. local_deep_research/web/database/migrate_to_ldr_db.py +12 -4
  148. local_deep_research/web/database/migrations.py +20 -3
  149. local_deep_research/web/database/models.py +74 -25
  150. local_deep_research/web/database/schema_upgrade.py +49 -29
  151. local_deep_research/web/models/database.py +63 -83
  152. local_deep_research/web/routes/api_routes.py +56 -22
  153. local_deep_research/web/routes/benchmark_routes.py +4 -1
  154. local_deep_research/web/routes/globals.py +22 -0
  155. local_deep_research/web/routes/history_routes.py +71 -46
  156. local_deep_research/web/routes/metrics_routes.py +1155 -0
  157. local_deep_research/web/routes/research_routes.py +192 -54
  158. local_deep_research/web/routes/settings_routes.py +156 -55
  159. local_deep_research/web/services/research_service.py +412 -251
  160. local_deep_research/web/services/resource_service.py +36 -11
  161. local_deep_research/web/services/settings_manager.py +55 -17
  162. local_deep_research/web/services/settings_service.py +12 -4
  163. local_deep_research/web/services/socket_service.py +295 -188
  164. local_deep_research/web/static/css/custom_dropdown.css +180 -0
  165. local_deep_research/web/static/css/styles.css +39 -1
  166. local_deep_research/web/static/js/components/detail.js +633 -267
  167. local_deep_research/web/static/js/components/details.js +751 -0
  168. local_deep_research/web/static/js/components/fallback/formatting.js +11 -11
  169. local_deep_research/web/static/js/components/fallback/ui.js +23 -23
  170. local_deep_research/web/static/js/components/history.js +76 -76
  171. local_deep_research/web/static/js/components/logpanel.js +61 -13
  172. local_deep_research/web/static/js/components/progress.js +13 -2
  173. local_deep_research/web/static/js/components/research.js +99 -12
  174. local_deep_research/web/static/js/components/results.js +239 -106
  175. local_deep_research/web/static/js/main.js +40 -40
  176. local_deep_research/web/static/js/services/audio.js +1 -1
  177. local_deep_research/web/static/js/services/formatting.js +11 -11
  178. local_deep_research/web/static/js/services/keyboard.js +157 -0
  179. local_deep_research/web/static/js/services/pdf.js +80 -80
  180. local_deep_research/web/static/sounds/README.md +1 -1
  181. local_deep_research/web/templates/base.html +1 -0
  182. local_deep_research/web/templates/components/log_panel.html +7 -1
  183. local_deep_research/web/templates/components/mobile_nav.html +1 -1
  184. local_deep_research/web/templates/components/sidebar.html +3 -0
  185. local_deep_research/web/templates/pages/cost_analytics.html +1245 -0
  186. local_deep_research/web/templates/pages/details.html +325 -24
  187. local_deep_research/web/templates/pages/history.html +1 -1
  188. local_deep_research/web/templates/pages/metrics.html +1929 -0
  189. local_deep_research/web/templates/pages/progress.html +2 -2
  190. local_deep_research/web/templates/pages/research.html +53 -17
  191. local_deep_research/web/templates/pages/results.html +12 -1
  192. local_deep_research/web/templates/pages/star_reviews.html +803 -0
  193. local_deep_research/web/utils/formatters.py +9 -3
  194. local_deep_research/web_search_engines/default_search_engines.py +5 -3
  195. local_deep_research/web_search_engines/engines/full_search.py +8 -2
  196. local_deep_research/web_search_engines/engines/meta_search_engine.py +59 -20
  197. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +19 -6
  198. local_deep_research/web_search_engines/engines/search_engine_brave.py +6 -2
  199. local_deep_research/web_search_engines/engines/search_engine_ddg.py +3 -1
  200. local_deep_research/web_search_engines/engines/search_engine_elasticsearch.py +81 -58
  201. local_deep_research/web_search_engines/engines/search_engine_github.py +46 -15
  202. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +16 -6
  203. local_deep_research/web_search_engines/engines/search_engine_guardian.py +39 -15
  204. local_deep_research/web_search_engines/engines/search_engine_local.py +58 -25
  205. local_deep_research/web_search_engines/engines/search_engine_local_all.py +15 -5
  206. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +63 -21
  207. local_deep_research/web_search_engines/engines/search_engine_searxng.py +37 -11
  208. local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +27 -9
  209. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +12 -4
  210. local_deep_research/web_search_engines/engines/search_engine_wayback.py +31 -10
  211. local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +12 -3
  212. local_deep_research/web_search_engines/search_engine_base.py +83 -35
  213. local_deep_research/web_search_engines/search_engine_factory.py +25 -8
  214. local_deep_research/web_search_engines/search_engines_config.py +9 -3
  215. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/METADATA +7 -1
  216. local_deep_research-0.5.2.dist-info/RECORD +265 -0
  217. local_deep_research-0.4.4.dist-info/RECORD +0 -177
  218. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/WHEEL +0 -0
  219. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/entry_points.txt +0 -0
  220. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,506 @@
1
+ """
2
+ Parallel constrained search strategy with progressive constraint relaxation.
3
+
4
+ Key improvements:
5
+ 1. Combines multiple constraints in initial searches
6
+ 2. Runs searches in parallel for efficiency
7
+ 3. Progressively loosens constraints if needed
8
+ 4. Compact design to minimize context usage
9
+ """
10
+
11
+ import concurrent.futures
12
+ from dataclasses import dataclass
13
+ from typing import List
14
+
15
+ from loguru import logger
16
+
17
+ from ..candidates.base_candidate import Candidate
18
+ from ..constraints.base_constraint import Constraint, ConstraintType
19
+ from .constrained_search_strategy import ConstrainedSearchStrategy
20
+
21
+
22
@dataclass
class SearchCombination:
    """One search to run: a set of constraints plus the query built from them.

    Equality is the dataclass-generated field-by-field comparison; hashing
    is based on the query text only.
    """

    # Constraints this search was built from.
    constraints: List[Constraint]
    # Query string handed to the search backend.
    query: str
    # Numeric priority assigned by the code that creates the combination.
    priority: int

    def __hash__(self):
        # Hash on the query alone; the constraints list is not hashable.
        query_text = self.query
        return hash(query_text)
32
+
33
+
34
class ParallelConstrainedStrategy(ConstrainedSearchStrategy):
    """
    Enhanced constrained strategy with parallel search and smart constraint combination.

    The search runs in up to three rounds of decreasing strictness (combined
    constraints, relaxed combinations, individual constraints). Each round's
    queries are executed concurrently in a thread pool, and the rounds stop as
    soon as ``min_results_threshold`` unique candidates have been collected.
    """

    def __init__(
        self,
        *args,
        parallel_workers: int = 100,
        min_results_threshold: int = 10,
        **kwargs,
    ):
        """Initialise the strategy.

        Args:
            *args: Positional arguments forwarded to the parent strategy.
            parallel_workers: Upper bound on concurrently running searches.
            min_results_threshold: Number of unique candidates after which
                no further (more relaxed) search round is started.
            **kwargs: Keyword arguments forwarded to the parent strategy.
        """
        super().__init__(*args, **kwargs)
        self.parallel_workers = parallel_workers
        self.min_results_threshold = min_results_threshold

        # Define hard constraints that must be satisfied
        self.hard_constraint_keywords = [
            "aired between",
            "aired during",
            "air date",
            "broadcast",
            "episodes",
            "season",
            "year",
            "decade",
            "male",
            "female",
            "gender",
            "tv show",
            "series",
            "program",
        ]

    @staticmethod
    def _strip_list_marker(line: str) -> str:
        """Return ``line`` trimmed and without a leading ``- ``/``* ``/``• `` marker.

        LLM replies frequently echo the bullet style used in the prompt, so
        names have to be normalised before they are compared or stored.
        """
        text = line.strip()
        if text[:2] in ("- ", "* ", "• "):
            text = text[2:].strip()
        return text

    def _classify_constraints(self):
        """Classify constraints into hard (must satisfy) and soft (scoring) categories.

        A constraint is hard when its type is TEMPORAL or STATISTIC, or when
        its lower-cased value contains one of ``hard_constraint_keywords``.
        The results are stored in ``self.hard_constraints`` and
        ``self.soft_constraints``.
        """
        hard_types = (ConstraintType.TEMPORAL, ConstraintType.STATISTIC)
        self.hard_constraints = []
        self.soft_constraints = []

        for constraint in self.constraint_ranking:
            constraint_text = constraint.value.lower()
            is_hard = constraint.type in hard_types or any(
                keyword in constraint_text
                for keyword in self.hard_constraint_keywords
            )
            target = self.hard_constraints if is_hard else self.soft_constraints
            target.append(constraint)

        logger.info(
            f"Classified {len(self.hard_constraints)} hard constraints and {len(self.soft_constraints)} soft constraints"
        )

    def _progressive_constraint_search(self):
        """Override parent method with parallel, combined constraint search.

        Runs the strict, relaxed and individual phases in that order, stopping
        early once enough unique candidates were found. The final candidates
        (capped at ``self.candidate_limit``) are stored in ``self.candidates``
        and ``self.stage_candidates``. Falls back to the parent implementation
        when no constraint ranking is available.
        """
        # Check if constraint_ranking is available
        if not getattr(self, "constraint_ranking", None):
            logger.error(
                "No constraint ranking available - calling parent method"
            )
            return super()._progressive_constraint_search()

        # Detect what type of entity we're looking for
        self.entity_type = self._detect_entity_type()
        logger.info(f"Detected entity type: {self.entity_type}")

        logger.info(
            f"Starting parallel constraint search with {len(self.constraint_ranking)} constraints"
        )
        logger.info(
            f"Constraint ranking: {[c.value for c in self.constraint_ranking[:5]]}"
        )

        # Phases in order of decreasing strictness; each factory is only
        # invoked if the previous phases did not yield enough candidates.
        phases = (
            ("strict", self._create_strict_combinations),
            ("relaxed", self._create_relaxed_combinations),
            ("individual", self._create_individual_combinations),
        )

        current_candidates = []
        unique_candidates = []

        for search_iterations, (strictness, make_combinations) in enumerate(
            phases, start=1
        ):
            combinations = make_combinations()

            logger.info(
                f"Iteration {search_iterations}: {strictness} mode with {len(combinations)} combinations"
            )

            # Log the actual combinations
            for i, combo in enumerate(combinations):
                logger.info(
                    f" Combination {i + 1}: query='{combo.query[:60]}...', constraints={len(combo.constraints)}"
                )

            if self.progress_callback:
                self.progress_callback(
                    f"Search iteration {search_iterations}: {strictness} mode ({len(combinations)} combinations)",
                    15 + (search_iterations * 25),
                    {
                        "phase": "parallel_search",
                        "iteration": search_iterations,
                        "combinations": len(combinations),
                        "mode": strictness,
                    },
                )

            # Run searches in parallel
            current_candidates.extend(self._parallel_search(combinations))

            # Check if we have enough results
            unique_candidates = self._deduplicate_candidates(current_candidates)

            if len(unique_candidates) >= self.min_results_threshold:
                if self.progress_callback:
                    self.progress_callback(
                        f"Found {len(unique_candidates)} candidates - stopping search",
                        90,
                        {
                            "phase": "search_complete",
                            "candidates": len(unique_candidates),
                        },
                    )
                break

            if self.progress_callback:
                self.progress_callback(
                    f"Found {len(unique_candidates)} candidates - continuing search",
                    None,
                    {
                        "phase": "search_continue",
                        "candidates": len(unique_candidates),
                    },
                )

        self.candidates = unique_candidates[: self.candidate_limit]

        # Add stage tracking for parent class compatibility
        self.stage_candidates = {
            0: self.candidates,  # Final results as last stage
        }

    def _create_strict_combinations(self) -> List[SearchCombination]:
        """Create initial strict constraint combinations.

        Builds up to three multi-constraint queries (top two constraints,
        temporal/event + property, statistic + properties). Combinations that
        would issue an identical query are emitted only once so the same
        search is not run twice in parallel.
        """
        ranking = self.constraint_ranking
        combinations: List[SearchCombination] = []
        seen_queries = set()

        def add(constraints: List[Constraint], priority: int) -> None:
            query = self._build_query(constraints)
            if query in seen_queries:
                return
            seen_queries.add(query)
            combinations.append(SearchCombination(constraints, query, priority))

        # Strategy 1: Combine most restrictive constraints
        if len(ranking) >= 2:
            add(ranking[:2], 1)

        # Strategy 2: Combine temporal + property constraints
        temporal = [
            c
            for c in ranking
            if c.type in [ConstraintType.EVENT, ConstraintType.TEMPORAL]
        ]
        properties = [c for c in ranking if c.type == ConstraintType.PROPERTY]
        if temporal and properties:
            add(temporal[:1] + properties[:1], 2)

        # Strategy 3: Combine statistic + property
        stats = [c for c in ranking if c.type == ConstraintType.STATISTIC]
        if stats and properties:
            add(stats[:1] + properties[:2], 3)

        return combinations[:5]  # Limit to 5 combinations

    def _create_relaxed_combinations(self) -> List[SearchCombination]:
        """Create relaxed constraint combinations.

        Returns one single-constraint search for each of the top three
        constraints, plus one combined search over constraints four to six
        when more than three constraints exist.
        """
        ranking = self.constraint_ranking

        # Use single most restrictive constraints
        combinations = [
            SearchCombination([constraint], self._build_query([constraint]), i + 10)
            for i, constraint in enumerate(ranking[:3])
        ]

        # Combine weaker constraints
        if len(ranking) > 3:
            weaker = ranking[3:6]
            combinations.append(
                SearchCombination(weaker, self._build_query(weaker), 20)
            )

        return combinations

    def _create_individual_combinations(self) -> List[SearchCombination]:
        """Create individual constraint searches as fallback.

        For each of the top five constraints, up to two query variations are
        turned into single-constraint combinations.
        """
        combinations = []

        for i, constraint in enumerate(self.constraint_ranking[:5]):
            # 2 variations per constraint
            variations = self._generate_query_variations(constraint)[:2]
            for j, query in enumerate(variations):
                combinations.append(
                    SearchCombination([constraint], query, i * 10 + j + 30)
                )

        return combinations

    def _build_query(self, constraints: List[Constraint]) -> str:
        """Build an optimized query from constraints.

        The detected entity type (if any and not ``"unknown entity"``) is
        added as a quoted leading term; multi-word constraint values are
        quoted; all terms are joined with ``AND``.
        """
        terms = []

        # Use entity type to add context
        entity_type = getattr(self, "entity_type", None)
        if entity_type and entity_type != "unknown entity":
            terms.append(f'"{entity_type}"')

        for c in constraints:
            # Add quotes for multi-word values
            value = c.value
            if " " in value and not value.startswith('"'):
                value = f'"{value}"'
            terms.append(value)

        # Join with AND for strict matching
        return " AND ".join(terms)

    def _generate_query_variations(self, constraint: Constraint) -> List[str]:
        """Generate query variations for a single constraint.

        Returns at most three queries: the raw constraint value followed by
        type-specific phrasings for STATISTIC and PROPERTY constraints.
        """
        base = constraint.value
        variations = [base]

        # Add type-specific variations
        if constraint.type == ConstraintType.STATISTIC:
            variations += [f"list {base}", f"complete {base}", f"all {base}"]
        elif constraint.type == ConstraintType.PROPERTY:
            variations += [
                f"with {base}",
                f"featuring {base}",
                f"known for {base}",
            ]

        return variations[:3]  # Limit variations

    def _parallel_search(
        self, combinations: List[SearchCombination]
    ) -> List[Candidate]:
        """Execute searches in parallel.

        Args:
            combinations: Searches to run; may be empty.

        Returns:
            All candidates from the searches that completed successfully,
            in completion order. Failed searches are logged and skipped.
        """
        if not combinations:
            # Nothing to submit; avoid building an executor for no work.
            return []

        all_candidates = []
        # ThreadPoolExecutor rejects max_workers <= 0, and more workers than
        # submitted tasks are never used.
        worker_count = max(1, min(self.parallel_workers, len(combinations)))

        with concurrent.futures.ThreadPoolExecutor(
            max_workers=worker_count
        ) as executor:
            # Submit all searches
            future_to_combo = {
                executor.submit(self._execute_combination_search, combo): combo
                for combo in combinations
            }

            # Process results as they complete
            for i, future in enumerate(
                concurrent.futures.as_completed(future_to_combo)
            ):
                combo = future_to_combo[future]
                try:
                    candidates = future.result()
                    all_candidates.extend(candidates)

                    if self.progress_callback:
                        self.progress_callback(
                            f"Completed search {i + 1}/{len(combinations)}: {len(candidates)} results",
                            None,
                            {
                                "phase": "parallel_result",
                                "query": combo.query[:50],
                                "candidates": len(candidates),
                                "total_so_far": len(all_candidates),
                            },
                        )
                except Exception as e:
                    logger.error(f"Search failed for {combo.query}: {e}")

        return all_candidates

    def _execute_combination_search(
        self, combination: SearchCombination
    ) -> List[Candidate]:
        """Execute a single combination search.

        Runs the query, and if more than 50 characters of content came back,
        extracts candidates from it using the combination's first constraint.
        Any error is logged with its traceback and results in an empty list.
        """
        try:
            results = self._execute_search(combination.query)

            # Treat a missing or None knowledge entry as "no content".
            content = results.get("current_knowledge", "") or ""

            logger.info(
                f"Search '{combination.query[:50]}...' returned {len(content)} chars of content"
            )

            candidates = []
            if len(content) > 50:
                # Always use LLM extraction for accuracy
                first_constraint = (
                    combination.constraints[0]
                    if combination.constraints
                    else None
                )
                candidates.extend(
                    self._extract_relevant_candidates(
                        {"current_knowledge": content}, first_constraint
                    )
                )

            logger.info(
                f"Search '{combination.query[:30]}' found {len(candidates)} candidates"
            )
            return candidates

        except Exception as e:
            # loguru does not understand stdlib's ``exc_info=True``; it would
            # be treated as a str.format argument. ``logger.exception``
            # records the traceback of the active exception instead.
            logger.exception(f"Error in combination search: {e}")
            return []

    def _quick_extract_candidates(
        self, content: str, constraints: List[Constraint]
    ) -> List[Candidate]:
        """Extract candidates using LLM with entity type awareness.

        Args:
            content: Search result text to extract names from.
            constraints: Constraints listed in the prompt as context.

        Returns:
            Up to 15 candidates, one per non-trivial response line (bullet
            markers stripped). Empty list if the model call fails.
        """
        # Use the detected entity type if available
        entity_type = getattr(self, "entity_type", "entity")
        constraint_lines = "\n".join(f"- {c.value}" for c in constraints)

        extraction_prompt = f"""
From the following search result, extract {entity_type} names that might match the given constraints.

Search result:
{content}

Constraints to consider:
{constraint_lines}

Important:
- Extract ONLY {entity_type} names
- Do NOT include other types of entities
- Focus on entities that could potentially match the constraints

Return the {entity_type} names, one per line.
"""

        try:
            response = self.model.invoke(extraction_prompt).content
            candidates = []
            for line in response.split("\n"):
                name = self._strip_list_marker(line)
                if len(name) > 2:
                    candidates.append(Candidate(name=name))
            return candidates[:15]
        except Exception as e:
            logger.error(f"Entity extraction failed: {e}")
            return []

    def _validate_hard_constraints(
        self, candidates: List[Candidate]
    ) -> List[Candidate]:
        """Filter candidates that don't meet hard constraints.

        Only the first 20 candidates are listed in the validation prompt;
        candidates whose names the model does not return are dropped. Name
        matching ignores case and leading bullet markers in the reply.

        Returns:
            The surviving candidates; ``candidates`` unchanged when there are
            no hard constraints or no candidates; the first 10 candidates if
            validation itself fails.
        """
        # _classify_constraints may not have run yet on this instance.
        hard_constraints = getattr(self, "hard_constraints", None)
        if not hard_constraints or not candidates:
            return candidates

        entity_type = getattr(self, "entity_type", "entity")
        constraint_lines = "\n".join(f"- {c.value}" for c in hard_constraints)
        candidate_lines = "\n".join(f"- {c.name}" for c in candidates[:20])

        validation_prompt = f"""
Validate {entity_type} candidates against hard constraints.

Hard constraints that MUST be satisfied:
{constraint_lines}

{entity_type} candidates to evaluate:
{candidate_lines}

Return ONLY the {entity_type} names that satisfy ALL hard constraints, one per line.
Reject any candidates that:
1. Are not actually a {entity_type}
2. Do not satisfy ALL the hard constraints listed above

Be strict - if there's doubt about a constraint being satisfied, reject the candidate."""

        try:
            response = self.model.invoke(validation_prompt).content

            # Accept both the raw line and its bullet-stripped form so that
            # names which legitimately start with a marker still match.
            valid_names = set()
            for line in response.split("\n"):
                raw = line.strip()
                if raw:
                    valid_names.add(raw.casefold())
                    valid_names.add(self._strip_list_marker(raw).casefold())

            # Keep only candidates that passed validation
            filtered = [
                c for c in candidates if c.name.strip().casefold() in valid_names
            ]

            logger.info(
                f"Hard constraint validation: {len(candidates)} -> {len(filtered)} candidates"
            )
            return filtered

        except Exception as e:
            logger.error(f"Hard constraint validation failed: {e}")
            return candidates[:10]  # Return top candidates if validation fails

    def _detect_entity_type(self) -> str:
        """Use LLM to detect what type of entity we're searching for.

        Returns:
            The model's stripped answer, or ``"unknown entity"`` when the
            call fails or the answer is empty.
        """
        # Build context from constraints
        constraint_text = "\n".join(
            [f"- {c.value}" for c in self.constraint_ranking]
        )

        prompt = f"""
Analyze these search constraints and determine what type of entity is being searched for:

Constraints:
{constraint_text}

What is the primary entity type being searched for? Be specific.

Examples of entity types (but you can choose any appropriate type):
- fictional character
- TV show
- movie
- actor/actress
- historical figure
- company
- product
- location
- event

Respond with just the entity type.
"""

        try:
            entity_type = self.model.invoke(prompt).content.strip()
            logger.info(f"LLM determined entity type: {entity_type}")
            # An empty answer would produce degenerate prompts and queries.
            return entity_type or "unknown entity"
        except Exception as e:
            logger.error(f"Failed to detect entity type: {e}")
            return "unknown entity"
@@ -123,7 +123,9 @@ class ParallelSearchStrategy(BaseSearchStrategy):
123
123
  try:
124
124
  # Run each iteration
125
125
  for iteration in range(1, iterations_to_run + 1):
126
- iteration_progress_base = 5 + (iteration - 1) * (70 / iterations_to_run)
126
+ iteration_progress_base = 5 + (iteration - 1) * (
127
+ 70 / iterations_to_run
128
+ )
127
129
 
128
130
  self._update_progress(
129
131
  f"Starting iteration {iteration}/{iterations_to_run}",
@@ -175,7 +177,9 @@ class ParallelSearchStrategy(BaseSearchStrategy):
175
177
 
176
178
  # Generate follow-up questions based on accumulated knowledge if iterations > 2
177
179
  use_knowledge = iterations_to_run > 2
178
- knowledge_for_questions = current_knowledge if use_knowledge else ""
180
+ knowledge_for_questions = (
181
+ current_knowledge if use_knowledge else ""
182
+ )
179
183
  context = f"""Current Knowledge: {knowledge_for_questions}
180
184
  Iteration: {iteration} of {iterations_to_run}"""
181
185
 
@@ -219,7 +223,8 @@ class ParallelSearchStrategy(BaseSearchStrategy):
219
223
  max_workers=len(all_questions)
220
224
  ) as executor:
221
225
  futures = [
222
- executor.submit(search_question, q) for q in all_questions
226
+ executor.submit(search_question, q)
227
+ for q in all_questions
223
228
  ]
224
229
  iteration_search_dict = {}
225
230
  iteration_search_results = []
@@ -234,7 +239,7 @@ class ParallelSearchStrategy(BaseSearchStrategy):
234
239
  iteration_search_dict[question] = search_results
235
240
 
236
241
  self._update_progress(
237
- f"Completed search {i + 1} of {len(all_questions)}: {question[:30]}...",
242
+ f"Completed search {i + 1} of {len(all_questions)}: {question[:500]}",
238
243
  iteration_progress_base
239
244
  + 10
240
245
  + ((i + 1) / len(all_questions) * 30),
@@ -261,7 +266,10 @@ class ParallelSearchStrategy(BaseSearchStrategy):
261
266
  self._update_progress(
262
267
  f"Filtering search results for iteration {iteration}",
263
268
  iteration_progress_base + 45,
264
- {"phase": "cross_engine_filtering", "iteration": iteration},
269
+ {
270
+ "phase": "cross_engine_filtering",
271
+ "iteration": iteration,
272
+ },
265
273
  )
266
274
 
267
275
  # Get the current link count (for indexing)
@@ -276,7 +284,9 @@ class ParallelSearchStrategy(BaseSearchStrategy):
276
284
  start_index=existing_link_count, # Start indexing after existing links
277
285
  )
278
286
 
279
- links = extract_links_from_search_results(filtered_search_results)
287
+ links = extract_links_from_search_results(
288
+ filtered_search_results
289
+ )
280
290
  self.all_links_of_system.extend(links)
281
291
 
282
292
  self._update_progress(
@@ -293,7 +303,9 @@ class ParallelSearchStrategy(BaseSearchStrategy):
293
303
  iteration_search_results = filtered_search_results
294
304
  else:
295
305
  # Just extract links without filtering
296
- links = extract_links_from_search_results(iteration_search_results)
306
+ links = extract_links_from_search_results(
307
+ iteration_search_results
308
+ )
297
309
  self.all_links_of_system.extend(links)
298
310
 
299
311
  # Add to all search results
@@ -303,11 +315,13 @@ class ParallelSearchStrategy(BaseSearchStrategy):
303
315
  if self.include_text_content and iteration_search_results:
304
316
  # For iteration > 1 with knowledge accumulation, use follow-up analysis
305
317
  if iteration > 1 and iterations_to_run > 2:
306
- citation_result = self.citation_handler.analyze_followup(
307
- query,
308
- iteration_search_results,
309
- current_knowledge,
310
- len(self.all_links_of_system) - len(links),
318
+ citation_result = (
319
+ self.citation_handler.analyze_followup(
320
+ query,
321
+ iteration_search_results,
322
+ current_knowledge,
323
+ len(self.all_links_of_system) - len(links),
324
+ )
311
325
  )
312
326
  else:
313
327
  # For first iteration or without knowledge accumulation, use initial analysis
@@ -358,8 +372,10 @@ class ParallelSearchStrategy(BaseSearchStrategy):
358
372
  if self.include_text_content:
359
373
  # Generate a final synthesis from all search results
360
374
  if iterations_to_run > 1:
361
- final_citation_result = self.citation_handler.analyze_initial(
362
- query, all_search_results
375
+ final_citation_result = (
376
+ self.citation_handler.analyze_initial(
377
+ query, all_search_results
378
+ )
363
379
  )
364
380
  # Add null check for final_citation_result
365
381
  if final_citation_result:
@@ -402,8 +418,10 @@ class ParallelSearchStrategy(BaseSearchStrategy):
402
418
  )
403
419
 
404
420
  # Format findings
405
- formatted_findings = self.findings_repository.format_findings_to_text(
406
- findings, synthesized_content
421
+ formatted_findings = (
422
+ self.findings_repository.format_findings_to_text(
423
+ findings, synthesized_content
424
+ )
407
425
  )
408
426
 
409
427
  except Exception as e: