local-deep-research 0.4.4__py3-none-any.whl → 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220)
  1. local_deep_research/__init__.py +7 -0
  2. local_deep_research/__version__.py +1 -1
  3. local_deep_research/advanced_search_system/answer_decoding/__init__.py +5 -0
  4. local_deep_research/advanced_search_system/answer_decoding/browsecomp_answer_decoder.py +421 -0
  5. local_deep_research/advanced_search_system/candidate_exploration/README.md +219 -0
  6. local_deep_research/advanced_search_system/candidate_exploration/__init__.py +25 -0
  7. local_deep_research/advanced_search_system/candidate_exploration/adaptive_explorer.py +329 -0
  8. local_deep_research/advanced_search_system/candidate_exploration/base_explorer.py +341 -0
  9. local_deep_research/advanced_search_system/candidate_exploration/constraint_guided_explorer.py +436 -0
  10. local_deep_research/advanced_search_system/candidate_exploration/diversity_explorer.py +457 -0
  11. local_deep_research/advanced_search_system/candidate_exploration/parallel_explorer.py +250 -0
  12. local_deep_research/advanced_search_system/candidate_exploration/progressive_explorer.py +255 -0
  13. local_deep_research/advanced_search_system/candidates/__init__.py +5 -0
  14. local_deep_research/advanced_search_system/candidates/base_candidate.py +59 -0
  15. local_deep_research/advanced_search_system/constraint_checking/README.md +150 -0
  16. local_deep_research/advanced_search_system/constraint_checking/__init__.py +35 -0
  17. local_deep_research/advanced_search_system/constraint_checking/base_constraint_checker.py +122 -0
  18. local_deep_research/advanced_search_system/constraint_checking/constraint_checker.py +223 -0
  19. local_deep_research/advanced_search_system/constraint_checking/constraint_satisfaction_tracker.py +387 -0
  20. local_deep_research/advanced_search_system/constraint_checking/dual_confidence_checker.py +424 -0
  21. local_deep_research/advanced_search_system/constraint_checking/evidence_analyzer.py +174 -0
  22. local_deep_research/advanced_search_system/constraint_checking/intelligent_constraint_relaxer.py +503 -0
  23. local_deep_research/advanced_search_system/constraint_checking/rejection_engine.py +143 -0
  24. local_deep_research/advanced_search_system/constraint_checking/strict_checker.py +259 -0
  25. local_deep_research/advanced_search_system/constraint_checking/threshold_checker.py +213 -0
  26. local_deep_research/advanced_search_system/constraints/__init__.py +6 -0
  27. local_deep_research/advanced_search_system/constraints/base_constraint.py +58 -0
  28. local_deep_research/advanced_search_system/constraints/constraint_analyzer.py +143 -0
  29. local_deep_research/advanced_search_system/evidence/__init__.py +12 -0
  30. local_deep_research/advanced_search_system/evidence/base_evidence.py +57 -0
  31. local_deep_research/advanced_search_system/evidence/evaluator.py +159 -0
  32. local_deep_research/advanced_search_system/evidence/requirements.py +122 -0
  33. local_deep_research/advanced_search_system/filters/base_filter.py +3 -1
  34. local_deep_research/advanced_search_system/filters/cross_engine_filter.py +8 -2
  35. local_deep_research/advanced_search_system/filters/journal_reputation_filter.py +43 -29
  36. local_deep_research/advanced_search_system/findings/repository.py +54 -17
  37. local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +3 -1
  38. local_deep_research/advanced_search_system/query_generation/adaptive_query_generator.py +405 -0
  39. local_deep_research/advanced_search_system/questions/__init__.py +16 -0
  40. local_deep_research/advanced_search_system/questions/atomic_fact_question.py +171 -0
  41. local_deep_research/advanced_search_system/questions/browsecomp_question.py +287 -0
  42. local_deep_research/advanced_search_system/questions/decomposition_question.py +13 -4
  43. local_deep_research/advanced_search_system/questions/entity_aware_question.py +184 -0
  44. local_deep_research/advanced_search_system/questions/standard_question.py +9 -3
  45. local_deep_research/advanced_search_system/search_optimization/cross_constraint_manager.py +624 -0
  46. local_deep_research/advanced_search_system/source_management/diversity_manager.py +613 -0
  47. local_deep_research/advanced_search_system/strategies/__init__.py +42 -0
  48. local_deep_research/advanced_search_system/strategies/adaptive_decomposition_strategy.py +564 -0
  49. local_deep_research/advanced_search_system/strategies/base_strategy.py +4 -4
  50. local_deep_research/advanced_search_system/strategies/browsecomp_entity_strategy.py +1031 -0
  51. local_deep_research/advanced_search_system/strategies/browsecomp_optimized_strategy.py +778 -0
  52. local_deep_research/advanced_search_system/strategies/concurrent_dual_confidence_strategy.py +446 -0
  53. local_deep_research/advanced_search_system/strategies/constrained_search_strategy.py +1348 -0
  54. local_deep_research/advanced_search_system/strategies/constraint_parallel_strategy.py +522 -0
  55. local_deep_research/advanced_search_system/strategies/direct_search_strategy.py +217 -0
  56. local_deep_research/advanced_search_system/strategies/dual_confidence_strategy.py +320 -0
  57. local_deep_research/advanced_search_system/strategies/dual_confidence_with_rejection.py +219 -0
  58. local_deep_research/advanced_search_system/strategies/early_stop_constrained_strategy.py +369 -0
  59. local_deep_research/advanced_search_system/strategies/entity_aware_source_strategy.py +140 -0
  60. local_deep_research/advanced_search_system/strategies/evidence_based_strategy.py +1248 -0
  61. local_deep_research/advanced_search_system/strategies/evidence_based_strategy_v2.py +1337 -0
  62. local_deep_research/advanced_search_system/strategies/focused_iteration_strategy.py +537 -0
  63. local_deep_research/advanced_search_system/strategies/improved_evidence_based_strategy.py +782 -0
  64. local_deep_research/advanced_search_system/strategies/iterative_reasoning_strategy.py +760 -0
  65. local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +55 -21
  66. local_deep_research/advanced_search_system/strategies/llm_driven_modular_strategy.py +865 -0
  67. local_deep_research/advanced_search_system/strategies/modular_strategy.py +1142 -0
  68. local_deep_research/advanced_search_system/strategies/parallel_constrained_strategy.py +506 -0
  69. local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +34 -16
  70. local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +29 -9
  71. local_deep_research/advanced_search_system/strategies/recursive_decomposition_strategy.py +492 -0
  72. local_deep_research/advanced_search_system/strategies/smart_decomposition_strategy.py +284 -0
  73. local_deep_research/advanced_search_system/strategies/smart_query_strategy.py +515 -0
  74. local_deep_research/advanced_search_system/strategies/source_based_strategy.py +48 -24
  75. local_deep_research/advanced_search_system/strategies/standard_strategy.py +34 -14
  76. local_deep_research/advanced_search_system/tools/base_tool.py +7 -2
  77. local_deep_research/api/benchmark_functions.py +6 -2
  78. local_deep_research/api/research_functions.py +10 -4
  79. local_deep_research/benchmarks/__init__.py +9 -7
  80. local_deep_research/benchmarks/benchmark_functions.py +6 -2
  81. local_deep_research/benchmarks/cli/benchmark_commands.py +27 -10
  82. local_deep_research/benchmarks/cli.py +38 -13
  83. local_deep_research/benchmarks/comparison/__init__.py +4 -2
  84. local_deep_research/benchmarks/comparison/evaluator.py +316 -239
  85. local_deep_research/benchmarks/datasets/__init__.py +1 -1
  86. local_deep_research/benchmarks/datasets/base.py +91 -72
  87. local_deep_research/benchmarks/datasets/browsecomp.py +54 -33
  88. local_deep_research/benchmarks/datasets/custom_dataset_template.py +19 -19
  89. local_deep_research/benchmarks/datasets/simpleqa.py +14 -14
  90. local_deep_research/benchmarks/datasets/utils.py +48 -29
  91. local_deep_research/benchmarks/datasets.py +4 -11
  92. local_deep_research/benchmarks/efficiency/__init__.py +8 -4
  93. local_deep_research/benchmarks/efficiency/resource_monitor.py +223 -171
  94. local_deep_research/benchmarks/efficiency/speed_profiler.py +62 -48
  95. local_deep_research/benchmarks/evaluators/browsecomp.py +3 -1
  96. local_deep_research/benchmarks/evaluators/composite.py +6 -2
  97. local_deep_research/benchmarks/evaluators/simpleqa.py +36 -13
  98. local_deep_research/benchmarks/graders.py +32 -10
  99. local_deep_research/benchmarks/metrics/README.md +1 -1
  100. local_deep_research/benchmarks/metrics/calculation.py +25 -10
  101. local_deep_research/benchmarks/metrics/reporting.py +7 -3
  102. local_deep_research/benchmarks/metrics/visualization.py +42 -23
  103. local_deep_research/benchmarks/metrics.py +1 -1
  104. local_deep_research/benchmarks/optimization/__init__.py +3 -1
  105. local_deep_research/benchmarks/optimization/api.py +7 -1
  106. local_deep_research/benchmarks/optimization/optuna_optimizer.py +75 -26
  107. local_deep_research/benchmarks/runners.py +48 -15
  108. local_deep_research/citation_handler.py +65 -92
  109. local_deep_research/citation_handlers/__init__.py +15 -0
  110. local_deep_research/citation_handlers/base_citation_handler.py +70 -0
  111. local_deep_research/citation_handlers/forced_answer_citation_handler.py +179 -0
  112. local_deep_research/citation_handlers/precision_extraction_handler.py +550 -0
  113. local_deep_research/citation_handlers/standard_citation_handler.py +80 -0
  114. local_deep_research/config/llm_config.py +271 -169
  115. local_deep_research/config/search_config.py +14 -5
  116. local_deep_research/defaults/__init__.py +0 -1
  117. local_deep_research/metrics/__init__.py +13 -0
  118. local_deep_research/metrics/database.py +58 -0
  119. local_deep_research/metrics/db_models.py +115 -0
  120. local_deep_research/metrics/migrate_add_provider_to_token_usage.py +148 -0
  121. local_deep_research/metrics/migrate_call_stack_tracking.py +105 -0
  122. local_deep_research/metrics/migrate_enhanced_tracking.py +75 -0
  123. local_deep_research/metrics/migrate_research_ratings.py +31 -0
  124. local_deep_research/metrics/models.py +61 -0
  125. local_deep_research/metrics/pricing/__init__.py +12 -0
  126. local_deep_research/metrics/pricing/cost_calculator.py +237 -0
  127. local_deep_research/metrics/pricing/pricing_cache.py +143 -0
  128. local_deep_research/metrics/pricing/pricing_fetcher.py +240 -0
  129. local_deep_research/metrics/query_utils.py +51 -0
  130. local_deep_research/metrics/search_tracker.py +380 -0
  131. local_deep_research/metrics/token_counter.py +1078 -0
  132. local_deep_research/migrate_db.py +3 -1
  133. local_deep_research/report_generator.py +22 -8
  134. local_deep_research/search_system.py +390 -9
  135. local_deep_research/test_migration.py +15 -5
  136. local_deep_research/utilities/db_utils.py +7 -4
  137. local_deep_research/utilities/es_utils.py +115 -104
  138. local_deep_research/utilities/llm_utils.py +15 -5
  139. local_deep_research/utilities/log_utils.py +151 -0
  140. local_deep_research/utilities/search_cache.py +387 -0
  141. local_deep_research/utilities/search_utilities.py +14 -6
  142. local_deep_research/utilities/threading_utils.py +92 -0
  143. local_deep_research/utilities/url_utils.py +6 -0
  144. local_deep_research/web/api.py +347 -0
  145. local_deep_research/web/app.py +13 -17
  146. local_deep_research/web/app_factory.py +71 -66
  147. local_deep_research/web/database/migrate_to_ldr_db.py +12 -4
  148. local_deep_research/web/database/migrations.py +20 -3
  149. local_deep_research/web/database/models.py +74 -25
  150. local_deep_research/web/database/schema_upgrade.py +49 -29
  151. local_deep_research/web/models/database.py +63 -83
  152. local_deep_research/web/routes/api_routes.py +56 -22
  153. local_deep_research/web/routes/benchmark_routes.py +4 -1
  154. local_deep_research/web/routes/globals.py +22 -0
  155. local_deep_research/web/routes/history_routes.py +71 -46
  156. local_deep_research/web/routes/metrics_routes.py +1155 -0
  157. local_deep_research/web/routes/research_routes.py +192 -54
  158. local_deep_research/web/routes/settings_routes.py +156 -55
  159. local_deep_research/web/services/research_service.py +412 -251
  160. local_deep_research/web/services/resource_service.py +36 -11
  161. local_deep_research/web/services/settings_manager.py +55 -17
  162. local_deep_research/web/services/settings_service.py +12 -4
  163. local_deep_research/web/services/socket_service.py +295 -188
  164. local_deep_research/web/static/css/custom_dropdown.css +180 -0
  165. local_deep_research/web/static/css/styles.css +39 -1
  166. local_deep_research/web/static/js/components/detail.js +633 -267
  167. local_deep_research/web/static/js/components/details.js +751 -0
  168. local_deep_research/web/static/js/components/fallback/formatting.js +11 -11
  169. local_deep_research/web/static/js/components/fallback/ui.js +23 -23
  170. local_deep_research/web/static/js/components/history.js +76 -76
  171. local_deep_research/web/static/js/components/logpanel.js +61 -13
  172. local_deep_research/web/static/js/components/progress.js +13 -2
  173. local_deep_research/web/static/js/components/research.js +99 -12
  174. local_deep_research/web/static/js/components/results.js +239 -106
  175. local_deep_research/web/static/js/main.js +40 -40
  176. local_deep_research/web/static/js/services/audio.js +1 -1
  177. local_deep_research/web/static/js/services/formatting.js +11 -11
  178. local_deep_research/web/static/js/services/keyboard.js +157 -0
  179. local_deep_research/web/static/js/services/pdf.js +80 -80
  180. local_deep_research/web/static/sounds/README.md +1 -1
  181. local_deep_research/web/templates/base.html +1 -0
  182. local_deep_research/web/templates/components/log_panel.html +7 -1
  183. local_deep_research/web/templates/components/mobile_nav.html +1 -1
  184. local_deep_research/web/templates/components/sidebar.html +3 -0
  185. local_deep_research/web/templates/pages/cost_analytics.html +1245 -0
  186. local_deep_research/web/templates/pages/details.html +325 -24
  187. local_deep_research/web/templates/pages/history.html +1 -1
  188. local_deep_research/web/templates/pages/metrics.html +1929 -0
  189. local_deep_research/web/templates/pages/progress.html +2 -2
  190. local_deep_research/web/templates/pages/research.html +53 -17
  191. local_deep_research/web/templates/pages/results.html +12 -1
  192. local_deep_research/web/templates/pages/star_reviews.html +803 -0
  193. local_deep_research/web/utils/formatters.py +9 -3
  194. local_deep_research/web_search_engines/default_search_engines.py +5 -3
  195. local_deep_research/web_search_engines/engines/full_search.py +8 -2
  196. local_deep_research/web_search_engines/engines/meta_search_engine.py +59 -20
  197. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +19 -6
  198. local_deep_research/web_search_engines/engines/search_engine_brave.py +6 -2
  199. local_deep_research/web_search_engines/engines/search_engine_ddg.py +3 -1
  200. local_deep_research/web_search_engines/engines/search_engine_elasticsearch.py +81 -58
  201. local_deep_research/web_search_engines/engines/search_engine_github.py +46 -15
  202. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +16 -6
  203. local_deep_research/web_search_engines/engines/search_engine_guardian.py +39 -15
  204. local_deep_research/web_search_engines/engines/search_engine_local.py +58 -25
  205. local_deep_research/web_search_engines/engines/search_engine_local_all.py +15 -5
  206. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +63 -21
  207. local_deep_research/web_search_engines/engines/search_engine_searxng.py +37 -11
  208. local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +27 -9
  209. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +12 -4
  210. local_deep_research/web_search_engines/engines/search_engine_wayback.py +31 -10
  211. local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +12 -3
  212. local_deep_research/web_search_engines/search_engine_base.py +83 -35
  213. local_deep_research/web_search_engines/search_engine_factory.py +25 -8
  214. local_deep_research/web_search_engines/search_engines_config.py +9 -3
  215. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/METADATA +7 -1
  216. local_deep_research-0.5.2.dist-info/RECORD +265 -0
  217. local_deep_research-0.4.4.dist-info/RECORD +0 -177
  218. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/WHEEL +0 -0
  219. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/entry_points.txt +0 -0
  220. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/licenses/LICENSE +0 -0
@@ -60,7 +60,9 @@ def main():
60
60
  action="store_true",
61
61
  help="Only check what would be migrated, don't perform actual migration",
62
62
  )
63
- parser.add_argument("--verbose", action="store_true", help="Enable verbose logging")
63
+ parser.add_argument(
64
+ "--verbose", action="store_true", help="Enable verbose logging"
65
+ )
64
66
 
65
67
  args = parser.parse_args()
66
68
 
@@ -36,7 +36,9 @@ class IntegratedReportGenerator:
36
36
  """
37
37
  self.model = llm or get_llm()
38
38
  # Use provided search_system or create a new one
39
- self.search_system = search_system or AdvancedSearchSystem(llm=self.model)
39
+ self.search_system = search_system or AdvancedSearchSystem(
40
+ llm=self.model
41
+ )
40
42
  self.searches_per_section = (
41
43
  searches_per_section # Control search depth per section
42
44
  )
@@ -57,7 +59,9 @@ class IntegratedReportGenerator:
57
59
 
58
60
  return report
59
61
 
60
- def _determine_report_structure(self, findings: Dict, query: str) -> List[Dict]:
62
+ def _determine_report_structure(
63
+ self, findings: Dict, query: str
64
+ ) -> List[Dict]:
61
65
  """Analyze content and determine optimal report structure."""
62
66
  combined_content = findings["current_knowledge"]
63
67
  prompt = f"""
@@ -84,7 +88,9 @@ class IntegratedReportGenerator:
84
88
  Each subsection must include its purpose after the | symbol.
85
89
  """
86
90
 
87
- response = search_utilities.remove_think_tags(self.model.invoke(prompt).content)
91
+ response = search_utilities.remove_think_tags(
92
+ self.model.invoke(prompt).content
93
+ )
88
94
 
89
95
  # Parse the structure
90
96
  structure = []
@@ -141,7 +147,9 @@ class IntegratedReportGenerator:
141
147
  self.search_system.max_iterations = 1 # Keep search focused
142
148
 
143
149
  # Perform search for this subsection
144
- subsection_results = self.search_system.analyze_topic(subsection_query)
150
+ subsection_results = self.search_system.analyze_topic(
151
+ subsection_query
152
+ )
145
153
 
146
154
  # Restore original iterations setting
147
155
  self.search_system.max_iterations = original_max_iterations
@@ -151,7 +159,9 @@ class IntegratedReportGenerator:
151
159
  "current_knowledge" in subsection_results
152
160
  and subsection_results["current_knowledge"]
153
161
  ):
154
- section_content.append(subsection_results["current_knowledge"])
162
+ section_content.append(
163
+ subsection_results["current_knowledge"]
164
+ )
155
165
  else:
156
166
  section_content.append(
157
167
  "*Limited information was found for this subsection.*\n"
@@ -215,8 +225,10 @@ class IntegratedReportGenerator:
215
225
  # Format links from search system
216
226
  # Get utilities module dynamically to avoid circular imports
217
227
  utilities = importlib.import_module("local_deep_research.utilities")
218
- formatted_all_links = utilities.search_utilities.format_links_to_markdown(
219
- all_links=self.search_system.all_links_of_system
228
+ formatted_all_links = (
229
+ utilities.search_utilities.format_links_to_markdown(
230
+ all_links=self.search_system.all_links_of_system
231
+ )
220
232
  )
221
233
 
222
234
  # Create final report with all parts
@@ -240,5 +252,7 @@ class IntegratedReportGenerator:
240
252
  return {"content": final_report_content, "metadata": metadata}
241
253
 
242
254
  def _generate_error_report(self, query: str, error_msg: str) -> str:
243
- error_report = f"=== ERROR REPORT ===\nQuery: {query}\nError: {error_msg}"
255
+ error_report = (
256
+ f"=== ERROR REPORT ===\nQuery: {query}\nError: {error_msg}"
257
+ )
244
258
  return error_report
@@ -8,15 +8,48 @@ from .advanced_search_system.findings.repository import FindingsRepository
8
8
  from .advanced_search_system.questions.standard_question import (
9
9
  StandardQuestionGenerator,
10
10
  )
11
- from .advanced_search_system.strategies.iterdrag_strategy import IterDRAGStrategy
11
+ from .advanced_search_system.strategies.adaptive_decomposition_strategy import (
12
+ AdaptiveDecompositionStrategy,
13
+ )
14
+ from .advanced_search_system.strategies.browsecomp_optimized_strategy import (
15
+ BrowseCompOptimizedStrategy,
16
+ )
17
+ from .advanced_search_system.strategies.constrained_search_strategy import (
18
+ ConstrainedSearchStrategy,
19
+ )
20
+ from .advanced_search_system.strategies.early_stop_constrained_strategy import (
21
+ EarlyStopConstrainedStrategy,
22
+ )
23
+ from .advanced_search_system.strategies.evidence_based_strategy_v2 import (
24
+ EnhancedEvidenceBasedStrategy,
25
+ )
26
+ from .advanced_search_system.strategies.iterative_reasoning_strategy import (
27
+ IterativeReasoningStrategy,
28
+ )
29
+ from .advanced_search_system.strategies.iterdrag_strategy import (
30
+ IterDRAGStrategy,
31
+ )
32
+ from .advanced_search_system.strategies.parallel_constrained_strategy import (
33
+ ParallelConstrainedStrategy,
34
+ )
12
35
  from .advanced_search_system.strategies.parallel_search_strategy import (
13
36
  ParallelSearchStrategy,
14
37
  )
15
- from .advanced_search_system.strategies.rapid_search_strategy import RapidSearchStrategy
38
+ from .advanced_search_system.strategies.rapid_search_strategy import (
39
+ RapidSearchStrategy,
40
+ )
41
+ from .advanced_search_system.strategies.recursive_decomposition_strategy import (
42
+ RecursiveDecompositionStrategy,
43
+ )
44
+ from .advanced_search_system.strategies.smart_decomposition_strategy import (
45
+ SmartDecompositionStrategy,
46
+ )
16
47
  from .advanced_search_system.strategies.source_based_strategy import (
17
48
  SourceBasedSearchStrategy,
18
49
  )
19
- from .advanced_search_system.strategies.standard_strategy import StandardSearchStrategy
50
+ from .advanced_search_system.strategies.standard_strategy import (
51
+ StandardSearchStrategy,
52
+ )
20
53
  from .citation_handler import CitationHandler
21
54
  from .config.llm_config import get_llm
22
55
  from .config.search_config import get_search
@@ -31,18 +64,38 @@ class AdvancedSearchSystem:
31
64
 
32
65
  def __init__(
33
66
  self,
34
- strategy_name: str = "source-based",
67
+ strategy_name: str = "source-based", # Default to comprehensive research strategy
35
68
  include_text_content: bool = True,
36
69
  use_cross_engine_filter: bool = True,
37
70
  llm: BaseChatModel | None = None,
38
71
  search: BaseSearchEngine | None = None,
39
72
  max_iterations: int | None = None,
40
73
  questions_per_iteration: int | None = None,
74
+ use_atomic_facts: bool = False,
41
75
  ):
42
76
  """Initialize the advanced search system.
43
77
 
44
78
  Args:
45
- strategy_name: The name of the search strategy to use ("standard" or "iterdrag")
79
+ strategy_name: The name of the search strategy to use. Options:
80
+ - "standard": Basic iterative search strategy
81
+ - "iterdrag": Iterative Dense Retrieval Augmented Generation
82
+ - "source-based": Focuses on finding and extracting from sources
83
+ - "parallel": Runs multiple search queries in parallel
84
+ - "rapid": Quick single-pass search
85
+ - "recursive": Recursive decomposition of complex queries
86
+ - "iterative": Loop-based reasoning with persistent knowledge
87
+ - "adaptive": Adaptive step-by-step reasoning
88
+ - "smart": Automatically chooses best strategy based on query
89
+ - "browsecomp": Optimized for BrowseComp-style puzzle queries
90
+ - "evidence": Enhanced evidence-based verification with improved candidate discovery
91
+ - "constrained": Progressive constraint-based search that narrows candidates step by step
92
+ - "parallel-constrained": Parallel constraint-based search with combined constraint execution
93
+ - "early-stop-constrained": Parallel constraint search with immediate evaluation and early stopping at 99% confidence
94
+ - "dual-confidence": Dual confidence scoring with positive/negative/uncertainty
95
+ - "dual-confidence-with-rejection": Dual confidence with early rejection of poor candidates
96
+ - "concurrent-dual-confidence": Concurrent search & evaluation with progressive constraint relaxation
97
+ - "modular": Modular architecture using constraint checking and candidate exploration modules
98
+ - "browsecomp-entity": Entity-focused search for BrowseComp questions with knowledge graph building
46
99
  include_text_content: If False, only includes metadata and links in search results
47
100
  use_cross_engine_filter: Whether to filter results across search
48
101
  engines.
@@ -53,6 +106,8 @@ class AdvancedSearchSystem:
53
106
  perform. Will be read from the settings if not specified.
54
107
  questions_per_iteration: The number of questions to include in
55
108
  each iteration. Will be read from the settings if not specified.
109
+ use_atomic_facts: Whether to use atomic fact decomposition for
110
+ complex queries when using the source-based strategy.
56
111
 
57
112
  """
58
113
  # Get configuration
@@ -103,6 +158,7 @@ class AdvancedSearchSystem:
103
158
  include_text_content=include_text_content,
104
159
  use_cross_engine_filter=use_cross_engine_filter,
105
160
  all_links_of_system=self.all_links_of_system,
161
+ use_atomic_facts=use_atomic_facts,
106
162
  )
107
163
  elif strategy_name.lower() == "parallel":
108
164
  logger.info("Creating ParallelSearchStrategy instance")
@@ -120,6 +176,319 @@ class AdvancedSearchSystem:
120
176
  search=self.search,
121
177
  all_links_of_system=self.all_links_of_system,
122
178
  )
179
+ elif (
180
+ strategy_name.lower() == "recursive"
181
+ or strategy_name.lower() == "recursive-decomposition"
182
+ ):
183
+ logger.info("Creating RecursiveDecompositionStrategy instance")
184
+ self.strategy = RecursiveDecompositionStrategy(
185
+ model=self.model,
186
+ search=self.search,
187
+ all_links_of_system=self.all_links_of_system,
188
+ )
189
+ elif strategy_name.lower() == "iterative":
190
+ logger.info("Creating IterativeReasoningStrategy instance")
191
+ # For iterative reasoning, use more iterations to allow thorough exploration
192
+ # while keeping search iterations separate
193
+ self.strategy = IterativeReasoningStrategy(
194
+ model=self.model,
195
+ search=self.search,
196
+ all_links_of_system=self.all_links_of_system,
197
+ max_iterations=20, # Increased reasoning iterations
198
+ confidence_threshold=0.95, # Increased from 0.85 to require very high confidence
199
+ search_iterations_per_round=self.max_iterations
200
+ or 1, # Search iterations from settings
201
+ questions_per_search=self.questions_per_iteration,
202
+ )
203
+ elif strategy_name.lower() == "adaptive":
204
+ logger.info("Creating AdaptiveDecompositionStrategy instance")
205
+ self.strategy = AdaptiveDecompositionStrategy(
206
+ model=self.model,
207
+ search=self.search,
208
+ all_links_of_system=self.all_links_of_system,
209
+ max_steps=self.max_iterations,
210
+ min_confidence=0.8,
211
+ source_search_iterations=2,
212
+ source_questions_per_iteration=self.questions_per_iteration,
213
+ )
214
+ elif strategy_name.lower() == "smart":
215
+ logger.info("Creating SmartDecompositionStrategy instance")
216
+ self.strategy = SmartDecompositionStrategy(
217
+ model=self.model,
218
+ search=self.search,
219
+ all_links_of_system=self.all_links_of_system,
220
+ max_iterations=self.max_iterations,
221
+ source_search_iterations=2,
222
+ source_questions_per_iteration=self.questions_per_iteration,
223
+ )
224
+ elif strategy_name.lower() == "browsecomp":
225
+ logger.info("Creating BrowseCompOptimizedStrategy instance")
226
+ self.strategy = BrowseCompOptimizedStrategy(
227
+ model=self.model,
228
+ search=self.search,
229
+ all_links_of_system=self.all_links_of_system,
230
+ max_browsecomp_iterations=15, # BrowseComp strategy main iterations
231
+ confidence_threshold=0.9,
232
+ max_iterations=self.max_iterations, # Source-based sub-searches
233
+ questions_per_iteration=self.questions_per_iteration, # Source-based sub-searches
234
+ )
235
+ elif strategy_name.lower() == "evidence":
236
+ logger.info("Creating EnhancedEvidenceBasedStrategy instance")
237
+ self.strategy = EnhancedEvidenceBasedStrategy(
238
+ model=self.model,
239
+ search=self.search,
240
+ all_links_of_system=self.all_links_of_system,
241
+ max_iterations=20, # Main evidence-gathering iterations
242
+ confidence_threshold=0.95, # Increased from 0.85 to require very high confidence
243
+ candidate_limit=20, # Increased from 10
244
+ evidence_threshold=0.9, # Increased from 0.6 to require strong evidence
245
+ max_search_iterations=self.max_iterations, # Source-based sub-searches
246
+ questions_per_iteration=self.questions_per_iteration,
247
+ min_candidates_threshold=10, # Increased from 3
248
+ enable_pattern_learning=True,
249
+ )
250
+ elif strategy_name.lower() == "constrained":
251
+ logger.info("Creating ConstrainedSearchStrategy instance")
252
+ self.strategy = ConstrainedSearchStrategy(
253
+ model=self.model,
254
+ search=self.search,
255
+ all_links_of_system=self.all_links_of_system,
256
+ max_iterations=20,
257
+ confidence_threshold=0.95, # Increased from 0.85 to require very high confidence
258
+ candidate_limit=100, # Increased from 30
259
+ evidence_threshold=0.9, # Increased from 0.6 to require strong evidence
260
+ max_search_iterations=self.max_iterations,
261
+ questions_per_iteration=self.questions_per_iteration,
262
+ min_candidates_per_stage=20, # Increased from 5
263
+ )
264
+ elif strategy_name.lower() in [
265
+ "parallel-constrained",
266
+ "parallel_constrained",
267
+ ]:
268
+ logger.info("Creating ParallelConstrainedStrategy instance")
269
+ self.strategy = ParallelConstrainedStrategy(
270
+ model=self.model,
271
+ search=self.search,
272
+ all_links_of_system=self.all_links_of_system,
273
+ max_iterations=20,
274
+ confidence_threshold=0.95, # Increased from 0.85 to require very high confidence
275
+ candidate_limit=100,
276
+ evidence_threshold=0.9, # Increased from 0.6 to require strong evidence
277
+ max_search_iterations=self.max_iterations,
278
+ questions_per_iteration=self.questions_per_iteration,
279
+ min_candidates_per_stage=20, # Correct parameter name for parent class
280
+ parallel_workers=100, # Run up to 100 searches in parallel
281
+ )
282
+ elif strategy_name.lower() in [
283
+ "early-stop-constrained",
284
+ "early_stop_constrained",
285
+ ]:
286
+ logger.info("Creating EarlyStopConstrainedStrategy instance")
287
+ self.strategy = EarlyStopConstrainedStrategy(
288
+ model=self.model,
289
+ search=self.search,
290
+ all_links_of_system=self.all_links_of_system,
291
+ max_iterations=20,
292
+ confidence_threshold=0.95,
293
+ candidate_limit=100,
294
+ evidence_threshold=0.9,
295
+ max_search_iterations=self.max_iterations,
296
+ questions_per_iteration=self.questions_per_iteration,
297
+ min_candidates_per_stage=20,
298
+ parallel_workers=100, # Increased parallelism as requested
299
+ early_stop_threshold=0.99, # Stop when we find 99%+ confidence
300
+ concurrent_evaluation=True, # Evaluate candidates as we find them
301
+ )
302
+ elif strategy_name.lower() in ["smart-query", "smart_query"]:
303
+ from .advanced_search_system.strategies.smart_query_strategy import (
304
+ SmartQueryStrategy,
305
+ )
306
+
307
+ logger.info("Creating SmartQueryStrategy instance")
308
+ self.strategy = SmartQueryStrategy(
309
+ model=self.model,
310
+ search=self.search,
311
+ all_links_of_system=self.all_links_of_system,
312
+ max_iterations=20,
313
+ confidence_threshold=0.95,
314
+ candidate_limit=100,
315
+ evidence_threshold=0.9,
316
+ max_search_iterations=self.max_iterations,
317
+ questions_per_iteration=self.questions_per_iteration,
318
+ min_candidates_per_stage=20,
319
+ parallel_workers=100, # High parallelism
320
+ early_stop_threshold=0.99, # Stop when we find 99%+ confidence
321
+ concurrent_evaluation=True, # Evaluate candidates as we find them
322
+ use_llm_query_generation=True, # Use smart query generation
323
+ queries_per_combination=3, # Generate multiple queries
324
+ )
325
+ elif strategy_name.lower() in ["dual-confidence", "dual_confidence"]:
326
+ from .advanced_search_system.strategies.dual_confidence_strategy import (
327
+ DualConfidenceStrategy,
328
+ )
329
+
330
+ logger.info("Creating DualConfidenceStrategy instance")
331
+ self.strategy = DualConfidenceStrategy(
332
+ model=self.model,
333
+ search=self.search,
334
+ all_links_of_system=self.all_links_of_system,
335
+ max_iterations=20,
336
+ confidence_threshold=0.95,
337
+ candidate_limit=100,
338
+ evidence_threshold=0.9,
339
+ max_search_iterations=self.max_iterations,
340
+ questions_per_iteration=self.questions_per_iteration,
341
+ min_candidates_per_stage=20,
342
+ parallel_workers=100,
343
+ early_stop_threshold=0.95, # Lower threshold since dual confidence is more accurate
344
+ concurrent_evaluation=True,
345
+ use_llm_query_generation=True,
346
+ queries_per_combination=3,
347
+ use_entity_seeding=True,
348
+ use_direct_property_search=True,
349
+ uncertainty_penalty=0.2, # Penalty for uncertain evidence
350
+ negative_weight=0.5, # How much negative evidence counts against
351
+ )
352
+ elif strategy_name.lower() in [
353
+ "dual-confidence-with-rejection",
354
+ "dual_confidence_with_rejection",
355
+ ]:
356
+ from .advanced_search_system.strategies.dual_confidence_with_rejection import (
357
+ DualConfidenceWithRejectionStrategy,
358
+ )
359
+
360
+ logger.info("Creating DualConfidenceWithRejectionStrategy instance")
361
+ self.strategy = DualConfidenceWithRejectionStrategy(
362
+ model=self.model,
363
+ search=self.search,
364
+ all_links_of_system=self.all_links_of_system,
365
+ max_iterations=20,
366
+ confidence_threshold=0.95,
367
+ candidate_limit=100,
368
+ evidence_threshold=0.9,
369
+ max_search_iterations=self.max_iterations,
370
+ questions_per_iteration=self.questions_per_iteration,
371
+ min_candidates_per_stage=20,
372
+ parallel_workers=100,
373
+ early_stop_threshold=0.95,
374
+ concurrent_evaluation=True,
375
+ use_llm_query_generation=True,
376
+ queries_per_combination=3,
377
+ use_entity_seeding=True,
378
+ use_direct_property_search=True,
379
+ uncertainty_penalty=0.2,
380
+ negative_weight=0.5,
381
+ rejection_threshold=0.3, # Reject if negative confidence > 30%
382
+ positive_threshold=0.2, # Unless positive confidence > 20%
383
+ critical_constraint_rejection=0.2, # Stricter for critical constraints
384
+ )
385
+ elif strategy_name.lower() in [
386
+ "concurrent-dual-confidence",
387
+ "concurrent_dual_confidence",
388
+ ]:
389
+ from .advanced_search_system.strategies.concurrent_dual_confidence_strategy import (
390
+ ConcurrentDualConfidenceStrategy,
391
+ )
392
+
393
+ logger.info("Creating ConcurrentDualConfidenceStrategy instance")
394
+ self.strategy = ConcurrentDualConfidenceStrategy(
395
+ model=self.model,
396
+ search=self.search,
397
+ all_links_of_system=self.all_links_of_system,
398
+ max_iterations=20,
399
+ confidence_threshold=0.95,
400
+ candidate_limit=100,
401
+ evidence_threshold=0.9,
402
+ max_search_iterations=self.max_iterations,
403
+ questions_per_iteration=self.questions_per_iteration,
404
+ min_candidates_per_stage=20,
405
+ parallel_workers=10, # Concurrent evaluation threads
406
+ early_stop_threshold=0.95,
407
+ concurrent_evaluation=True,
408
+ use_llm_query_generation=True,
409
+ queries_per_combination=3,
410
+ use_entity_seeding=True,
411
+ use_direct_property_search=True,
412
+ uncertainty_penalty=0.2,
413
+ negative_weight=0.5,
414
+ rejection_threshold=0.3,
415
+ positive_threshold=0.2,
416
+ critical_constraint_rejection=0.2,
417
+ # Concurrent-specific parameters
418
+ min_good_candidates=3,
419
+ target_candidates=5,
420
+ max_candidates=10,
421
+ min_score_threshold=0.65,
422
+ exceptional_score=0.95,
423
+ quality_plateau_threshold=0.1,
424
+ max_search_time=30.0,
425
+ max_evaluations=30,
426
+ )
427
+ elif strategy_name.lower() in ["modular", "modular-strategy"]:
428
+ from .advanced_search_system.strategies.modular_strategy import (
429
+ ModularStrategy,
430
+ )
431
+
432
+ logger.info("Creating ModularStrategy instance")
433
+ self.strategy = ModularStrategy(
434
+ model=self.model,
435
+ search=self.search,
436
+ all_links_of_system=self.all_links_of_system,
437
+ constraint_checker_type="dual_confidence", # dual_confidence, strict, threshold
438
+ exploration_strategy="adaptive", # parallel, adaptive, constraint_guided, diversity
439
+ early_rejection=True, # Enable fast pre-screening
440
+ early_stopping=True, # Enable early stopping
441
+ llm_constraint_processing=True, # Enable LLM-driven constraint processing
442
+ immediate_evaluation=True, # Enable immediate candidate evaluation
443
+ )
444
+ elif strategy_name.lower() in ["modular-parallel", "modular_parallel"]:
445
+ from .advanced_search_system.strategies.modular_strategy import (
446
+ ModularStrategy,
447
+ )
448
+
449
+ logger.info(
450
+ "Creating ModularStrategy with parallel exploration and dual confidence"
451
+ )
452
+ self.strategy = ModularStrategy(
453
+ model=self.model,
454
+ search=self.search,
455
+ all_links_of_system=self.all_links_of_system,
456
+ constraint_checker_type="dual_confidence", # Concurrent evaluation with +/-/? scoring
457
+ exploration_strategy="parallel", # Parallel constraint searches
458
+ )
459
+ elif strategy_name.lower() in [
460
+ "focused-iteration",
461
+ "focused_iteration",
462
+ ]:
463
+ from .advanced_search_system.strategies.focused_iteration_strategy import (
464
+ FocusedIterationStrategy,
465
+ )
466
+
467
+ logger.info("Creating FocusedIterationStrategy instance")
468
+ # PRESERVE SIMPLEQA PERFORMANCE: Keep proven 96.51% accuracy configuration
469
+ # Original optimal settings: max_iterations=8, questions_per_iteration=5
470
+ self.strategy = FocusedIterationStrategy(
471
+ model=self.model,
472
+ search=self.search,
473
+ all_links_of_system=self.all_links_of_system,
474
+ max_iterations=8, # PROVEN OPTIMAL FOR SIMPLEQA (96.51% accuracy)
475
+ questions_per_iteration=5, # PROVEN OPTIMAL FOR SIMPLEQA
476
+ use_browsecomp_optimization=True, # Enable BrowseComp optimizations
477
+ )
478
+ elif strategy_name.lower() in [
479
+ "browsecomp-entity",
480
+ "browsecomp_entity",
481
+ ]:
482
+ from .advanced_search_system.strategies.browsecomp_entity_strategy import (
483
+ BrowseCompEntityStrategy,
484
+ )
485
+
486
+ logger.info("Creating BrowseCompEntityStrategy instance")
487
+ self.strategy = BrowseCompEntityStrategy(
488
+ model=self.model,
489
+ search=self.search,
490
+ all_links_of_system=self.all_links_of_system,
491
+ )
123
492
  else:
124
493
  logger.info("Creating StandardSearchStrategy instance")
125
494
  self.strategy = StandardSearchStrategy(
@@ -135,13 +504,17 @@ class AdvancedSearchSystem:
135
504
  if hasattr(self, "progress_callback") and self.progress_callback:
136
505
  self.strategy.set_progress_callback(self.progress_callback)
137
506
 
138
- def _progress_callback(self, message: str, progress: int, metadata: dict) -> None:
507
+ def _progress_callback(
508
+ self, message: str, progress: int, metadata: dict
509
+ ) -> None:
139
510
  """Handle progress updates from the strategy."""
140
511
  logger.info(f"Progress: {progress}% - {message}")
141
512
  if hasattr(self, "progress_callback"):
142
513
  self.progress_callback(message, progress, metadata)
143
514
 
144
- def set_progress_callback(self, callback: Callable[[str, int, dict], None]) -> None:
515
+ def set_progress_callback(
516
+ self, callback: Callable[[str, int, dict], None]
517
+ ) -> None:
145
518
  """Set a callback function to receive progress updates."""
146
519
  self.progress_callback = callback
147
520
  if hasattr(self, "strategy"):
@@ -185,17 +558,25 @@ class AdvancedSearchSystem:
185
558
 
186
559
  # Update our attributes for backward compatibility
187
560
 
188
- self.questions_by_iteration = self.strategy.questions_by_iteration.copy()
561
+ self.questions_by_iteration = (
562
+ self.strategy.questions_by_iteration.copy()
563
+ )
189
564
  # Send progress message with search info
190
565
 
191
566
  # Only extend if they're different objects in memory to avoid duplication
192
567
  # This check prevents doubling the list when they reference the same object
193
568
  # Fix for issue #301: "too many links in detailed report mode"
194
- if id(self.all_links_of_system) != id(self.strategy.all_links_of_system):
569
+ if id(self.all_links_of_system) != id(
570
+ self.strategy.all_links_of_system
571
+ ):
195
572
  self.all_links_of_system.extend(self.strategy.all_links_of_system)
196
573
 
197
574
  # Include the search system instance for access to citations
198
575
  result["search_system"] = self
199
576
  result["all_links_of_system"] = self.all_links_of_system
577
+
578
+ # Ensure query is included in the result
579
+ if "query" not in result:
580
+ result["query"] = query
200
581
  result["questions_by_iteration"] = self.questions_by_iteration
201
582
  return result
@@ -24,7 +24,9 @@ def check_db_content(db_path, description):
24
24
  # Get list of tables
25
25
  cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
26
26
  tables = [
27
- row[0] for row in cursor.fetchall() if not row[0].startswith("sqlite_")
27
+ row[0]
28
+ for row in cursor.fetchall()
29
+ if not row[0].startswith("sqlite_")
28
30
  ]
29
31
 
30
32
  if not tables:
@@ -56,7 +58,9 @@ def check_db_content(db_path, description):
56
58
  key_cols = (
57
59
  "id, query, status"
58
60
  if table == "research_history"
59
- else "id, key, value" if table == "settings" else "id, message"
61
+ else "id, key, value"
62
+ if table == "settings"
63
+ else "id, message"
60
64
  )
61
65
  cursor.execute(f"SELECT {key_cols} FROM {table} LIMIT 3")
62
66
  sample = cursor.fetchall()
@@ -110,13 +114,19 @@ def main():
110
114
  if choice == "y":
111
115
  # Run the migration script directly
112
116
  try:
113
- from src.local_deep_research.setup_data_dir import setup_data_dir
117
+ from src.local_deep_research.setup_data_dir import (
118
+ setup_data_dir,
119
+ )
114
120
  except ImportError:
115
121
  # If that fails, try with the direct import
116
122
  sys.path.append(
117
- os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
123
+ os.path.dirname(
124
+ os.path.dirname(os.path.abspath(__file__))
125
+ )
126
+ )
127
+ from local_deep_research.setup_data_dir import (
128
+ setup_data_dir,
118
129
  )
119
- from local_deep_research.setup_data_dir import setup_data_dir
120
130
 
121
131
  setup_data_dir()
122
132
 
@@ -1,19 +1,22 @@
1
1
  import os
2
- from functools import cache
3
2
  from typing import Any, Dict
4
3
 
4
+ from cachetools import LRUCache
5
5
  from loguru import logger
6
6
  from sqlalchemy import create_engine
7
7
  from sqlalchemy.orm import Session, sessionmaker
8
8
 
9
+ from ..utilities.threading_utils import thread_specific_cache
9
10
  from ..web.services.settings_manager import SettingsManager
10
11
 
11
12
  # Database path.
12
- DATA_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "data"))
13
+ DATA_DIR = os.path.abspath(
14
+ os.path.join(os.path.dirname(__file__), "..", "..", "data")
15
+ )
13
16
  DB_PATH = os.path.join(DATA_DIR, "ldr.db")
14
17
 
15
18
 
16
- @cache
19
+ @thread_specific_cache(cache=LRUCache(maxsize=1))
17
20
  def get_db_session() -> Session:
18
21
  """
19
22
  Returns:
@@ -24,7 +27,7 @@ def get_db_session() -> Session:
24
27
  return session_class()
25
28
 
26
29
 
27
- @cache
30
+ @thread_specific_cache(cache=LRUCache(maxsize=1))
28
31
  def get_settings_manager() -> SettingsManager:
29
32
  """
30
33
  Returns: