local-deep-research 0.4.4__py3-none-any.whl → 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220) hide show
  1. local_deep_research/__init__.py +7 -0
  2. local_deep_research/__version__.py +1 -1
  3. local_deep_research/advanced_search_system/answer_decoding/__init__.py +5 -0
  4. local_deep_research/advanced_search_system/answer_decoding/browsecomp_answer_decoder.py +421 -0
  5. local_deep_research/advanced_search_system/candidate_exploration/README.md +219 -0
  6. local_deep_research/advanced_search_system/candidate_exploration/__init__.py +25 -0
  7. local_deep_research/advanced_search_system/candidate_exploration/adaptive_explorer.py +329 -0
  8. local_deep_research/advanced_search_system/candidate_exploration/base_explorer.py +341 -0
  9. local_deep_research/advanced_search_system/candidate_exploration/constraint_guided_explorer.py +436 -0
  10. local_deep_research/advanced_search_system/candidate_exploration/diversity_explorer.py +457 -0
  11. local_deep_research/advanced_search_system/candidate_exploration/parallel_explorer.py +250 -0
  12. local_deep_research/advanced_search_system/candidate_exploration/progressive_explorer.py +255 -0
  13. local_deep_research/advanced_search_system/candidates/__init__.py +5 -0
  14. local_deep_research/advanced_search_system/candidates/base_candidate.py +59 -0
  15. local_deep_research/advanced_search_system/constraint_checking/README.md +150 -0
  16. local_deep_research/advanced_search_system/constraint_checking/__init__.py +35 -0
  17. local_deep_research/advanced_search_system/constraint_checking/base_constraint_checker.py +122 -0
  18. local_deep_research/advanced_search_system/constraint_checking/constraint_checker.py +223 -0
  19. local_deep_research/advanced_search_system/constraint_checking/constraint_satisfaction_tracker.py +387 -0
  20. local_deep_research/advanced_search_system/constraint_checking/dual_confidence_checker.py +424 -0
  21. local_deep_research/advanced_search_system/constraint_checking/evidence_analyzer.py +174 -0
  22. local_deep_research/advanced_search_system/constraint_checking/intelligent_constraint_relaxer.py +503 -0
  23. local_deep_research/advanced_search_system/constraint_checking/rejection_engine.py +143 -0
  24. local_deep_research/advanced_search_system/constraint_checking/strict_checker.py +259 -0
  25. local_deep_research/advanced_search_system/constraint_checking/threshold_checker.py +213 -0
  26. local_deep_research/advanced_search_system/constraints/__init__.py +6 -0
  27. local_deep_research/advanced_search_system/constraints/base_constraint.py +58 -0
  28. local_deep_research/advanced_search_system/constraints/constraint_analyzer.py +143 -0
  29. local_deep_research/advanced_search_system/evidence/__init__.py +12 -0
  30. local_deep_research/advanced_search_system/evidence/base_evidence.py +57 -0
  31. local_deep_research/advanced_search_system/evidence/evaluator.py +159 -0
  32. local_deep_research/advanced_search_system/evidence/requirements.py +122 -0
  33. local_deep_research/advanced_search_system/filters/base_filter.py +3 -1
  34. local_deep_research/advanced_search_system/filters/cross_engine_filter.py +8 -2
  35. local_deep_research/advanced_search_system/filters/journal_reputation_filter.py +43 -29
  36. local_deep_research/advanced_search_system/findings/repository.py +54 -17
  37. local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +3 -1
  38. local_deep_research/advanced_search_system/query_generation/adaptive_query_generator.py +405 -0
  39. local_deep_research/advanced_search_system/questions/__init__.py +16 -0
  40. local_deep_research/advanced_search_system/questions/atomic_fact_question.py +171 -0
  41. local_deep_research/advanced_search_system/questions/browsecomp_question.py +287 -0
  42. local_deep_research/advanced_search_system/questions/decomposition_question.py +13 -4
  43. local_deep_research/advanced_search_system/questions/entity_aware_question.py +184 -0
  44. local_deep_research/advanced_search_system/questions/standard_question.py +9 -3
  45. local_deep_research/advanced_search_system/search_optimization/cross_constraint_manager.py +624 -0
  46. local_deep_research/advanced_search_system/source_management/diversity_manager.py +613 -0
  47. local_deep_research/advanced_search_system/strategies/__init__.py +42 -0
  48. local_deep_research/advanced_search_system/strategies/adaptive_decomposition_strategy.py +564 -0
  49. local_deep_research/advanced_search_system/strategies/base_strategy.py +4 -4
  50. local_deep_research/advanced_search_system/strategies/browsecomp_entity_strategy.py +1031 -0
  51. local_deep_research/advanced_search_system/strategies/browsecomp_optimized_strategy.py +778 -0
  52. local_deep_research/advanced_search_system/strategies/concurrent_dual_confidence_strategy.py +446 -0
  53. local_deep_research/advanced_search_system/strategies/constrained_search_strategy.py +1348 -0
  54. local_deep_research/advanced_search_system/strategies/constraint_parallel_strategy.py +522 -0
  55. local_deep_research/advanced_search_system/strategies/direct_search_strategy.py +217 -0
  56. local_deep_research/advanced_search_system/strategies/dual_confidence_strategy.py +320 -0
  57. local_deep_research/advanced_search_system/strategies/dual_confidence_with_rejection.py +219 -0
  58. local_deep_research/advanced_search_system/strategies/early_stop_constrained_strategy.py +369 -0
  59. local_deep_research/advanced_search_system/strategies/entity_aware_source_strategy.py +140 -0
  60. local_deep_research/advanced_search_system/strategies/evidence_based_strategy.py +1248 -0
  61. local_deep_research/advanced_search_system/strategies/evidence_based_strategy_v2.py +1337 -0
  62. local_deep_research/advanced_search_system/strategies/focused_iteration_strategy.py +537 -0
  63. local_deep_research/advanced_search_system/strategies/improved_evidence_based_strategy.py +782 -0
  64. local_deep_research/advanced_search_system/strategies/iterative_reasoning_strategy.py +760 -0
  65. local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +55 -21
  66. local_deep_research/advanced_search_system/strategies/llm_driven_modular_strategy.py +865 -0
  67. local_deep_research/advanced_search_system/strategies/modular_strategy.py +1142 -0
  68. local_deep_research/advanced_search_system/strategies/parallel_constrained_strategy.py +506 -0
  69. local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +34 -16
  70. local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +29 -9
  71. local_deep_research/advanced_search_system/strategies/recursive_decomposition_strategy.py +492 -0
  72. local_deep_research/advanced_search_system/strategies/smart_decomposition_strategy.py +284 -0
  73. local_deep_research/advanced_search_system/strategies/smart_query_strategy.py +515 -0
  74. local_deep_research/advanced_search_system/strategies/source_based_strategy.py +48 -24
  75. local_deep_research/advanced_search_system/strategies/standard_strategy.py +34 -14
  76. local_deep_research/advanced_search_system/tools/base_tool.py +7 -2
  77. local_deep_research/api/benchmark_functions.py +6 -2
  78. local_deep_research/api/research_functions.py +10 -4
  79. local_deep_research/benchmarks/__init__.py +9 -7
  80. local_deep_research/benchmarks/benchmark_functions.py +6 -2
  81. local_deep_research/benchmarks/cli/benchmark_commands.py +27 -10
  82. local_deep_research/benchmarks/cli.py +38 -13
  83. local_deep_research/benchmarks/comparison/__init__.py +4 -2
  84. local_deep_research/benchmarks/comparison/evaluator.py +316 -239
  85. local_deep_research/benchmarks/datasets/__init__.py +1 -1
  86. local_deep_research/benchmarks/datasets/base.py +91 -72
  87. local_deep_research/benchmarks/datasets/browsecomp.py +54 -33
  88. local_deep_research/benchmarks/datasets/custom_dataset_template.py +19 -19
  89. local_deep_research/benchmarks/datasets/simpleqa.py +14 -14
  90. local_deep_research/benchmarks/datasets/utils.py +48 -29
  91. local_deep_research/benchmarks/datasets.py +4 -11
  92. local_deep_research/benchmarks/efficiency/__init__.py +8 -4
  93. local_deep_research/benchmarks/efficiency/resource_monitor.py +223 -171
  94. local_deep_research/benchmarks/efficiency/speed_profiler.py +62 -48
  95. local_deep_research/benchmarks/evaluators/browsecomp.py +3 -1
  96. local_deep_research/benchmarks/evaluators/composite.py +6 -2
  97. local_deep_research/benchmarks/evaluators/simpleqa.py +36 -13
  98. local_deep_research/benchmarks/graders.py +32 -10
  99. local_deep_research/benchmarks/metrics/README.md +1 -1
  100. local_deep_research/benchmarks/metrics/calculation.py +25 -10
  101. local_deep_research/benchmarks/metrics/reporting.py +7 -3
  102. local_deep_research/benchmarks/metrics/visualization.py +42 -23
  103. local_deep_research/benchmarks/metrics.py +1 -1
  104. local_deep_research/benchmarks/optimization/__init__.py +3 -1
  105. local_deep_research/benchmarks/optimization/api.py +7 -1
  106. local_deep_research/benchmarks/optimization/optuna_optimizer.py +75 -26
  107. local_deep_research/benchmarks/runners.py +48 -15
  108. local_deep_research/citation_handler.py +65 -92
  109. local_deep_research/citation_handlers/__init__.py +15 -0
  110. local_deep_research/citation_handlers/base_citation_handler.py +70 -0
  111. local_deep_research/citation_handlers/forced_answer_citation_handler.py +179 -0
  112. local_deep_research/citation_handlers/precision_extraction_handler.py +550 -0
  113. local_deep_research/citation_handlers/standard_citation_handler.py +80 -0
  114. local_deep_research/config/llm_config.py +271 -169
  115. local_deep_research/config/search_config.py +14 -5
  116. local_deep_research/defaults/__init__.py +0 -1
  117. local_deep_research/metrics/__init__.py +13 -0
  118. local_deep_research/metrics/database.py +58 -0
  119. local_deep_research/metrics/db_models.py +115 -0
  120. local_deep_research/metrics/migrate_add_provider_to_token_usage.py +148 -0
  121. local_deep_research/metrics/migrate_call_stack_tracking.py +105 -0
  122. local_deep_research/metrics/migrate_enhanced_tracking.py +75 -0
  123. local_deep_research/metrics/migrate_research_ratings.py +31 -0
  124. local_deep_research/metrics/models.py +61 -0
  125. local_deep_research/metrics/pricing/__init__.py +12 -0
  126. local_deep_research/metrics/pricing/cost_calculator.py +237 -0
  127. local_deep_research/metrics/pricing/pricing_cache.py +143 -0
  128. local_deep_research/metrics/pricing/pricing_fetcher.py +240 -0
  129. local_deep_research/metrics/query_utils.py +51 -0
  130. local_deep_research/metrics/search_tracker.py +380 -0
  131. local_deep_research/metrics/token_counter.py +1078 -0
  132. local_deep_research/migrate_db.py +3 -1
  133. local_deep_research/report_generator.py +22 -8
  134. local_deep_research/search_system.py +390 -9
  135. local_deep_research/test_migration.py +15 -5
  136. local_deep_research/utilities/db_utils.py +7 -4
  137. local_deep_research/utilities/es_utils.py +115 -104
  138. local_deep_research/utilities/llm_utils.py +15 -5
  139. local_deep_research/utilities/log_utils.py +151 -0
  140. local_deep_research/utilities/search_cache.py +387 -0
  141. local_deep_research/utilities/search_utilities.py +14 -6
  142. local_deep_research/utilities/threading_utils.py +92 -0
  143. local_deep_research/utilities/url_utils.py +6 -0
  144. local_deep_research/web/api.py +347 -0
  145. local_deep_research/web/app.py +13 -17
  146. local_deep_research/web/app_factory.py +71 -66
  147. local_deep_research/web/database/migrate_to_ldr_db.py +12 -4
  148. local_deep_research/web/database/migrations.py +20 -3
  149. local_deep_research/web/database/models.py +74 -25
  150. local_deep_research/web/database/schema_upgrade.py +49 -29
  151. local_deep_research/web/models/database.py +63 -83
  152. local_deep_research/web/routes/api_routes.py +56 -22
  153. local_deep_research/web/routes/benchmark_routes.py +4 -1
  154. local_deep_research/web/routes/globals.py +22 -0
  155. local_deep_research/web/routes/history_routes.py +71 -46
  156. local_deep_research/web/routes/metrics_routes.py +1155 -0
  157. local_deep_research/web/routes/research_routes.py +192 -54
  158. local_deep_research/web/routes/settings_routes.py +156 -55
  159. local_deep_research/web/services/research_service.py +412 -251
  160. local_deep_research/web/services/resource_service.py +36 -11
  161. local_deep_research/web/services/settings_manager.py +55 -17
  162. local_deep_research/web/services/settings_service.py +12 -4
  163. local_deep_research/web/services/socket_service.py +295 -188
  164. local_deep_research/web/static/css/custom_dropdown.css +180 -0
  165. local_deep_research/web/static/css/styles.css +39 -1
  166. local_deep_research/web/static/js/components/detail.js +633 -267
  167. local_deep_research/web/static/js/components/details.js +751 -0
  168. local_deep_research/web/static/js/components/fallback/formatting.js +11 -11
  169. local_deep_research/web/static/js/components/fallback/ui.js +23 -23
  170. local_deep_research/web/static/js/components/history.js +76 -76
  171. local_deep_research/web/static/js/components/logpanel.js +61 -13
  172. local_deep_research/web/static/js/components/progress.js +13 -2
  173. local_deep_research/web/static/js/components/research.js +99 -12
  174. local_deep_research/web/static/js/components/results.js +239 -106
  175. local_deep_research/web/static/js/main.js +40 -40
  176. local_deep_research/web/static/js/services/audio.js +1 -1
  177. local_deep_research/web/static/js/services/formatting.js +11 -11
  178. local_deep_research/web/static/js/services/keyboard.js +157 -0
  179. local_deep_research/web/static/js/services/pdf.js +80 -80
  180. local_deep_research/web/static/sounds/README.md +1 -1
  181. local_deep_research/web/templates/base.html +1 -0
  182. local_deep_research/web/templates/components/log_panel.html +7 -1
  183. local_deep_research/web/templates/components/mobile_nav.html +1 -1
  184. local_deep_research/web/templates/components/sidebar.html +3 -0
  185. local_deep_research/web/templates/pages/cost_analytics.html +1245 -0
  186. local_deep_research/web/templates/pages/details.html +325 -24
  187. local_deep_research/web/templates/pages/history.html +1 -1
  188. local_deep_research/web/templates/pages/metrics.html +1929 -0
  189. local_deep_research/web/templates/pages/progress.html +2 -2
  190. local_deep_research/web/templates/pages/research.html +53 -17
  191. local_deep_research/web/templates/pages/results.html +12 -1
  192. local_deep_research/web/templates/pages/star_reviews.html +803 -0
  193. local_deep_research/web/utils/formatters.py +9 -3
  194. local_deep_research/web_search_engines/default_search_engines.py +5 -3
  195. local_deep_research/web_search_engines/engines/full_search.py +8 -2
  196. local_deep_research/web_search_engines/engines/meta_search_engine.py +59 -20
  197. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +19 -6
  198. local_deep_research/web_search_engines/engines/search_engine_brave.py +6 -2
  199. local_deep_research/web_search_engines/engines/search_engine_ddg.py +3 -1
  200. local_deep_research/web_search_engines/engines/search_engine_elasticsearch.py +81 -58
  201. local_deep_research/web_search_engines/engines/search_engine_github.py +46 -15
  202. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +16 -6
  203. local_deep_research/web_search_engines/engines/search_engine_guardian.py +39 -15
  204. local_deep_research/web_search_engines/engines/search_engine_local.py +58 -25
  205. local_deep_research/web_search_engines/engines/search_engine_local_all.py +15 -5
  206. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +63 -21
  207. local_deep_research/web_search_engines/engines/search_engine_searxng.py +37 -11
  208. local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +27 -9
  209. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +12 -4
  210. local_deep_research/web_search_engines/engines/search_engine_wayback.py +31 -10
  211. local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +12 -3
  212. local_deep_research/web_search_engines/search_engine_base.py +83 -35
  213. local_deep_research/web_search_engines/search_engine_factory.py +25 -8
  214. local_deep_research/web_search_engines/search_engines_config.py +9 -3
  215. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/METADATA +7 -1
  216. local_deep_research-0.5.2.dist-info/RECORD +265 -0
  217. local_deep_research-0.4.4.dist-info/RECORD +0 -177
  218. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/WHEEL +0 -0
  219. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/entry_points.txt +0 -0
  220. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/licenses/LICENSE +0 -0
@@ -19,7 +19,11 @@ class StandardSearchStrategy(BaseSearchStrategy):
19
19
  """Standard iterative search strategy that generates follow-up questions."""
20
20
 
21
21
  def __init__(
22
- self, search=None, model=None, citation_handler=None, all_links_of_system=None
22
+ self,
23
+ search=None,
24
+ model=None,
25
+ citation_handler=None,
26
+ all_links_of_system=None,
23
27
  ):
24
28
  """Initialize with optional dependency injection for testing."""
25
29
  super().__init__(all_links_of_system=all_links_of_system)
@@ -73,9 +77,7 @@ class StandardSearchStrategy(BaseSearchStrategy):
73
77
 
74
78
  # Check if search engine is available
75
79
  if self.search is None:
76
- error_msg = (
77
- "Error: No search engine available. Please check your configuration."
78
- )
80
+ error_msg = "Error: No search engine available. Please check your configuration."
79
81
  self._update_progress(
80
82
  error_msg,
81
83
  100,
@@ -127,7 +129,9 @@ Iteration: {iteration + 1} of {total_iterations}"""
127
129
  )
128
130
  for q_idx, question in enumerate(questions):
129
131
  question_progress_base = iteration_progress_base + (
130
- ((q_idx + 1) / question_count) * (100 / total_iterations) * 0.5
132
+ ((q_idx + 1) / question_count)
133
+ * (100 / total_iterations)
134
+ * 0.5
131
135
  )
132
136
 
133
137
  self._update_progress(
@@ -156,7 +160,9 @@ Iteration: {iteration + 1} of {total_iterations}"""
156
160
  except Exception as e:
157
161
  error_msg = f"Error during search: {str(e)}"
158
162
  logger.exception(f"SEARCH ERROR: {error_msg}")
159
- self._handle_search_error(error_msg, question_progress_base + 10)
163
+ self._handle_search_error(
164
+ error_msg, question_progress_base + 10
165
+ )
160
166
  search_results = []
161
167
 
162
168
  if search_results is None:
@@ -171,7 +177,10 @@ Iteration: {iteration + 1} of {total_iterations}"""
171
177
  self._update_progress(
172
178
  f"Found {len(search_results)} results for question: {question}",
173
179
  int(question_progress_base + 2),
174
- {"phase": "search_complete", "result_count": len(search_results)},
180
+ {
181
+ "phase": "search_complete",
182
+ "result_count": len(search_results),
183
+ },
175
184
  )
176
185
 
177
186
  logger.info(f"len search: {len(search_results)}")
@@ -240,7 +249,9 @@ Iteration: {iteration + 1} of {total_iterations}"""
240
249
  except Exception as e:
241
250
  error_msg = f"Error analyzing results: {str(e)}"
242
251
  logger.exception(f"ANALYSIS ERROR: {error_msg}")
243
- self._handle_search_error(error_msg, question_progress_base + 10)
252
+ self._handle_search_error(
253
+ error_msg, question_progress_base + 10
254
+ )
244
255
 
245
256
  iteration += 1
246
257
 
@@ -250,11 +261,16 @@ Iteration: {iteration + 1} of {total_iterations}"""
250
261
  {"phase": "knowledge_compression"},
251
262
  )
252
263
 
253
- if knowledge_accumulation == KnowledgeAccumulationApproach.ITERATION.value:
264
+ if (
265
+ knowledge_accumulation
266
+ == KnowledgeAccumulationApproach.ITERATION.value
267
+ ):
254
268
  try:
255
269
  logger.info("ITERATION - Compressing Knowledge")
256
- current_knowledge = self.knowledge_generator.compress_knowledge(
257
- current_knowledge, query, section_links
270
+ current_knowledge = (
271
+ self.knowledge_generator.compress_knowledge(
272
+ current_knowledge, query, section_links
273
+ )
258
274
  )
259
275
  logger.info("FINISHED ITERATION - Compressing Knowledge")
260
276
  except Exception as e:
@@ -271,7 +287,9 @@ Iteration: {iteration + 1} of {total_iterations}"""
271
287
  )
272
288
 
273
289
  # Extract content from findings for synthesis
274
- finding_contents = [f["content"] for f in findings if "content" in f]
290
+ finding_contents = [
291
+ f["content"] for f in findings if "content" in f
292
+ ]
275
293
 
276
294
  # First synthesize findings to get coherent content
277
295
  synthesized_content = self.findings_repository.synthesize_findings(
@@ -288,8 +306,10 @@ Iteration: {iteration + 1} of {total_iterations}"""
288
306
  )
289
307
 
290
308
  # Now format the findings with search questions and sources
291
- formatted_findings = self.findings_repository.format_findings_to_text(
292
- findings, synthesized_content
309
+ formatted_findings = (
310
+ self.findings_repository.format_findings_to_text(
311
+ findings, synthesized_content
312
+ )
293
313
  )
294
314
 
295
315
  # Add the synthesized content to the repository
@@ -75,7 +75,10 @@ class BaseTool(ABC):
75
75
  logger.error(f"Invalid type for parameter {param_name}")
76
76
  return False
77
77
 
78
- if "enum" in param_schema and param_value not in param_schema["enum"]:
78
+ if (
79
+ "enum" in param_schema
80
+ and param_value not in param_schema["enum"]
81
+ ):
79
82
  logger.error(f"Invalid value for parameter {param_name}")
80
83
  return False
81
84
 
@@ -97,4 +100,6 @@ class BaseTool(ABC):
97
100
  Args:
98
101
  result: The result of the tool execution
99
102
  """
100
- logger.info(f"Tool {self.name} execution completed with result: {result}")
103
+ logger.info(
104
+ f"Tool {self.name} execution completed with result: {result}"
105
+ )
@@ -235,7 +235,9 @@ def compare_configurations(
235
235
  import time
236
236
 
237
237
  timestamp = time.strftime("%Y%m%d_%H%M%S")
238
- report_file = os.path.join(output_dir, f"comparison_{dataset_type}_{timestamp}.md")
238
+ report_file = os.path.join(
239
+ output_dir, f"comparison_{dataset_type}_{timestamp}.md"
240
+ )
239
241
 
240
242
  with open(report_file, "w") as f:
241
243
  f.write(f"# Configuration Comparison - {dataset_type.capitalize()}\n\n")
@@ -247,7 +249,9 @@ def compare_configurations(
247
249
 
248
250
  for result in results:
249
251
  accuracy = result.get("metrics", {}).get("accuracy", 0)
250
- avg_time = result.get("metrics", {}).get("average_processing_time", 0)
252
+ avg_time = result.get("metrics", {}).get(
253
+ "average_processing_time", 0
254
+ )
251
255
  examples = result.get("total_examples", 0)
252
256
 
253
257
  f.write(
@@ -21,9 +21,9 @@ def _init_search_system(
21
21
  openai_endpoint_url: str | None = None,
22
22
  progress_callback: Callable[[str, int, dict], None] | None = None,
23
23
  search_tool: Optional[str] = None,
24
+ search_strategy: str = "source_based",
24
25
  iterations: int = 1,
25
26
  questions_per_iteration: int = 1,
26
- search_strategy: str = "source_based",
27
27
  ) -> AdvancedSearchSystem:
28
28
  """
29
29
  Initializes the advanced search system with specified parameters. This function sets up
@@ -39,6 +39,7 @@ def _init_search_system(
39
39
  setting)
40
40
  progress_callback: Optional callback function to receive progress updates
41
41
  search_tool: Search engine to use (auto, wikipedia, arxiv, etc.). If None, uses default
42
+ search_strategy: Search strategy to use (modular, source_based, etc.). If None, uses default
42
43
  iterations: Number of research cycles to perform
43
44
  questions_per_iteration: Number of questions to generate per cycle
44
45
  search_strategy: The name of the search strategy to use.
@@ -220,7 +221,9 @@ def analyze_documents(
220
221
  # Force reindex if requested
221
222
  if force_reindex and hasattr(search, "embedding_manager"):
222
223
  for folder_path in search.folder_paths:
223
- search.embedding_manager.index_folder(folder_path, force_reindex=True)
224
+ search.embedding_manager.index_folder(
225
+ folder_path, force_reindex=True
226
+ )
224
227
 
225
228
  # Perform the search
226
229
  results = search.run(query)
@@ -235,7 +238,8 @@ def analyze_documents(
235
238
 
236
239
  docs_text = "\n\n".join(
237
240
  [
238
- f"Document {i + 1}:" f" {doc.get('content', doc.get('snippet', ''))[:1000]}"
241
+ f"Document {i + 1}:"
242
+ f" {doc.get('content', doc.get('snippet', ''))[:1000]}"
239
243
  for i, doc in enumerate(results[:5])
240
244
  ]
241
245
  ) # Limit to first 5 docs and 1000 chars each
@@ -269,7 +273,9 @@ def analyze_documents(
269
273
  f.write(f"## Documents Found: {len(results)}\n\n")
270
274
 
271
275
  for i, doc in enumerate(results):
272
- f.write(f"### Document {i + 1}:" f" {doc.get('title', 'Untitled')}\n\n")
276
+ f.write(
277
+ f"### Document {i + 1}: {doc.get('title', 'Untitled')}\n\n"
278
+ )
273
279
  f.write(f"**Source:** {doc.get('link', 'Unknown')}\n\n")
274
280
  f.write(
275
281
  f"**Content:**\n\n{doc.get('content', doc.get('snippet', 'No content available'))[:1000]}...\n\n"
@@ -10,21 +10,25 @@ __version__ = "0.2.0"
10
10
  # Core benchmark functionality
11
11
  from .datasets import get_available_datasets, load_dataset
12
12
  from .metrics import (
13
+ calculate_combined_score,
13
14
  calculate_metrics,
14
15
  calculate_quality_metrics,
15
- calculate_speed_metrics,
16
16
  calculate_resource_metrics,
17
- calculate_combined_score,
17
+ calculate_speed_metrics,
18
18
  generate_report,
19
19
  )
20
- from .runners import run_benchmark, run_browsecomp_benchmark, run_simpleqa_benchmark
21
20
 
22
21
  # Optimization functionality
23
22
  from .optimization import (
24
- optimize_parameters,
23
+ optimize_for_efficiency,
25
24
  optimize_for_quality,
26
25
  optimize_for_speed,
27
- optimize_for_efficiency,
26
+ optimize_parameters,
27
+ )
28
+ from .runners import (
29
+ run_benchmark,
30
+ run_browsecomp_benchmark,
31
+ run_simpleqa_benchmark,
28
32
  )
29
33
 
30
34
  __all__ = [
@@ -36,13 +40,11 @@ __all__ = [
36
40
  "get_available_datasets",
37
41
  "calculate_metrics",
38
42
  "generate_report",
39
-
40
43
  # Metrics for optimization
41
44
  "calculate_quality_metrics",
42
45
  "calculate_speed_metrics",
43
46
  "calculate_resource_metrics",
44
47
  "calculate_combined_score",
45
-
46
48
  # Optimization functionality
47
49
  "optimize_parameters",
48
50
  "optimize_for_quality",
@@ -300,7 +300,9 @@ def compare_configurations(
300
300
  import time
301
301
 
302
302
  timestamp = time.strftime("%Y%m%d_%H%M%S")
303
- report_file = os.path.join(output_dir, f"comparison_{dataset_type}_{timestamp}.md")
303
+ report_file = os.path.join(
304
+ output_dir, f"comparison_{dataset_type}_{timestamp}.md"
305
+ )
304
306
 
305
307
  with open(report_file, "w") as f:
306
308
  f.write(f"# Configuration Comparison - {dataset_type.capitalize()}\n\n")
@@ -312,7 +314,9 @@ def compare_configurations(
312
314
 
313
315
  for result in results:
314
316
  accuracy = result.get("metrics", {}).get("accuracy", 0)
315
- avg_time = result.get("metrics", {}).get("average_processing_time", 0)
317
+ avg_time = result.get("metrics", {}).get(
318
+ "average_processing_time", 0
319
+ )
316
320
  examples = result.get("total_examples", 0)
317
321
 
318
322
  f.write(
@@ -38,7 +38,10 @@ def setup_benchmark_parser(subparsers):
38
38
  help="Number of search iterations (default: 3)",
39
39
  )
40
40
  benchmark_parent.add_argument(
41
- "--questions", type=int, default=3, help="Questions per iteration (default: 3)"
41
+ "--questions",
42
+ type=int,
43
+ default=3,
44
+ help="Questions per iteration (default: 3)",
42
45
  )
43
46
  benchmark_parent.add_argument(
44
47
  "--search-tool",
@@ -75,15 +78,21 @@ def setup_benchmark_parser(subparsers):
75
78
  "--search-model", type=str, help="Model to use for the search system"
76
79
  )
77
80
  benchmark_parent.add_argument(
78
- "--search-provider", type=str, help="Provider to use for the search system"
81
+ "--search-provider",
82
+ type=str,
83
+ help="Provider to use for the search system",
79
84
  )
80
85
  benchmark_parent.add_argument(
81
- "--endpoint-url", type=str, help="Endpoint URL for OpenRouter or other API services"
86
+ "--endpoint-url",
87
+ type=str,
88
+ help="Endpoint URL for OpenRouter or other API services",
82
89
  )
83
90
  benchmark_parent.add_argument(
84
- "--search-strategy", type=str, default="source_based",
91
+ "--search-strategy",
92
+ type=str,
93
+ default="source_based",
85
94
  choices=["source_based", "standard", "rapid", "parallel", "iterdrag"],
86
- help="Search strategy to use (default: source_based)"
95
+ help="Search strategy to use (default: source_based)",
87
96
  )
88
97
 
89
98
  # SimpleQA benchmark command
@@ -94,12 +103,16 @@ def setup_benchmark_parser(subparsers):
94
103
 
95
104
  # BrowseComp benchmark command
96
105
  browsecomp_parser = subparsers.add_parser(
97
- "browsecomp", parents=[benchmark_parent], help="Run BrowseComp benchmark"
106
+ "browsecomp",
107
+ parents=[benchmark_parent],
108
+ help="Run BrowseComp benchmark",
98
109
  )
99
110
  browsecomp_parser.set_defaults(func=run_browsecomp_cli)
100
111
 
101
112
  # List available benchmarks command
102
- list_parser = subparsers.add_parser("list", help="List available benchmarks")
113
+ list_parser = subparsers.add_parser(
114
+ "list", help="List available benchmarks"
115
+ )
103
116
  list_parser.set_defaults(func=list_benchmarks_cli)
104
117
 
105
118
  # Compare configurations command
@@ -304,11 +317,14 @@ def main():
304
317
  Main entry point for benchmark CLI.
305
318
  """
306
319
  parser = argparse.ArgumentParser(
307
- description="Local Deep Research Benchmarking Tool", prog="ldr-benchmark"
320
+ description="Local Deep Research Benchmarking Tool",
321
+ prog="ldr-benchmark",
308
322
  )
309
323
 
310
324
  # Set up logging
311
- parser.add_argument("--verbose", action="store_true", help="Enable verbose logging")
325
+ parser.add_argument(
326
+ "--verbose", action="store_true", help="Enable verbose logging"
327
+ )
312
328
 
313
329
  # Create subparsers
314
330
  subparsers = parser.add_subparsers(
@@ -324,7 +340,8 @@ def main():
324
340
  # Set up logging
325
341
  log_level = logging.DEBUG if args.verbose else logging.INFO
326
342
  logging.basicConfig(
327
- level=log_level, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
343
+ level=log_level,
344
+ format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
328
345
  )
329
346
 
330
347
  # Run command
@@ -10,7 +10,6 @@ import logging
10
10
  import os
11
11
  import sys
12
12
  from datetime import datetime
13
- from typing import Any, Dict, List, Optional
14
13
 
15
14
  from .comparison import compare_configurations
16
15
  from .efficiency import ResourceMonitor, SpeedProfiler
@@ -18,7 +17,8 @@ from .optimization import optimize_parameters
18
17
 
19
18
  # Configure logging
20
19
  logging.basicConfig(
21
- level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
20
+ level=logging.INFO,
21
+ format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
22
22
  )
23
23
  logger = logging.getLogger(__name__)
24
24
 
@@ -44,7 +44,9 @@ Examples:
44
44
  subparsers = parser.add_subparsers(dest="command", help="Command to run")
45
45
 
46
46
  # Optimizer parser
47
- optimize_parser = subparsers.add_parser("optimize", help="Optimize parameters")
47
+ optimize_parser = subparsers.add_parser(
48
+ "optimize", help="Optimize parameters"
49
+ )
48
50
  optimize_parser.add_argument("query", help="Research query to optimize for")
49
51
  optimize_parser.add_argument(
50
52
  "--output-dir",
@@ -67,21 +69,32 @@ Examples:
67
69
  "--timeout", type=int, help="Maximum seconds to run optimization"
68
70
  )
69
71
  optimize_parser.add_argument(
70
- "--n-jobs", type=int, default=1, help="Number of parallel jobs for optimization"
72
+ "--n-jobs",
73
+ type=int,
74
+ default=1,
75
+ help="Number of parallel jobs for optimization",
76
+ )
77
+ optimize_parser.add_argument(
78
+ "--study-name", help="Name of the Optuna study"
71
79
  )
72
- optimize_parser.add_argument("--study-name", help="Name of the Optuna study")
73
80
  optimize_parser.add_argument(
74
81
  "--speed-focus", action="store_true", help="Focus optimization on speed"
75
82
  )
76
83
  optimize_parser.add_argument(
77
- "--quality-focus", action="store_true", help="Focus optimization on quality"
84
+ "--quality-focus",
85
+ action="store_true",
86
+ help="Focus optimization on quality",
78
87
  )
79
88
 
80
89
  # Comparison parser
81
- compare_parser = subparsers.add_parser("compare", help="Compare configurations")
90
+ compare_parser = subparsers.add_parser(
91
+ "compare", help="Compare configurations"
92
+ )
82
93
  compare_parser.add_argument("query", help="Research query to compare with")
83
94
  compare_parser.add_argument(
84
- "--configs", required=True, help="JSON file with configurations to compare"
95
+ "--configs",
96
+ required=True,
97
+ help="JSON file with configurations to compare",
85
98
  )
86
99
  compare_parser.add_argument(
87
100
  "--output-dir",
@@ -99,7 +112,9 @@ Examples:
99
112
  )
100
113
 
101
114
  # Profiling parser
102
- profile_parser = subparsers.add_parser("profile", help="Profile resource usage")
115
+ profile_parser = subparsers.add_parser(
116
+ "profile", help="Profile resource usage"
117
+ )
103
118
  profile_parser.add_argument("query", help="Research query to profile")
104
119
  profile_parser.add_argument(
105
120
  "--output-dir",
@@ -203,7 +218,9 @@ def run_comparison(args):
203
218
  for i, result in enumerate(
204
219
  [r for r in results["results"] if r.get("success", False)]
205
220
  ):
206
- print(f"{i+1}. {result['name']}: {result.get('overall_score', 0):.4f}")
221
+ print(
222
+ f"{i + 1}. {result['name']}: {result.get('overall_score', 0):.4f}"
223
+ )
207
224
 
208
225
  print(f"\nResults saved to: {results.get('report_path', args.output_dir)}")
209
226
 
@@ -275,12 +292,18 @@ def run_profiling(args):
275
292
  if name != "total_duration" and name.endswith("_duration"):
276
293
  component = name.replace("_duration", "")
277
294
  duration = value
278
- percent = (duration / total_duration * 100) if total_duration > 0 else 0
295
+ percent = (
296
+ (duration / total_duration * 100)
297
+ if total_duration > 0
298
+ else 0
299
+ )
279
300
  print(f"- {component}: {duration:.2f}s ({percent:.1f}%)")
280
301
 
281
302
  # Resource summary
282
303
  print("\nResource Usage Summary:")
283
- print(f"Peak memory: {resource_results.get('process_memory_max_mb', 0):.1f} MB")
304
+ print(
305
+ f"Peak memory: {resource_results.get('process_memory_max_mb', 0):.1f} MB"
306
+ )
284
307
  print(
285
308
  f"Average memory: {resource_results.get('process_memory_avg_mb', 0):.1f} MB"
286
309
  )
@@ -308,7 +331,9 @@ def run_profiling(args):
308
331
  "timing_results": timing_results,
309
332
  "resource_results": resource_results,
310
333
  "findings_count": len(results.get("findings", [])),
311
- "knowledge_length": len(results.get("current_knowledge", "")),
334
+ "knowledge_length": len(
335
+ results.get("current_knowledge", "")
336
+ ),
312
337
  "timestamp": timestamp,
313
338
  },
314
339
  f,
@@ -5,8 +5,10 @@ This module provides tools for comparing the performance of different
5
5
  parameters, models, and search engines.
6
6
  """
7
7
 
8
- from local_deep_research.benchmarks.comparison.evaluator import compare_configurations
8
+ from local_deep_research.benchmarks.comparison.evaluator import (
9
+ compare_configurations,
10
+ )
9
11
 
10
12
  __all__ = [
11
- 'compare_configurations',
13
+ "compare_configurations",
12
14
  ]