local-deep-research 0.4.4__py3-none-any.whl → 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220) hide show
  1. local_deep_research/__init__.py +7 -0
  2. local_deep_research/__version__.py +1 -1
  3. local_deep_research/advanced_search_system/answer_decoding/__init__.py +5 -0
  4. local_deep_research/advanced_search_system/answer_decoding/browsecomp_answer_decoder.py +421 -0
  5. local_deep_research/advanced_search_system/candidate_exploration/README.md +219 -0
  6. local_deep_research/advanced_search_system/candidate_exploration/__init__.py +25 -0
  7. local_deep_research/advanced_search_system/candidate_exploration/adaptive_explorer.py +329 -0
  8. local_deep_research/advanced_search_system/candidate_exploration/base_explorer.py +341 -0
  9. local_deep_research/advanced_search_system/candidate_exploration/constraint_guided_explorer.py +436 -0
  10. local_deep_research/advanced_search_system/candidate_exploration/diversity_explorer.py +457 -0
  11. local_deep_research/advanced_search_system/candidate_exploration/parallel_explorer.py +250 -0
  12. local_deep_research/advanced_search_system/candidate_exploration/progressive_explorer.py +255 -0
  13. local_deep_research/advanced_search_system/candidates/__init__.py +5 -0
  14. local_deep_research/advanced_search_system/candidates/base_candidate.py +59 -0
  15. local_deep_research/advanced_search_system/constraint_checking/README.md +150 -0
  16. local_deep_research/advanced_search_system/constraint_checking/__init__.py +35 -0
  17. local_deep_research/advanced_search_system/constraint_checking/base_constraint_checker.py +122 -0
  18. local_deep_research/advanced_search_system/constraint_checking/constraint_checker.py +223 -0
  19. local_deep_research/advanced_search_system/constraint_checking/constraint_satisfaction_tracker.py +387 -0
  20. local_deep_research/advanced_search_system/constraint_checking/dual_confidence_checker.py +424 -0
  21. local_deep_research/advanced_search_system/constraint_checking/evidence_analyzer.py +174 -0
  22. local_deep_research/advanced_search_system/constraint_checking/intelligent_constraint_relaxer.py +503 -0
  23. local_deep_research/advanced_search_system/constraint_checking/rejection_engine.py +143 -0
  24. local_deep_research/advanced_search_system/constraint_checking/strict_checker.py +259 -0
  25. local_deep_research/advanced_search_system/constraint_checking/threshold_checker.py +213 -0
  26. local_deep_research/advanced_search_system/constraints/__init__.py +6 -0
  27. local_deep_research/advanced_search_system/constraints/base_constraint.py +58 -0
  28. local_deep_research/advanced_search_system/constraints/constraint_analyzer.py +143 -0
  29. local_deep_research/advanced_search_system/evidence/__init__.py +12 -0
  30. local_deep_research/advanced_search_system/evidence/base_evidence.py +57 -0
  31. local_deep_research/advanced_search_system/evidence/evaluator.py +159 -0
  32. local_deep_research/advanced_search_system/evidence/requirements.py +122 -0
  33. local_deep_research/advanced_search_system/filters/base_filter.py +3 -1
  34. local_deep_research/advanced_search_system/filters/cross_engine_filter.py +8 -2
  35. local_deep_research/advanced_search_system/filters/journal_reputation_filter.py +43 -29
  36. local_deep_research/advanced_search_system/findings/repository.py +54 -17
  37. local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +3 -1
  38. local_deep_research/advanced_search_system/query_generation/adaptive_query_generator.py +405 -0
  39. local_deep_research/advanced_search_system/questions/__init__.py +16 -0
  40. local_deep_research/advanced_search_system/questions/atomic_fact_question.py +171 -0
  41. local_deep_research/advanced_search_system/questions/browsecomp_question.py +287 -0
  42. local_deep_research/advanced_search_system/questions/decomposition_question.py +13 -4
  43. local_deep_research/advanced_search_system/questions/entity_aware_question.py +184 -0
  44. local_deep_research/advanced_search_system/questions/standard_question.py +9 -3
  45. local_deep_research/advanced_search_system/search_optimization/cross_constraint_manager.py +624 -0
  46. local_deep_research/advanced_search_system/source_management/diversity_manager.py +613 -0
  47. local_deep_research/advanced_search_system/strategies/__init__.py +42 -0
  48. local_deep_research/advanced_search_system/strategies/adaptive_decomposition_strategy.py +564 -0
  49. local_deep_research/advanced_search_system/strategies/base_strategy.py +4 -4
  50. local_deep_research/advanced_search_system/strategies/browsecomp_entity_strategy.py +1031 -0
  51. local_deep_research/advanced_search_system/strategies/browsecomp_optimized_strategy.py +778 -0
  52. local_deep_research/advanced_search_system/strategies/concurrent_dual_confidence_strategy.py +446 -0
  53. local_deep_research/advanced_search_system/strategies/constrained_search_strategy.py +1348 -0
  54. local_deep_research/advanced_search_system/strategies/constraint_parallel_strategy.py +522 -0
  55. local_deep_research/advanced_search_system/strategies/direct_search_strategy.py +217 -0
  56. local_deep_research/advanced_search_system/strategies/dual_confidence_strategy.py +320 -0
  57. local_deep_research/advanced_search_system/strategies/dual_confidence_with_rejection.py +219 -0
  58. local_deep_research/advanced_search_system/strategies/early_stop_constrained_strategy.py +369 -0
  59. local_deep_research/advanced_search_system/strategies/entity_aware_source_strategy.py +140 -0
  60. local_deep_research/advanced_search_system/strategies/evidence_based_strategy.py +1248 -0
  61. local_deep_research/advanced_search_system/strategies/evidence_based_strategy_v2.py +1337 -0
  62. local_deep_research/advanced_search_system/strategies/focused_iteration_strategy.py +537 -0
  63. local_deep_research/advanced_search_system/strategies/improved_evidence_based_strategy.py +782 -0
  64. local_deep_research/advanced_search_system/strategies/iterative_reasoning_strategy.py +760 -0
  65. local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +55 -21
  66. local_deep_research/advanced_search_system/strategies/llm_driven_modular_strategy.py +865 -0
  67. local_deep_research/advanced_search_system/strategies/modular_strategy.py +1142 -0
  68. local_deep_research/advanced_search_system/strategies/parallel_constrained_strategy.py +506 -0
  69. local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +34 -16
  70. local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +29 -9
  71. local_deep_research/advanced_search_system/strategies/recursive_decomposition_strategy.py +492 -0
  72. local_deep_research/advanced_search_system/strategies/smart_decomposition_strategy.py +284 -0
  73. local_deep_research/advanced_search_system/strategies/smart_query_strategy.py +515 -0
  74. local_deep_research/advanced_search_system/strategies/source_based_strategy.py +48 -24
  75. local_deep_research/advanced_search_system/strategies/standard_strategy.py +34 -14
  76. local_deep_research/advanced_search_system/tools/base_tool.py +7 -2
  77. local_deep_research/api/benchmark_functions.py +6 -2
  78. local_deep_research/api/research_functions.py +10 -4
  79. local_deep_research/benchmarks/__init__.py +9 -7
  80. local_deep_research/benchmarks/benchmark_functions.py +6 -2
  81. local_deep_research/benchmarks/cli/benchmark_commands.py +27 -10
  82. local_deep_research/benchmarks/cli.py +38 -13
  83. local_deep_research/benchmarks/comparison/__init__.py +4 -2
  84. local_deep_research/benchmarks/comparison/evaluator.py +316 -239
  85. local_deep_research/benchmarks/datasets/__init__.py +1 -1
  86. local_deep_research/benchmarks/datasets/base.py +91 -72
  87. local_deep_research/benchmarks/datasets/browsecomp.py +54 -33
  88. local_deep_research/benchmarks/datasets/custom_dataset_template.py +19 -19
  89. local_deep_research/benchmarks/datasets/simpleqa.py +14 -14
  90. local_deep_research/benchmarks/datasets/utils.py +48 -29
  91. local_deep_research/benchmarks/datasets.py +4 -11
  92. local_deep_research/benchmarks/efficiency/__init__.py +8 -4
  93. local_deep_research/benchmarks/efficiency/resource_monitor.py +223 -171
  94. local_deep_research/benchmarks/efficiency/speed_profiler.py +62 -48
  95. local_deep_research/benchmarks/evaluators/browsecomp.py +3 -1
  96. local_deep_research/benchmarks/evaluators/composite.py +6 -2
  97. local_deep_research/benchmarks/evaluators/simpleqa.py +36 -13
  98. local_deep_research/benchmarks/graders.py +32 -10
  99. local_deep_research/benchmarks/metrics/README.md +1 -1
  100. local_deep_research/benchmarks/metrics/calculation.py +25 -10
  101. local_deep_research/benchmarks/metrics/reporting.py +7 -3
  102. local_deep_research/benchmarks/metrics/visualization.py +42 -23
  103. local_deep_research/benchmarks/metrics.py +1 -1
  104. local_deep_research/benchmarks/optimization/__init__.py +3 -1
  105. local_deep_research/benchmarks/optimization/api.py +7 -1
  106. local_deep_research/benchmarks/optimization/optuna_optimizer.py +75 -26
  107. local_deep_research/benchmarks/runners.py +48 -15
  108. local_deep_research/citation_handler.py +65 -92
  109. local_deep_research/citation_handlers/__init__.py +15 -0
  110. local_deep_research/citation_handlers/base_citation_handler.py +70 -0
  111. local_deep_research/citation_handlers/forced_answer_citation_handler.py +179 -0
  112. local_deep_research/citation_handlers/precision_extraction_handler.py +550 -0
  113. local_deep_research/citation_handlers/standard_citation_handler.py +80 -0
  114. local_deep_research/config/llm_config.py +271 -169
  115. local_deep_research/config/search_config.py +14 -5
  116. local_deep_research/defaults/__init__.py +0 -1
  117. local_deep_research/metrics/__init__.py +13 -0
  118. local_deep_research/metrics/database.py +58 -0
  119. local_deep_research/metrics/db_models.py +115 -0
  120. local_deep_research/metrics/migrate_add_provider_to_token_usage.py +148 -0
  121. local_deep_research/metrics/migrate_call_stack_tracking.py +105 -0
  122. local_deep_research/metrics/migrate_enhanced_tracking.py +75 -0
  123. local_deep_research/metrics/migrate_research_ratings.py +31 -0
  124. local_deep_research/metrics/models.py +61 -0
  125. local_deep_research/metrics/pricing/__init__.py +12 -0
  126. local_deep_research/metrics/pricing/cost_calculator.py +237 -0
  127. local_deep_research/metrics/pricing/pricing_cache.py +143 -0
  128. local_deep_research/metrics/pricing/pricing_fetcher.py +240 -0
  129. local_deep_research/metrics/query_utils.py +51 -0
  130. local_deep_research/metrics/search_tracker.py +380 -0
  131. local_deep_research/metrics/token_counter.py +1078 -0
  132. local_deep_research/migrate_db.py +3 -1
  133. local_deep_research/report_generator.py +22 -8
  134. local_deep_research/search_system.py +390 -9
  135. local_deep_research/test_migration.py +15 -5
  136. local_deep_research/utilities/db_utils.py +7 -4
  137. local_deep_research/utilities/es_utils.py +115 -104
  138. local_deep_research/utilities/llm_utils.py +15 -5
  139. local_deep_research/utilities/log_utils.py +151 -0
  140. local_deep_research/utilities/search_cache.py +387 -0
  141. local_deep_research/utilities/search_utilities.py +14 -6
  142. local_deep_research/utilities/threading_utils.py +92 -0
  143. local_deep_research/utilities/url_utils.py +6 -0
  144. local_deep_research/web/api.py +347 -0
  145. local_deep_research/web/app.py +13 -17
  146. local_deep_research/web/app_factory.py +71 -66
  147. local_deep_research/web/database/migrate_to_ldr_db.py +12 -4
  148. local_deep_research/web/database/migrations.py +20 -3
  149. local_deep_research/web/database/models.py +74 -25
  150. local_deep_research/web/database/schema_upgrade.py +49 -29
  151. local_deep_research/web/models/database.py +63 -83
  152. local_deep_research/web/routes/api_routes.py +56 -22
  153. local_deep_research/web/routes/benchmark_routes.py +4 -1
  154. local_deep_research/web/routes/globals.py +22 -0
  155. local_deep_research/web/routes/history_routes.py +71 -46
  156. local_deep_research/web/routes/metrics_routes.py +1155 -0
  157. local_deep_research/web/routes/research_routes.py +192 -54
  158. local_deep_research/web/routes/settings_routes.py +156 -55
  159. local_deep_research/web/services/research_service.py +412 -251
  160. local_deep_research/web/services/resource_service.py +36 -11
  161. local_deep_research/web/services/settings_manager.py +55 -17
  162. local_deep_research/web/services/settings_service.py +12 -4
  163. local_deep_research/web/services/socket_service.py +295 -188
  164. local_deep_research/web/static/css/custom_dropdown.css +180 -0
  165. local_deep_research/web/static/css/styles.css +39 -1
  166. local_deep_research/web/static/js/components/detail.js +633 -267
  167. local_deep_research/web/static/js/components/details.js +751 -0
  168. local_deep_research/web/static/js/components/fallback/formatting.js +11 -11
  169. local_deep_research/web/static/js/components/fallback/ui.js +23 -23
  170. local_deep_research/web/static/js/components/history.js +76 -76
  171. local_deep_research/web/static/js/components/logpanel.js +61 -13
  172. local_deep_research/web/static/js/components/progress.js +13 -2
  173. local_deep_research/web/static/js/components/research.js +99 -12
  174. local_deep_research/web/static/js/components/results.js +239 -106
  175. local_deep_research/web/static/js/main.js +40 -40
  176. local_deep_research/web/static/js/services/audio.js +1 -1
  177. local_deep_research/web/static/js/services/formatting.js +11 -11
  178. local_deep_research/web/static/js/services/keyboard.js +157 -0
  179. local_deep_research/web/static/js/services/pdf.js +80 -80
  180. local_deep_research/web/static/sounds/README.md +1 -1
  181. local_deep_research/web/templates/base.html +1 -0
  182. local_deep_research/web/templates/components/log_panel.html +7 -1
  183. local_deep_research/web/templates/components/mobile_nav.html +1 -1
  184. local_deep_research/web/templates/components/sidebar.html +3 -0
  185. local_deep_research/web/templates/pages/cost_analytics.html +1245 -0
  186. local_deep_research/web/templates/pages/details.html +325 -24
  187. local_deep_research/web/templates/pages/history.html +1 -1
  188. local_deep_research/web/templates/pages/metrics.html +1929 -0
  189. local_deep_research/web/templates/pages/progress.html +2 -2
  190. local_deep_research/web/templates/pages/research.html +53 -17
  191. local_deep_research/web/templates/pages/results.html +12 -1
  192. local_deep_research/web/templates/pages/star_reviews.html +803 -0
  193. local_deep_research/web/utils/formatters.py +9 -3
  194. local_deep_research/web_search_engines/default_search_engines.py +5 -3
  195. local_deep_research/web_search_engines/engines/full_search.py +8 -2
  196. local_deep_research/web_search_engines/engines/meta_search_engine.py +59 -20
  197. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +19 -6
  198. local_deep_research/web_search_engines/engines/search_engine_brave.py +6 -2
  199. local_deep_research/web_search_engines/engines/search_engine_ddg.py +3 -1
  200. local_deep_research/web_search_engines/engines/search_engine_elasticsearch.py +81 -58
  201. local_deep_research/web_search_engines/engines/search_engine_github.py +46 -15
  202. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +16 -6
  203. local_deep_research/web_search_engines/engines/search_engine_guardian.py +39 -15
  204. local_deep_research/web_search_engines/engines/search_engine_local.py +58 -25
  205. local_deep_research/web_search_engines/engines/search_engine_local_all.py +15 -5
  206. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +63 -21
  207. local_deep_research/web_search_engines/engines/search_engine_searxng.py +37 -11
  208. local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +27 -9
  209. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +12 -4
  210. local_deep_research/web_search_engines/engines/search_engine_wayback.py +31 -10
  211. local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +12 -3
  212. local_deep_research/web_search_engines/search_engine_base.py +83 -35
  213. local_deep_research/web_search_engines/search_engine_factory.py +25 -8
  214. local_deep_research/web_search_engines/search_engines_config.py +9 -3
  215. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/METADATA +7 -1
  216. local_deep_research-0.5.2.dist-info/RECORD +265 -0
  217. local_deep_research-0.4.4.dist-info/RECORD +0 -177
  218. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/WHEEL +0 -0
  219. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/entry_points.txt +0 -0
  220. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/licenses/LICENSE +0 -0
@@ -8,7 +8,7 @@ of different components and processes in the research system.
8
8
  import logging
9
9
  import time
10
10
  from contextlib import contextmanager
11
- from typing import Dict, List, Optional, Any, Callable
11
+ from typing import Any, Callable, Dict
12
12
 
13
13
  logger = logging.getLogger(__name__)
14
14
 
@@ -16,57 +16,57 @@ logger = logging.getLogger(__name__)
16
16
  class SpeedProfiler:
17
17
  """
18
18
  Profiler for tracking execution speed of components.
19
-
19
+
20
20
  This class provides methods for timing operations and
21
21
  collecting performance statistics for later analysis.
22
22
  """
23
-
23
+
24
24
  def __init__(self):
25
25
  """Initialize the profiler with empty timing data."""
26
26
  self.timings = {}
27
27
  self.current_timers = {}
28
28
  self.total_start_time = None
29
29
  self.total_end_time = None
30
-
30
+
31
31
  def start(self):
32
32
  """Start the global profiling session."""
33
33
  self.timings = {}
34
34
  self.current_timers = {}
35
35
  self.total_start_time = time.time()
36
-
36
+
37
37
  def stop(self):
38
38
  """Stop the global profiling session."""
39
39
  self.total_end_time = time.time()
40
-
40
+
41
41
  # Stop any timers that are still running
42
42
  for name in list(self.current_timers.keys()):
43
43
  self.stop_timer(name)
44
-
44
+
45
45
  def start_timer(self, name: str):
46
46
  """
47
47
  Start a named timer.
48
-
48
+
49
49
  Args:
50
50
  name: Name of the timer to start
51
51
  """
52
52
  if name in self.current_timers:
53
53
  logger.warning(f"Timer '{name}' is already running. Restarting.")
54
-
54
+
55
55
  self.current_timers[name] = time.time()
56
-
56
+
57
57
  def stop_timer(self, name: str):
58
58
  """
59
59
  Stop a named timer and record the elapsed time.
60
-
60
+
61
61
  Args:
62
62
  name: Name of the timer to stop
63
63
  """
64
64
  if name not in self.current_timers:
65
65
  logger.warning(f"Timer '{name}' was not started.")
66
66
  return
67
-
67
+
68
68
  elapsed = time.time() - self.current_timers[name]
69
-
69
+
70
70
  if name not in self.timings:
71
71
  self.timings[name] = {
72
72
  "total": elapsed,
@@ -74,7 +74,7 @@ class SpeedProfiler:
74
74
  "min": elapsed,
75
75
  "max": elapsed,
76
76
  "starts": [self.current_timers[name]],
77
- "durations": [elapsed]
77
+ "durations": [elapsed],
78
78
  }
79
79
  else:
80
80
  self.timings[name]["total"] += elapsed
@@ -83,17 +83,17 @@ class SpeedProfiler:
83
83
  self.timings[name]["max"] = max(self.timings[name]["max"], elapsed)
84
84
  self.timings[name]["starts"].append(self.current_timers[name])
85
85
  self.timings[name]["durations"].append(elapsed)
86
-
86
+
87
87
  del self.current_timers[name]
88
-
88
+
89
89
  @contextmanager
90
90
  def timer(self, name: str):
91
91
  """
92
92
  Context manager for timing a block of code.
93
-
93
+
94
94
  Args:
95
95
  name: Name of the timer
96
-
96
+
97
97
  Example:
98
98
  with profiler.timer("my_operation"):
99
99
  # Code to time
@@ -104,23 +104,26 @@ class SpeedProfiler:
104
104
  yield
105
105
  finally:
106
106
  self.stop_timer(name)
107
-
107
+
108
108
  def get_timings(self) -> Dict[str, Any]:
109
109
  """
110
110
  Get all recorded timings.
111
-
111
+
112
112
  Returns:
113
113
  Dictionary of timing data for all measured operations
114
114
  """
115
115
  result = self.timings.copy()
116
-
116
+
117
117
  # Add averages
118
118
  for name, data in result.items():
119
119
  if data["count"] > 0:
120
120
  data["avg"] = data["total"] / data["count"]
121
-
121
+
122
122
  # Add total duration
123
- if self.total_start_time is not None and self.total_end_time is not None:
123
+ if (
124
+ self.total_start_time is not None
125
+ and self.total_end_time is not None
126
+ ):
124
127
  result["total"] = {
125
128
  "total": self.total_end_time - self.total_start_time,
126
129
  "count": 1,
@@ -128,87 +131,98 @@ class SpeedProfiler:
128
131
  "max": self.total_end_time - self.total_start_time,
129
132
  "avg": self.total_end_time - self.total_start_time,
130
133
  "starts": [self.total_start_time],
131
- "durations": [self.total_end_time - self.total_start_time]
134
+ "durations": [self.total_end_time - self.total_start_time],
132
135
  }
133
-
136
+
134
137
  return result
135
-
138
+
136
139
  def get_summary(self) -> Dict[str, float]:
137
140
  """
138
141
  Get a summary of timing information.
139
-
142
+
140
143
  Returns:
141
144
  Dictionary with summary statistics
142
145
  """
143
146
  timings = self.get_timings()
144
147
  summary = {}
145
-
148
+
146
149
  # Total duration
147
150
  if "total" in timings:
148
151
  summary["total_duration"] = timings["total"]["total"]
149
- elif self.total_start_time is not None and self.total_end_time is not None:
150
- summary["total_duration"] = self.total_end_time - self.total_start_time
152
+ elif (
153
+ self.total_start_time is not None
154
+ and self.total_end_time is not None
155
+ ):
156
+ summary["total_duration"] = (
157
+ self.total_end_time - self.total_start_time
158
+ )
151
159
  else:
152
- summary["total_duration"] = sum(t["total"] for t in timings.values())
153
-
160
+ summary["total_duration"] = sum(
161
+ t["total"] for t in timings.values()
162
+ )
163
+
154
164
  # Component durations
155
165
  for name, data in timings.items():
156
166
  if name != "total":
157
167
  summary[f"{name}_duration"] = data["total"]
158
168
  summary[f"{name}_percent"] = (
159
- data["total"] / summary["total_duration"] * 100
160
- if summary["total_duration"] > 0 else 0
169
+ data["total"] / summary["total_duration"] * 100
170
+ if summary["total_duration"] > 0
171
+ else 0
161
172
  )
162
-
173
+
163
174
  # Per-operation breakdowns
164
175
  for name, data in timings.items():
165
176
  if data["count"] > 0:
166
177
  summary[f"{name}_per_operation"] = data["total"] / data["count"]
167
-
178
+
168
179
  return summary
169
-
180
+
170
181
  def print_summary(self):
171
182
  """Print a formatted summary of timing information."""
172
183
  summary = self.get_summary()
173
184
  total = summary.get("total_duration", 0)
174
-
185
+
175
186
  print("\n===== SPEED PROFILE SUMMARY =====")
176
187
  print(f"Total execution time: {total:.2f} seconds")
177
188
  print("\n--- Component Breakdown ---")
178
-
189
+
179
190
  # Print each component's timing
180
191
  for name, data in self.timings.items():
181
192
  if name != "total":
182
193
  percent = data["total"] / total * 100 if total > 0 else 0
183
- print(f"{name}: {data['total']:.2f}s ({percent:.1f}%) - "
184
- f"{data['count']} calls, avg {data['total'] / data['count']:.3f}s per call")
185
-
194
+ print(
195
+ f"{name}: {data['total']:.2f}s ({percent:.1f}%) - "
196
+ f"{data['count']} calls, avg {data['total'] / data['count']:.3f}s per call"
197
+ )
198
+
186
199
  print("\n==============================")
187
200
 
188
201
 
189
202
  def time_function(func: Callable) -> Callable:
190
203
  """
191
204
  Decorator to time a function's execution.
192
-
205
+
193
206
  Args:
194
207
  func: Function to time
195
-
208
+
196
209
  Returns:
197
210
  Wrapped function that logs its execution time
198
-
211
+
199
212
  Example:
200
213
  @time_function
201
214
  def my_slow_function():
202
215
  # Some slow code
203
216
  pass
204
217
  """
218
+
205
219
  def wrapper(*args, **kwargs):
206
220
  start_time = time.time()
207
221
  result = func(*args, **kwargs)
208
222
  elapsed = time.time() - start_time
209
-
223
+
210
224
  logger.info(f"{func.__name__} took {elapsed:.3f} seconds")
211
-
225
+
212
226
  return result
213
-
227
+
214
228
  return wrapper
@@ -47,7 +47,9 @@ class BrowseCompEvaluator(BaseBenchmarkEvaluator):
47
47
  benchmark_dir = self._create_subdirectory(output_dir)
48
48
 
49
49
  # Log benchmark execution
50
- logger.info(f"Running BrowseComp benchmark with {num_examples} examples")
50
+ logger.info(
51
+ f"Running BrowseComp benchmark with {num_examples} examples"
52
+ )
51
53
 
52
54
  try:
53
55
  # Run BrowseComp benchmark
@@ -54,7 +54,9 @@ class CompositeBenchmarkEvaluator:
54
54
  }
55
55
 
56
56
  # Log the weights being used
57
- logger.info(f"Using normalized benchmark weights: {self.normalized_weights}")
57
+ logger.info(
58
+ f"Using normalized benchmark weights: {self.normalized_weights}"
59
+ )
58
60
 
59
61
  def evaluate(
60
62
  self,
@@ -105,7 +107,9 @@ class CompositeBenchmarkEvaluator:
105
107
  combined_score += weighted_contribution
106
108
 
107
109
  except Exception as e:
108
- logger.error(f"Error running {benchmark_name} benchmark: {str(e)}")
110
+ logger.error(
111
+ f"Error running {benchmark_name} benchmark: {str(e)}"
112
+ )
109
113
  all_results[benchmark_name] = {
110
114
  "benchmark_type": benchmark_name,
111
115
  "error": str(e),
@@ -9,9 +9,9 @@ import json
9
9
  import logging
10
10
  import os
11
11
  import time
12
- from typing import Any, Dict, List, Optional
12
+ from typing import Any, Dict
13
+
13
14
 
14
- from local_deep_research.api import quick_summary
15
15
  from ..datasets.base import DatasetRegistry
16
16
  from ..metrics import calculate_metrics, generate_report
17
17
  from ..runners import run_simpleqa_benchmark # Keep for backward compatibility
@@ -134,9 +134,15 @@ class SimpleQAEvaluator(BaseBenchmarkEvaluator):
134
134
 
135
135
  # Set up output files
136
136
  timestamp = time.strftime("%Y%m%d_%H%M%S")
137
- results_file = os.path.join(output_dir, f"simpleqa_{timestamp}_results.jsonl")
138
- evaluation_file = os.path.join(output_dir, f"simpleqa_{timestamp}_evaluation.jsonl")
139
- report_file = os.path.join(output_dir, f"simpleqa_{timestamp}_report.md")
137
+ results_file = os.path.join(
138
+ output_dir, f"simpleqa_{timestamp}_results.jsonl"
139
+ )
140
+ evaluation_file = os.path.join(
141
+ output_dir, f"simpleqa_{timestamp}_evaluation.jsonl"
142
+ )
143
+ report_file = os.path.join(
144
+ output_dir, f"simpleqa_{timestamp}_report.md"
145
+ )
140
146
 
141
147
  # Process each example
142
148
  results = []
@@ -146,7 +152,9 @@ class SimpleQAEvaluator(BaseBenchmarkEvaluator):
146
152
  question = dataset_instance.get_question(example)
147
153
  correct_answer = dataset_instance.get_answer(example)
148
154
 
149
- logger.info(f"Processing {i + 1}/{len(examples)}: {question[:50]}...")
155
+ logger.info(
156
+ f"Processing {i + 1}/{len(examples)}: {question[:50]}..."
157
+ )
150
158
 
151
159
  try:
152
160
  # Format query based on dataset type
@@ -158,18 +166,25 @@ class SimpleQAEvaluator(BaseBenchmarkEvaluator):
158
166
  # Create search config from system_config
159
167
  search_params = {
160
168
  "iterations": system_config.get("iterations", 3),
161
- "questions_per_iteration": system_config.get("questions_per_iteration", 3),
162
- "search_tool": system_config.get("search_tool", "searxng"),
169
+ "questions_per_iteration": system_config.get(
170
+ "questions_per_iteration", 3
171
+ ),
172
+ "search_tool": system_config.get(
173
+ "search_tool", "searxng"
174
+ ),
163
175
  # Note: search_strategy is stored in the config but not passed to quick_summary
164
176
  # as it's not supported by the underlying API
165
177
  }
166
178
 
167
179
  # Get response from LDR
168
180
  from local_deep_research.api import quick_summary
181
+
169
182
  search_result = quick_summary(
170
183
  query=formatted_query,
171
184
  iterations=search_params.get("iterations"),
172
- questions_per_iteration=search_params.get("questions_per_iteration"),
185
+ questions_per_iteration=search_params.get(
186
+ "questions_per_iteration"
187
+ ),
173
188
  search_tool=search_params.get("search_tool"),
174
189
  )
175
190
 
@@ -181,7 +196,10 @@ class SimpleQAEvaluator(BaseBenchmarkEvaluator):
181
196
 
182
197
  # Extract structured answer
183
198
  from ..graders import extract_answer_from_response
184
- extracted = extract_answer_from_response(response, "simpleqa")
199
+
200
+ extracted = extract_answer_from_response(
201
+ response, "simpleqa"
202
+ )
185
203
 
186
204
  # Format result
187
205
  result = {
@@ -224,7 +242,8 @@ class SimpleQAEvaluator(BaseBenchmarkEvaluator):
224
242
 
225
243
  # Grade results
226
244
  from ..graders import grade_results
227
- evaluation_results = grade_results(
245
+
246
+ grade_results(
228
247
  results_file=results_file,
229
248
  output_file=evaluation_file,
230
249
  dataset_type="simpleqa",
@@ -244,9 +263,13 @@ class SimpleQAEvaluator(BaseBenchmarkEvaluator):
244
263
  "Dataset": "SimpleQA",
245
264
  "Examples": len(examples),
246
265
  "Iterations": search_params.get("iterations", 3),
247
- "Questions per iteration": search_params.get("questions_per_iteration", 3),
266
+ "Questions per iteration": search_params.get(
267
+ "questions_per_iteration", 3
268
+ ),
248
269
  "Search tool": search_params.get("search_tool", "searxng"),
249
- "Search strategy": search_params.get("search_strategy", "source_based"),
270
+ "Search strategy": search_params.get(
271
+ "search_strategy", "source_based"
272
+ ),
250
273
  },
251
274
  )
252
275
 
@@ -59,7 +59,9 @@ def get_evaluation_llm(custom_config: Optional[Dict[str, Any]] = None):
59
59
  "api_key",
60
60
  }
61
61
 
62
- filtered_config = {k: v for k, v in config.items() if k in ldr_supported_params}
62
+ filtered_config = {
63
+ k: v for k, v in config.items() if k in ldr_supported_params
64
+ }
63
65
 
64
66
  # Check if we're using openai_endpoint but don't have an API key configured
65
67
  if filtered_config.get("provider") == "openai_endpoint":
@@ -182,7 +184,9 @@ def grade_results(
182
184
 
183
185
  try:
184
186
  # Grade using LLM
185
- if hasattr(evaluation_llm, "invoke") and callable(evaluation_llm.invoke):
187
+ if hasattr(evaluation_llm, "invoke") and callable(
188
+ evaluation_llm.invoke
189
+ ):
186
190
  if hasattr(evaluation_llm, "chat_messages"):
187
191
  # Handle ChatOpenAI and similar models that use messages
188
192
  grading_response = evaluation_llm.invoke(
@@ -214,7 +218,9 @@ def grade_results(
214
218
  grading_response,
215
219
  re.DOTALL,
216
220
  )
217
- reasoning = reasoning_match.group(1).strip() if reasoning_match else ""
221
+ reasoning = (
222
+ reasoning_match.group(1).strip() if reasoning_match else ""
223
+ )
218
224
 
219
225
  correct_match = re.search(
220
226
  r"correct:\s*(yes|no)", grading_response, re.IGNORECASE
@@ -225,8 +231,12 @@ def grade_results(
225
231
  else False
226
232
  )
227
233
 
228
- confidence_match = re.search(r"confidence:\s*(\d+)", grading_response)
229
- confidence = confidence_match.group(1) if confidence_match else "100"
234
+ confidence_match = re.search(
235
+ r"confidence:\s*(\d+)", grading_response
236
+ )
237
+ confidence = (
238
+ confidence_match.group(1) if confidence_match else "100"
239
+ )
230
240
  else:
231
241
  # SimpleQA extraction
232
242
  extracted_answer_match = re.search(
@@ -239,9 +249,13 @@ def grade_results(
239
249
  )
240
250
 
241
251
  reasoning_match = re.search(
242
- r"Reasoning:\s*(.*?)(?:\nCorrect:|\Z)", grading_response, re.DOTALL
252
+ r"Reasoning:\s*(.*?)(?:\nCorrect:|\Z)",
253
+ grading_response,
254
+ re.DOTALL,
255
+ )
256
+ reasoning = (
257
+ reasoning_match.group(1).strip() if reasoning_match else ""
243
258
  )
244
- reasoning = reasoning_match.group(1).strip() if reasoning_match else ""
245
259
 
246
260
  correct_match = re.search(
247
261
  r"Correct:\s*(yes|no)", grading_response, re.IGNORECASE
@@ -304,7 +318,11 @@ def grade_results(
304
318
  progress_callback(
305
319
  idx,
306
320
  len(results),
307
- {"status": "error", "error": str(e), "result": error_result},
321
+ {
322
+ "status": "error",
323
+ "error": str(e),
324
+ "result": error_result,
325
+ },
308
326
  )
309
327
 
310
328
  accuracy = correct_count / len(results) if results else 0
@@ -366,7 +384,9 @@ def human_evaluation(
366
384
  # Get human judgment
367
385
  while True:
368
386
  judgment = (
369
- input("\nIs the model's answer correct? (y/n): ").strip().lower()
387
+ input("\nIs the model's answer correct? (y/n): ")
388
+ .strip()
389
+ .lower()
370
390
  )
371
391
  if judgment in ["y", "n"]:
372
392
  break
@@ -375,7 +395,9 @@ def human_evaluation(
375
395
  is_correct = judgment == "y"
376
396
 
377
397
  # Get reasoning
378
- reasoning = input("Please provide reasoning for your judgment: ").strip()
398
+ reasoning = input(
399
+ "Please provide reasoning for your judgment: "
400
+ ).strip()
379
401
  else:
380
402
  # Non-interactive mode - placeholder for API/UI implementation
381
403
  # In a real implementation, this would be filled by UI actions
@@ -77,4 +77,4 @@ fig = plot_optimization_history(
77
77
  best_values=[0.5, 0.6, 0.7, 0.7, 0.8],
78
78
  output_file="optimization_history.png"
79
79
  )
80
- ```
80
+ ```
@@ -11,7 +11,7 @@ import os
11
11
  import tempfile
12
12
  import time
13
13
  from datetime import datetime
14
- from typing import Any, Dict, List, Optional, Union
14
+ from typing import Any, Dict, Optional
15
15
 
16
16
  logger = logging.getLogger(__name__)
17
17
 
@@ -50,7 +50,9 @@ def calculate_metrics(results_file: str) -> Dict[str, Any]:
50
50
  processing_times = [
51
51
  r.get("processing_time", 0) for r in results if "processing_time" in r
52
52
  ]
53
- avg_time = sum(processing_times) / len(processing_times) if processing_times else 0
53
+ avg_time = (
54
+ sum(processing_times) / len(processing_times) if processing_times else 0
55
+ )
54
56
 
55
57
  # Average confidence if available
56
58
  confidence_values = []
@@ -62,7 +64,9 @@ def calculate_metrics(results_file: str) -> Dict[str, Any]:
62
64
  pass
63
65
 
64
66
  avg_confidence = (
65
- sum(confidence_values) / len(confidence_values) if confidence_values else 0
67
+ sum(confidence_values) / len(confidence_values)
68
+ if confidence_values
69
+ else 0
66
70
  )
67
71
 
68
72
  # Calculate error rate
@@ -100,7 +104,9 @@ def calculate_metrics(results_file: str) -> Dict[str, Any]:
100
104
  "total": counts["total"],
101
105
  "correct": counts["correct"],
102
106
  "accuracy": (
103
- counts["correct"] / counts["total"] if counts["total"] else 0
107
+ counts["correct"] / counts["total"]
108
+ if counts["total"]
109
+ else 0
104
110
  ),
105
111
  }
106
112
  metrics["categories"] = category_metrics
@@ -136,7 +142,9 @@ def evaluate_benchmark_quality(
136
142
  # Create search configuration from system config
137
143
  search_config = {
138
144
  "iterations": system_config.get("iterations", 2),
139
- "questions_per_iteration": system_config.get("questions_per_iteration", 2),
145
+ "questions_per_iteration": system_config.get(
146
+ "questions_per_iteration", 2
147
+ ),
140
148
  "search_strategy": system_config.get("search_strategy", "iterdrag"),
141
149
  "search_tool": system_config.get("search_tool", "searxng"),
142
150
  "model_name": system_config.get("model_name"),
@@ -174,7 +182,9 @@ def evaluate_benchmark_quality(
174
182
  try:
175
183
  shutil.rmtree(temp_dir)
176
184
  except Exception as e:
177
- logger.warning(f"Failed to clean up temporary directory: {str(e)}")
185
+ logger.warning(
186
+ f"Failed to clean up temporary directory: {str(e)}"
187
+ )
178
188
 
179
189
 
180
190
  def measure_execution_time(
@@ -216,7 +226,7 @@ def measure_execution_time(
216
226
 
217
227
  try:
218
228
  for i in range(num_runs):
219
- logger.info(f"Executing speed test run {i+1}/{num_runs}")
229
+ logger.info(f"Executing speed test run {i + 1}/{num_runs}")
220
230
  start_time = time.time()
221
231
  system.search(query, full_response=False)
222
232
  end_time = time.time()
@@ -264,7 +274,9 @@ def calculate_quality_metrics(
264
274
  """
265
275
  # Run quality evaluation
266
276
  quality_results = evaluate_benchmark_quality(
267
- system_config=system_config, num_examples=num_examples, output_dir=output_dir
277
+ system_config=system_config,
278
+ num_examples=num_examples,
279
+ output_dir=output_dir,
268
280
  )
269
281
 
270
282
  # Return normalized quality score
@@ -337,7 +349,10 @@ def calculate_resource_metrics(
337
349
  # Normalize to 0-1 scale (lower is better)
338
350
  resource_score = 1.0 / (1.0 + (complexity / 4.0))
339
351
 
340
- return {"resource_score": resource_score, "estimated_complexity": complexity}
352
+ return {
353
+ "resource_score": resource_score,
354
+ "estimated_complexity": complexity,
355
+ }
341
356
 
342
357
 
343
358
  def calculate_combined_score(
@@ -382,4 +397,4 @@ def calculate_combined_score(
382
397
  resource_score = metrics["resource"].get("resource_score", 0.0)
383
398
  score += resource_score * norm_weights["resource"]
384
399
 
385
- return score
400
+ return score
@@ -46,7 +46,9 @@ def generate_report(
46
46
  # Sample up to 5 correct and 5 incorrect examples
47
47
  correct_examples = [r for r in results if r.get("is_correct", False)][:5]
48
48
  incorrect_examples = [
49
- r for r in results if "is_correct" in r and not r.get("is_correct", False)
49
+ r
50
+ for r in results
51
+ if "is_correct" in r and not r.get("is_correct", False)
50
52
  ][:5]
51
53
 
52
54
  # Create report
@@ -67,7 +69,9 @@ def generate_report(
67
69
  )
68
70
 
69
71
  if "average_confidence" in metrics:
70
- report.append(f"- **Average Confidence**: {metrics['average_confidence']:.2f}%")
72
+ report.append(
73
+ f"- **Average Confidence**: {metrics['average_confidence']:.2f}%"
74
+ )
71
75
 
72
76
  if "error_count" in metrics and metrics["error_count"] > 0:
73
77
  report.append(f"- **Error Count**: {metrics['error_count']}")
@@ -152,4 +156,4 @@ def generate_report(
152
156
  f.write("\n".join(report))
153
157
 
154
158
  logger.info(f"Report saved to {output_file}")
155
- return output_file
159
+ return output_file