local-deep-research 0.3.12__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. local_deep_research/__version__.py +1 -1
  2. local_deep_research/advanced_search_system/filters/base_filter.py +2 -3
  3. local_deep_research/advanced_search_system/filters/cross_engine_filter.py +4 -5
  4. local_deep_research/advanced_search_system/filters/journal_reputation_filter.py +298 -0
  5. local_deep_research/advanced_search_system/findings/repository.py +0 -3
  6. local_deep_research/advanced_search_system/strategies/base_strategy.py +1 -2
  7. local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +14 -18
  8. local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +4 -8
  9. local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +5 -6
  10. local_deep_research/advanced_search_system/strategies/source_based_strategy.py +2 -2
  11. local_deep_research/advanced_search_system/strategies/standard_strategy.py +9 -7
  12. local_deep_research/api/benchmark_functions.py +288 -0
  13. local_deep_research/api/research_functions.py +8 -4
  14. local_deep_research/benchmarks/README.md +162 -0
  15. local_deep_research/benchmarks/__init__.py +51 -0
  16. local_deep_research/benchmarks/benchmark_functions.py +353 -0
  17. local_deep_research/benchmarks/cli/__init__.py +16 -0
  18. local_deep_research/benchmarks/cli/benchmark_commands.py +338 -0
  19. local_deep_research/benchmarks/cli.py +347 -0
  20. local_deep_research/benchmarks/comparison/__init__.py +12 -0
  21. local_deep_research/benchmarks/comparison/evaluator.py +768 -0
  22. local_deep_research/benchmarks/datasets/__init__.py +53 -0
  23. local_deep_research/benchmarks/datasets/base.py +295 -0
  24. local_deep_research/benchmarks/datasets/browsecomp.py +116 -0
  25. local_deep_research/benchmarks/datasets/custom_dataset_template.py +98 -0
  26. local_deep_research/benchmarks/datasets/simpleqa.py +74 -0
  27. local_deep_research/benchmarks/datasets/utils.py +116 -0
  28. local_deep_research/benchmarks/datasets.py +31 -0
  29. local_deep_research/benchmarks/efficiency/__init__.py +14 -0
  30. local_deep_research/benchmarks/efficiency/resource_monitor.py +367 -0
  31. local_deep_research/benchmarks/efficiency/speed_profiler.py +214 -0
  32. local_deep_research/benchmarks/evaluators/__init__.py +18 -0
  33. local_deep_research/benchmarks/evaluators/base.py +74 -0
  34. local_deep_research/benchmarks/evaluators/browsecomp.py +83 -0
  35. local_deep_research/benchmarks/evaluators/composite.py +121 -0
  36. local_deep_research/benchmarks/evaluators/simpleqa.py +271 -0
  37. local_deep_research/benchmarks/graders.py +410 -0
  38. local_deep_research/benchmarks/metrics/README.md +80 -0
  39. local_deep_research/benchmarks/metrics/__init__.py +24 -0
  40. local_deep_research/benchmarks/metrics/calculation.py +385 -0
  41. local_deep_research/benchmarks/metrics/reporting.py +155 -0
  42. local_deep_research/benchmarks/metrics/visualization.py +205 -0
  43. local_deep_research/benchmarks/metrics.py +11 -0
  44. local_deep_research/benchmarks/optimization/__init__.py +32 -0
  45. local_deep_research/benchmarks/optimization/api.py +274 -0
  46. local_deep_research/benchmarks/optimization/metrics.py +20 -0
  47. local_deep_research/benchmarks/optimization/optuna_optimizer.py +1163 -0
  48. local_deep_research/benchmarks/runners.py +434 -0
  49. local_deep_research/benchmarks/templates.py +65 -0
  50. local_deep_research/config/llm_config.py +26 -23
  51. local_deep_research/config/search_config.py +1 -5
  52. local_deep_research/defaults/default_settings.json +108 -7
  53. local_deep_research/search_system.py +16 -8
  54. local_deep_research/utilities/db_utils.py +3 -6
  55. local_deep_research/utilities/es_utils.py +441 -0
  56. local_deep_research/utilities/log_utils.py +36 -0
  57. local_deep_research/utilities/search_utilities.py +8 -9
  58. local_deep_research/web/app.py +7 -9
  59. local_deep_research/web/app_factory.py +9 -12
  60. local_deep_research/web/database/migrations.py +8 -5
  61. local_deep_research/web/database/models.py +20 -0
  62. local_deep_research/web/database/schema_upgrade.py +5 -8
  63. local_deep_research/web/models/database.py +15 -18
  64. local_deep_research/web/routes/benchmark_routes.py +427 -0
  65. local_deep_research/web/routes/research_routes.py +13 -17
  66. local_deep_research/web/routes/settings_routes.py +264 -67
  67. local_deep_research/web/services/research_service.py +47 -57
  68. local_deep_research/web/services/settings_manager.py +1 -4
  69. local_deep_research/web/services/settings_service.py +4 -6
  70. local_deep_research/web/static/css/styles.css +12 -0
  71. local_deep_research/web/static/js/components/logpanel.js +164 -155
  72. local_deep_research/web/static/js/components/research.js +44 -3
  73. local_deep_research/web/static/js/components/settings.js +27 -0
  74. local_deep_research/web/static/js/services/socket.js +47 -0
  75. local_deep_research/web_search_engines/default_search_engines.py +38 -0
  76. local_deep_research/web_search_engines/engines/meta_search_engine.py +100 -33
  77. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +31 -17
  78. local_deep_research/web_search_engines/engines/search_engine_brave.py +8 -3
  79. local_deep_research/web_search_engines/engines/search_engine_elasticsearch.py +343 -0
  80. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +14 -6
  81. local_deep_research/web_search_engines/engines/search_engine_local.py +19 -23
  82. local_deep_research/web_search_engines/engines/search_engine_local_all.py +9 -12
  83. local_deep_research/web_search_engines/engines/search_engine_searxng.py +12 -17
  84. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +8 -4
  85. local_deep_research/web_search_engines/search_engine_base.py +22 -5
  86. local_deep_research/web_search_engines/search_engine_factory.py +32 -11
  87. local_deep_research/web_search_engines/search_engines_config.py +14 -1
  88. {local_deep_research-0.3.12.dist-info → local_deep_research-0.4.0.dist-info}/METADATA +10 -2
  89. {local_deep_research-0.3.12.dist-info → local_deep_research-0.4.0.dist-info}/RECORD +92 -49
  90. {local_deep_research-0.3.12.dist-info → local_deep_research-0.4.0.dist-info}/WHEEL +0 -0
  91. {local_deep_research-0.3.12.dist-info → local_deep_research-0.4.0.dist-info}/entry_points.txt +0 -0
  92. {local_deep_research-0.3.12.dist-info → local_deep_research-0.4.0.dist-info}/licenses/LICENSE +0 -0
--- /dev/null
+++ b/local_deep_research/benchmarks/efficiency/speed_profiler.py
@@ -0,0 +1,214 @@
+ """
+ Speed profiling tools for Local Deep Research.
+
+ This module provides functionality for measuring execution time
+ of different components and processes in the research system.
+ """
+
+ import logging
+ import time
+ from contextlib import contextmanager
+ from typing import Dict, List, Optional, Any, Callable
+
+ logger = logging.getLogger(__name__)
+
+
+ class SpeedProfiler:
+     """
+     Profiler for tracking execution speed of components.
+
+     This class provides methods for timing operations and
+     collecting performance statistics for later analysis.
+     """
+
+     def __init__(self):
+         """Initialize the profiler with empty timing data."""
+         self.timings = {}
+         self.current_timers = {}
+         self.total_start_time = None
+         self.total_end_time = None
+
+     def start(self):
+         """Start the global profiling session."""
+         self.timings = {}
+         self.current_timers = {}
+         self.total_start_time = time.time()
+
+     def stop(self):
+         """Stop the global profiling session."""
+         self.total_end_time = time.time()
+
+         # Stop any timers that are still running
+         for name in list(self.current_timers.keys()):
+             self.stop_timer(name)
+
+     def start_timer(self, name: str):
+         """
+         Start a named timer.
+
+         Args:
+             name: Name of the timer to start
+         """
+         if name in self.current_timers:
+             logger.warning(f"Timer '{name}' is already running. Restarting.")
+
+         self.current_timers[name] = time.time()
+
+     def stop_timer(self, name: str):
+         """
+         Stop a named timer and record the elapsed time.
+
+         Args:
+             name: Name of the timer to stop
+         """
+         if name not in self.current_timers:
+             logger.warning(f"Timer '{name}' was not started.")
+             return
+
+         elapsed = time.time() - self.current_timers[name]
+
+         if name not in self.timings:
+             self.timings[name] = {
+                 "total": elapsed,
+                 "count": 1,
+                 "min": elapsed,
+                 "max": elapsed,
+                 "starts": [self.current_timers[name]],
+                 "durations": [elapsed]
+             }
+         else:
+             self.timings[name]["total"] += elapsed
+             self.timings[name]["count"] += 1
+             self.timings[name]["min"] = min(self.timings[name]["min"], elapsed)
+             self.timings[name]["max"] = max(self.timings[name]["max"], elapsed)
+             self.timings[name]["starts"].append(self.current_timers[name])
+             self.timings[name]["durations"].append(elapsed)
+
+         del self.current_timers[name]
+
+     @contextmanager
+     def timer(self, name: str):
+         """
+         Context manager for timing a block of code.
+
+         Args:
+             name: Name of the timer
+
+         Example:
+             with profiler.timer("my_operation"):
+                 # Code to time
+                 do_something()
+         """
+         self.start_timer(name)
+         try:
+             yield
+         finally:
+             self.stop_timer(name)
+
+     def get_timings(self) -> Dict[str, Any]:
+         """
+         Get all recorded timings.
+
+         Returns:
+             Dictionary of timing data for all measured operations
+         """
+         result = self.timings.copy()
+
+         # Add averages
+         for name, data in result.items():
+             if data["count"] > 0:
+                 data["avg"] = data["total"] / data["count"]
+
+         # Add total duration
+         if self.total_start_time is not None and self.total_end_time is not None:
+             result["total"] = {
+                 "total": self.total_end_time - self.total_start_time,
+                 "count": 1,
+                 "min": self.total_end_time - self.total_start_time,
+                 "max": self.total_end_time - self.total_start_time,
+                 "avg": self.total_end_time - self.total_start_time,
+                 "starts": [self.total_start_time],
+                 "durations": [self.total_end_time - self.total_start_time]
+             }
+
+         return result
+
+     def get_summary(self) -> Dict[str, float]:
+         """
+         Get a summary of timing information.
+
+         Returns:
+             Dictionary with summary statistics
+         """
+         timings = self.get_timings()
+         summary = {}
+
+         # Total duration
+         if "total" in timings:
+             summary["total_duration"] = timings["total"]["total"]
+         elif self.total_start_time is not None and self.total_end_time is not None:
+             summary["total_duration"] = self.total_end_time - self.total_start_time
+         else:
+             summary["total_duration"] = sum(t["total"] for t in timings.values())
+
+         # Component durations
+         for name, data in timings.items():
+             if name != "total":
+                 summary[f"{name}_duration"] = data["total"]
+                 summary[f"{name}_percent"] = (
+                     data["total"] / summary["total_duration"] * 100
+                     if summary["total_duration"] > 0 else 0
+                 )
+
+         # Per-operation breakdowns
+         for name, data in timings.items():
+             if data["count"] > 0:
+                 summary[f"{name}_per_operation"] = data["total"] / data["count"]
+
+         return summary
+
+     def print_summary(self):
+         """Print a formatted summary of timing information."""
+         summary = self.get_summary()
+         total = summary.get("total_duration", 0)
+
+         print("\n===== SPEED PROFILE SUMMARY =====")
+         print(f"Total execution time: {total:.2f} seconds")
+         print("\n--- Component Breakdown ---")
+
+         # Print each component's timing
+         for name, data in self.timings.items():
+             if name != "total":
+                 percent = data["total"] / total * 100 if total > 0 else 0
+                 print(f"{name}: {data['total']:.2f}s ({percent:.1f}%) - "
+                       f"{data['count']} calls, avg {data['total'] / data['count']:.3f}s per call")
+
+         print("\n==============================")
+
+
+ def time_function(func: Callable) -> Callable:
+     """
+     Decorator to time a function's execution.
+
+     Args:
+         func: Function to time
+
+     Returns:
+         Wrapped function that logs its execution time
+
+     Example:
+         @time_function
+         def my_slow_function():
+             # Some slow code
+             pass
+     """
+     def wrapper(*args, **kwargs):
+         start_time = time.time()
+         result = func(*args, **kwargs)
+         elapsed = time.time() - start_time
+
+         logger.info(f"{func.__name__} took {elapsed:.3f} seconds")
+
+         return result
+
+     return wrapper
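
For orientation, here is a minimal usage sketch of the profiler API added above. The component labels ("search", "synthesis") and the stand-in workloads are invented for the example; only SpeedProfiler, time_function, and their methods come from the module itself.

import time

from local_deep_research.benchmarks.efficiency.speed_profiler import (
    SpeedProfiler,
    time_function,
)

def run_search():
    time.sleep(0.05)  # stand-in workload for the sketch

profiler = SpeedProfiler()
profiler.start()  # begin the global session and reset prior timings

with profiler.timer("search"):  # context-managed timing of a block
    run_search()

profiler.start_timer("synthesis")  # manual start/stop also works
time.sleep(0.02)
profiler.stop_timer("synthesis")

profiler.stop()  # also closes any timers left running
profiler.print_summary()  # totals, percentages, and per-call averages

@time_function  # logs "fetch took X.XXX seconds" at INFO level
def fetch(url: str) -> str:
    return url

Note that repeated timers under the same name accumulate into count/min/max/avg statistics rather than overwriting each other.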
--- /dev/null
+++ b/local_deep_research/benchmarks/evaluators/__init__.py
@@ -0,0 +1,18 @@
+ """
+ Benchmark evaluation package.
+
+ This package provides evaluators for different benchmark types and
+ a composite evaluator for weighted multi-benchmark evaluation.
+ """
+
+ from .base import BaseBenchmarkEvaluator
+ from .browsecomp import BrowseCompEvaluator
+ from .composite import CompositeBenchmarkEvaluator
+ from .simpleqa import SimpleQAEvaluator
+
+ __all__ = [
+     "BaseBenchmarkEvaluator",
+     "SimpleQAEvaluator",
+     "BrowseCompEvaluator",
+     "CompositeBenchmarkEvaluator",
+ ]
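
These re-exports mean downstream code can import the whole evaluator surface from the package root; a one-line sketch, assuming the installed package layout shown in the file list above:

from local_deep_research.benchmarks.evaluators import (
    BaseBenchmarkEvaluator,
    SimpleQAEvaluator,
    BrowseCompEvaluator,
    CompositeBenchmarkEvaluator,
)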
--- /dev/null
+++ b/local_deep_research/benchmarks/evaluators/base.py
@@ -0,0 +1,74 @@
+ """
+ Base class for benchmark evaluators.
+
+ This module defines the abstract base class that all benchmark evaluators
+ must implement, establishing a common interface for different benchmark types.
+ """
+
+ import logging
+ import os
+ from abc import ABC, abstractmethod
+ from typing import Any, Dict
+
+ logger = logging.getLogger(__name__)
+
+
+ class BaseBenchmarkEvaluator(ABC):
+     """
+     Abstract base class for benchmark evaluators.
+
+     All benchmark evaluator implementations must inherit from this class and
+     implement the evaluate method to run their specific benchmark type.
+     """
+
+     def __init__(self, name: str):
+         """
+         Initialize benchmark evaluator with a name.
+
+         Args:
+             name: Unique identifier for this benchmark type
+         """
+         self.name = name
+
+     def get_name(self) -> str:
+         """
+         Get the benchmark name.
+
+         Returns:
+             The benchmark identifier
+         """
+         return self.name
+
+     @abstractmethod
+     def evaluate(
+         self,
+         system_config: Dict[str, Any],
+         num_examples: int,
+         output_dir: str,
+     ) -> Dict[str, Any]:
+         """
+         Run benchmark evaluation with given system configuration.
+
+         Args:
+             system_config: Configuration parameters for the system under test
+             num_examples: Number of benchmark examples to evaluate
+             output_dir: Directory to save evaluation results
+
+         Returns:
+             Dictionary with evaluation metrics including quality_score (0-1)
+         """
+         pass
+
+     def _create_subdirectory(self, output_dir: str) -> str:
+         """
+         Create a benchmark-specific subdirectory for output.
+
+         Args:
+             output_dir: Parent directory for output
+
+         Returns:
+             Path to the benchmark-specific directory
+         """
+         benchmark_dir = os.path.join(output_dir, self.name)
+         os.makedirs(benchmark_dir, exist_ok=True)
+         return benchmark_dir
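
To illustrate the contract this base class fixes, here is a hedged sketch of a custom evaluator. TrivialEvaluator, its fixed score, and the results.json file name are invented for the example and are not part of the package; only the BaseBenchmarkEvaluator interface is.

import json
import os
from typing import Any, Dict

from local_deep_research.benchmarks.evaluators.base import BaseBenchmarkEvaluator


class TrivialEvaluator(BaseBenchmarkEvaluator):
    """Toy evaluator that scores every run as perfect; shows the interface only."""

    def __init__(self):
        super().__init__("trivial")  # unique benchmark identifier

    def evaluate(
        self,
        system_config: Dict[str, Any],
        num_examples: int,
        output_dir: str,
    ) -> Dict[str, Any]:
        # _create_subdirectory yields <output_dir>/trivial
        benchmark_dir = self._create_subdirectory(output_dir)
        result = {"benchmark_type": self.name, "quality_score": 1.0}
        with open(os.path.join(benchmark_dir, "results.json"), "w") as f:
            json.dump(result, f)
        return result

The only hard requirement is that evaluate() returns a dictionary containing a quality_score in [0, 1], which is what the composite evaluator below consumes.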
--- /dev/null
+++ b/local_deep_research/benchmarks/evaluators/browsecomp.py
@@ -0,0 +1,83 @@
+ """
+ BrowseComp benchmark evaluator.
+
+ This module provides a benchmark evaluator implementation for the BrowseComp
+ benchmark, which tests browsing comprehension capabilities.
+ """
+
+ import logging
+ from typing import Any, Dict
+
+ from ..runners import run_browsecomp_benchmark
+ from .base import BaseBenchmarkEvaluator
+
+ logger = logging.getLogger(__name__)
+
+
+ class BrowseCompEvaluator(BaseBenchmarkEvaluator):
+     """
+     Evaluator for the BrowseComp benchmark.
+
+     This evaluator runs the BrowseComp benchmark, which tests a system's ability
+     to accurately comprehend and answer questions from web browsing.
+     """
+
+     def __init__(self):
+         """Initialize the BrowseComp evaluator."""
+         super().__init__("browsecomp")
+
+     def evaluate(
+         self,
+         system_config: Dict[str, Any],
+         num_examples: int,
+         output_dir: str,
+     ) -> Dict[str, Any]:
+         """
+         Run BrowseComp benchmark and return metrics.
+
+         Args:
+             system_config: Search and LLM configuration parameters
+             num_examples: Number of benchmark examples to run
+             output_dir: Directory to save evaluation results
+
+         Returns:
+             Dictionary with metrics including quality_score based on accuracy
+         """
+         # Create benchmark-specific directory
+         benchmark_dir = self._create_subdirectory(output_dir)
+
+         # Log benchmark execution
+         logger.info(f"Running BrowseComp benchmark with {num_examples} examples")
+
+         try:
+             # Run BrowseComp benchmark
+             results = run_browsecomp_benchmark(
+                 num_examples=num_examples,
+                 output_dir=benchmark_dir,
+                 search_config=system_config,
+                 run_evaluation=True,
+             )
+
+             # Extract metrics
+             metrics = results.get("metrics", {})
+             accuracy = metrics.get("accuracy", 0.0)
+
+             # Return evaluation results with quality score
+             return {
+                 "benchmark_type": self.name,
+                 "accuracy": accuracy,
+                 "quality_score": accuracy,  # Map accuracy directly to quality score
+                 "raw_results": results,
+                 "report_path": results.get("report_path"),
+             }
+
+         except Exception as e:
+             logger.error(f"Error in BrowseComp evaluation: {str(e)}")
+
+             # Return error information
+             return {
+                 "benchmark_type": self.name,
+                 "error": str(e),
+                 "quality_score": 0.0,
+                 "accuracy": 0.0,
+             }
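
A short sketch of driving this evaluator directly and consuming the two result shapes it can return; the system_config key shown is illustrative, not a documented schema:

from local_deep_research.benchmarks.evaluators import BrowseCompEvaluator

evaluator = BrowseCompEvaluator()
result = evaluator.evaluate(
    system_config={"iterations": 2},  # forwarded to the runner as search_config
    num_examples=5,
    output_dir="./benchmark_output",
)

# Success path: accuracy is mapped 1:1 onto quality_score.
# Failure path: an "error" key, with quality_score and accuracy pinned to 0.0.
if "error" in result:
    print(f"BrowseComp failed: {result['error']}")
else:
    print(f"accuracy={result['accuracy']:.2%}, report={result['report_path']}")

Returning a zeroed score instead of re-raising keeps a multi-benchmark run alive when one benchmark fails, at the cost of silently dragging down the combined score.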
--- /dev/null
+++ b/local_deep_research/benchmarks/evaluators/composite.py
@@ -0,0 +1,121 @@
+ """
+ Composite benchmark evaluator.
+
+ This module provides a composite evaluator that can run multiple benchmarks
+ with weighted scores to provide a comprehensive evaluation.
+ """
+
+ import logging
+ from typing import Any, Dict, Optional
+
+ # Import specific evaluator implementations
+ from .browsecomp import BrowseCompEvaluator
+ from .simpleqa import SimpleQAEvaluator
+
+ logger = logging.getLogger(__name__)
+
+
+ class CompositeBenchmarkEvaluator:
+     """
+     Evaluator that combines multiple benchmarks with weighted scores.
+
+     This evaluator runs multiple benchmark types and combines their scores
+     according to specified weights, enabling comprehensive evaluation across
+     different metrics and tasks.
+     """
+
+     def __init__(self, benchmark_weights: Optional[Dict[str, float]] = None):
+         """
+         Initialize with benchmark weights.
+
+         Args:
+             benchmark_weights: Dictionary mapping benchmark names to weights.
+                 Default: {"simpleqa": 1.0}
+         """
+         # Default to SimpleQA only if no weights provided
+         self.benchmark_weights = benchmark_weights or {"simpleqa": 1.0}
+
+         # Create evaluators for available benchmarks
+         self.evaluators = {
+             "simpleqa": SimpleQAEvaluator(),
+             "browsecomp": BrowseCompEvaluator(),
+         }
+
+         # Normalize weights to sum to 1.0
+         total_weight = sum(self.benchmark_weights.values())
+         if total_weight <= 0:
+             logger.warning(
+                 "Total benchmark weight is zero or negative. Using default weights."
+             )
+             self.normalized_weights = {"simpleqa": 1.0}
+         else:
+             self.normalized_weights = {
+                 k: w / total_weight for k, w in self.benchmark_weights.items()
+             }
+
+         # Log the weights being used
+         logger.info(f"Using normalized benchmark weights: {self.normalized_weights}")
+
+     def evaluate(
+         self,
+         system_config: Dict[str, Any],
+         num_examples: int,
+         output_dir: str,
+     ) -> Dict[str, Any]:
+         """
+         Run all requested benchmarks and compute weighted score.
+
+         Args:
+             system_config: Configuration parameters for the system under test
+             num_examples: Number of benchmark examples to evaluate
+             output_dir: Directory to save evaluation results
+
+         Returns:
+             Dictionary with combined metrics and individual benchmark results
+         """
+         all_results = {}
+         combined_score = 0.0
+
+         # Run each benchmark with weight > 0
+         for benchmark_name, weight in self.normalized_weights.items():
+             if weight > 0 and benchmark_name in self.evaluators:
+                 evaluator = self.evaluators[benchmark_name]
+
+                 try:
+                     # Run benchmark evaluation
+                     result = evaluator.evaluate(
+                         system_config=system_config,
+                         num_examples=num_examples,
+                         output_dir=output_dir,
+                     )
+
+                     # Store individual results
+                     all_results[benchmark_name] = result
+
+                     # Calculate weighted contribution to combined score
+                     quality_score = result.get("quality_score", 0.0)
+                     weighted_contribution = quality_score * weight
+
+                     logger.info(
+                         f"Benchmark {benchmark_name}: score={quality_score:.4f}, "
+                         f"weight={weight:.2f}, contribution={weighted_contribution:.4f}"
+                     )
+
+                     # Add to combined score
+                     combined_score += weighted_contribution
+
+                 except Exception as e:
+                     logger.error(f"Error running {benchmark_name} benchmark: {str(e)}")
+                     all_results[benchmark_name] = {
+                         "benchmark_type": benchmark_name,
+                         "error": str(e),
+                         "quality_score": 0.0,
+                     }
+
+         # Return combined results
+         return {
+             "quality_score": combined_score,
+             "benchmark_results": all_results,
+             "benchmark_weights": self.normalized_weights,
+             "combined_score": combined_score,
+         }
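
Finally, a sketch of the composite evaluator in use; the 70/30 split is an arbitrary example, not a recommended weighting:

from local_deep_research.benchmarks.evaluators import CompositeBenchmarkEvaluator

# Weights are normalized internally, so any positive scale works
# ({"simpleqa": 7, "browsecomp": 3} would behave identically).
evaluator = CompositeBenchmarkEvaluator(
    benchmark_weights={"simpleqa": 0.7, "browsecomp": 0.3}
)

result = evaluator.evaluate(
    system_config={},  # passed through unchanged to each underlying benchmark
    num_examples=10,
    output_dir="./benchmark_output",
)

print(f"combined score: {result['quality_score']:.4f}")
for name, res in result["benchmark_results"].items():
    print(f"  {name}: {res.get('quality_score', 0.0):.4f}")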