local-deep-research 0.3.12__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- local_deep_research/__init__.py +1 -0
- local_deep_research/__version__.py +1 -1
- local_deep_research/advanced_search_system/filters/base_filter.py +2 -3
- local_deep_research/advanced_search_system/filters/cross_engine_filter.py +4 -5
- local_deep_research/advanced_search_system/filters/journal_reputation_filter.py +298 -0
- local_deep_research/advanced_search_system/findings/repository.py +0 -3
- local_deep_research/advanced_search_system/strategies/base_strategy.py +1 -2
- local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +14 -18
- local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +4 -8
- local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +5 -6
- local_deep_research/advanced_search_system/strategies/source_based_strategy.py +2 -2
- local_deep_research/advanced_search_system/strategies/standard_strategy.py +9 -7
- local_deep_research/api/benchmark_functions.py +288 -0
- local_deep_research/api/research_functions.py +8 -4
- local_deep_research/benchmarks/README.md +162 -0
- local_deep_research/benchmarks/__init__.py +51 -0
- local_deep_research/benchmarks/benchmark_functions.py +353 -0
- local_deep_research/benchmarks/cli/__init__.py +16 -0
- local_deep_research/benchmarks/cli/benchmark_commands.py +338 -0
- local_deep_research/benchmarks/cli.py +347 -0
- local_deep_research/benchmarks/comparison/__init__.py +12 -0
- local_deep_research/benchmarks/comparison/evaluator.py +768 -0
- local_deep_research/benchmarks/datasets/__init__.py +53 -0
- local_deep_research/benchmarks/datasets/base.py +295 -0
- local_deep_research/benchmarks/datasets/browsecomp.py +116 -0
- local_deep_research/benchmarks/datasets/custom_dataset_template.py +98 -0
- local_deep_research/benchmarks/datasets/simpleqa.py +74 -0
- local_deep_research/benchmarks/datasets/utils.py +116 -0
- local_deep_research/benchmarks/datasets.py +31 -0
- local_deep_research/benchmarks/efficiency/__init__.py +14 -0
- local_deep_research/benchmarks/efficiency/resource_monitor.py +367 -0
- local_deep_research/benchmarks/efficiency/speed_profiler.py +214 -0
- local_deep_research/benchmarks/evaluators/__init__.py +18 -0
- local_deep_research/benchmarks/evaluators/base.py +74 -0
- local_deep_research/benchmarks/evaluators/browsecomp.py +83 -0
- local_deep_research/benchmarks/evaluators/composite.py +121 -0
- local_deep_research/benchmarks/evaluators/simpleqa.py +271 -0
- local_deep_research/benchmarks/graders.py +410 -0
- local_deep_research/benchmarks/metrics/README.md +80 -0
- local_deep_research/benchmarks/metrics/__init__.py +24 -0
- local_deep_research/benchmarks/metrics/calculation.py +385 -0
- local_deep_research/benchmarks/metrics/reporting.py +155 -0
- local_deep_research/benchmarks/metrics/visualization.py +205 -0
- local_deep_research/benchmarks/metrics.py +11 -0
- local_deep_research/benchmarks/optimization/__init__.py +32 -0
- local_deep_research/benchmarks/optimization/api.py +274 -0
- local_deep_research/benchmarks/optimization/metrics.py +20 -0
- local_deep_research/benchmarks/optimization/optuna_optimizer.py +1163 -0
- local_deep_research/benchmarks/runners.py +434 -0
- local_deep_research/benchmarks/templates.py +65 -0
- local_deep_research/config/llm_config.py +26 -23
- local_deep_research/config/search_config.py +1 -5
- local_deep_research/defaults/default_settings.json +108 -7
- local_deep_research/search_system.py +16 -8
- local_deep_research/utilities/db_utils.py +3 -6
- local_deep_research/utilities/es_utils.py +441 -0
- local_deep_research/utilities/log_utils.py +36 -0
- local_deep_research/utilities/search_utilities.py +8 -9
- local_deep_research/web/app.py +15 -10
- local_deep_research/web/app_factory.py +9 -12
- local_deep_research/web/database/migrations.py +8 -5
- local_deep_research/web/database/models.py +20 -0
- local_deep_research/web/database/schema_upgrade.py +5 -8
- local_deep_research/web/models/database.py +15 -18
- local_deep_research/web/routes/benchmark_routes.py +427 -0
- local_deep_research/web/routes/research_routes.py +13 -17
- local_deep_research/web/routes/settings_routes.py +264 -67
- local_deep_research/web/services/research_service.py +58 -73
- local_deep_research/web/services/settings_manager.py +1 -4
- local_deep_research/web/services/settings_service.py +4 -6
- local_deep_research/web/static/css/styles.css +12 -0
- local_deep_research/web/static/js/components/logpanel.js +164 -155
- local_deep_research/web/static/js/components/research.js +44 -3
- local_deep_research/web/static/js/components/settings.js +27 -0
- local_deep_research/web/static/js/services/socket.js +47 -0
- local_deep_research/web_search_engines/default_search_engines.py +38 -0
- local_deep_research/web_search_engines/engines/meta_search_engine.py +100 -33
- local_deep_research/web_search_engines/engines/search_engine_arxiv.py +31 -17
- local_deep_research/web_search_engines/engines/search_engine_brave.py +8 -3
- local_deep_research/web_search_engines/engines/search_engine_elasticsearch.py +343 -0
- local_deep_research/web_search_engines/engines/search_engine_google_pse.py +14 -6
- local_deep_research/web_search_engines/engines/search_engine_local.py +19 -23
- local_deep_research/web_search_engines/engines/search_engine_local_all.py +9 -12
- local_deep_research/web_search_engines/engines/search_engine_searxng.py +12 -17
- local_deep_research/web_search_engines/engines/search_engine_serpapi.py +8 -4
- local_deep_research/web_search_engines/search_engine_base.py +22 -5
- local_deep_research/web_search_engines/search_engine_factory.py +30 -11
- local_deep_research/web_search_engines/search_engines_config.py +14 -1
- {local_deep_research-0.3.12.dist-info → local_deep_research-0.4.1.dist-info}/METADATA +10 -2
- {local_deep_research-0.3.12.dist-info → local_deep_research-0.4.1.dist-info}/RECORD +93 -51
- local_deep_research/app.py +0 -8
- {local_deep_research-0.3.12.dist-info → local_deep_research-0.4.1.dist-info}/WHEEL +0 -0
- {local_deep_research-0.3.12.dist-info → local_deep_research-0.4.1.dist-info}/entry_points.txt +0 -0
- {local_deep_research-0.3.12.dist-info → local_deep_research-0.4.1.dist-info}/licenses/LICENSE +0 -0
local_deep_research/benchmarks/metrics/visualization.py (new file)
@@ -0,0 +1,205 @@
+"""
+Visualization utilities for optimization results.
+
+This module provides functions for generating visual representations
+of benchmark and optimization results.
+"""
+
+import logging
+import os
+from typing import Dict, List, Optional, Tuple, Union
+
+import numpy as np
+
+logger = logging.getLogger(__name__)
+
+# Check if matplotlib is available
+try:
+    import matplotlib.pyplot as plt
+    from matplotlib.figure import Figure
+
+    MATPLOTLIB_AVAILABLE = True
+except ImportError:
+    MATPLOTLIB_AVAILABLE = False
+    logger.warning("Matplotlib not available. Visualization functions will be limited.")
+
+
+def plot_optimization_history(
+    trial_values: List[float],
+    best_values: List[float],
+    output_file: Optional[str] = None,
+    title: str = "Optimization History",
+) -> Optional[Figure]:
+    """
+    Plot the optimization history.
+
+    Args:
+        trial_values: List of objective values for each trial
+        best_values: List of best values observed up to each trial
+        output_file: Path to save the plot (if None, returns figure without saving)
+        title: Plot title
+
+    Returns:
+        Matplotlib figure or None if matplotlib is not available
+    """
+    if not MATPLOTLIB_AVAILABLE:
+        logger.warning("Matplotlib not available. Cannot create plot.")
+        return None
+
+    fig, ax = plt.subplots(figsize=(10, 6))
+    trials = list(range(1, len(trial_values) + 1))
+
+    # Plot trial values and best values
+    ax.plot(trials, trial_values, "o-", alpha=0.5, label="Trial Value")
+    ax.plot(trials, best_values, "r-", label="Best Value")
+
+    # Add labels and title
+    ax.set_xlabel("Trial Number")
+    ax.set_ylabel("Objective Value")
+    ax.set_title(title)
+    ax.grid(True, linestyle="--", alpha=0.7)
+    ax.legend()
+
+    # Save or return
+    if output_file:
+        fig.tight_layout()
+        fig.savefig(output_file, dpi=300, bbox_inches="tight")
+        logger.info(f"Saved optimization history plot to {output_file}")
+
+    return fig
+
+
+def plot_parameter_importance(
+    parameter_names: List[str],
+    importance_values: List[float],
+    output_file: Optional[str] = None,
+    title: str = "Parameter Importance",
+) -> Optional[Figure]:
+    """
+    Plot parameter importance.
+
+    Args:
+        parameter_names: List of parameter names
+        importance_values: List of importance values
+        output_file: Path to save the plot (if None, returns figure without saving)
+        title: Plot title
+
+    Returns:
+        Matplotlib figure or None if matplotlib is not available
+    """
+    if not MATPLOTLIB_AVAILABLE:
+        logger.warning("Matplotlib not available. Cannot create plot.")
+        return None
+
+    # Sort by importance
+    sorted_indices = np.argsort(importance_values)
+    sorted_names = [parameter_names[i] for i in sorted_indices]
+    sorted_values = [importance_values[i] for i in sorted_indices]
+
+    fig, ax = plt.subplots(figsize=(10, 6))
+    y_pos = range(len(sorted_names))
+
+    # Create horizontal bar chart
+    ax.barh(y_pos, sorted_values, align="center")
+    ax.set_yticks(y_pos)
+    ax.set_yticklabels(sorted_names)
+    ax.invert_yaxis()  # Labels read top-to-bottom
+
+    # Add labels and title
+    ax.set_xlabel("Importance")
+    ax.set_title(title)
+    ax.grid(True, linestyle="--", alpha=0.3, axis="x")
+
+    # Save or return
+    if output_file:
+        fig.tight_layout()
+        fig.savefig(output_file, dpi=300, bbox_inches="tight")
+        logger.info(f"Saved parameter importance plot to {output_file}")
+
+    return fig
+
+
+def plot_quality_vs_speed(
+    quality_scores: List[float],
+    speed_scores: List[float],
+    parameter_values: Optional[List[Dict[str, any]]] = None,
+    output_file: Optional[str] = None,
+    title: str = "Quality vs. Speed Trade-off",
+) -> Optional[Figure]:
+    """
+    Plot quality vs. speed trade-off.
+
+    Args:
+        quality_scores: List of quality scores
+        speed_scores: List of speed scores
+        parameter_values: Optional list of parameter dictionaries for each point
+        output_file: Path to save the plot (if None, returns figure without saving)
+        title: Plot title
+
+    Returns:
+        Matplotlib figure or None if matplotlib is not available
+    """
+    if not MATPLOTLIB_AVAILABLE:
+        logger.warning("Matplotlib not available. Cannot create plot.")
+        return None
+
+    fig, ax = plt.subplots(figsize=(10, 8))
+
+    # Create scatter plot
+    scatter = ax.scatter(
+        speed_scores,
+        quality_scores,
+        c=np.arange(len(quality_scores)),
+        cmap="viridis",
+        alpha=0.7,
+        s=100
+    )
+
+    # Add colorbar to show trial number
+    cbar = plt.colorbar(scatter)
+    cbar.set_label("Trial Number")
+
+    # Add labels and title
+    ax.set_xlabel("Speed Score (higher = faster)")
+    ax.set_ylabel("Quality Score (higher = better)")
+    ax.set_title(title)
+    ax.grid(True, linestyle="--", alpha=0.5)
+
+    # Add reference lines
+    ax.axhline(y=0.7, color="r", linestyle="--", alpha=0.3, label="Good Quality Threshold")
+    ax.axvline(x=0.7, color="g", linestyle="--", alpha=0.3, label="Good Speed Threshold")
+
+    # Mark Pareto frontier
+    if len(quality_scores) > 2:
+        try:
+            # Identify Pareto frontier points
+            pareto_points = []
+            for i in range(len(quality_scores)):
+                is_pareto = True
+                for j in range(len(quality_scores)):
+                    if i != j:
+                        if quality_scores[j] >= quality_scores[i] and speed_scores[j] >= speed_scores[i]:
+                            if quality_scores[j] > quality_scores[i] or speed_scores[j] > speed_scores[i]:
+                                is_pareto = False
+                                break
+                if is_pareto:
+                    pareto_points.append((speed_scores[i], quality_scores[i]))
+
+            # Sort pareto points by speed score
+            pareto_points.sort()
+            if pareto_points:
+                pareto_x, pareto_y = zip(*pareto_points)
+                ax.plot(pareto_x, pareto_y, "k--", label="Pareto Frontier")
+                ax.scatter(pareto_x, pareto_y, c="red", s=50, alpha=0.8)
+        except Exception as e:
+            logger.warning(f"Error calculating Pareto frontier: {e}")
+
+    ax.legend()
+
+    # Save or return
+    if output_file:
+        fig.tight_layout()
+        fig.savefig(output_file, dpi=300, bbox_inches="tight")
+        logger.info(f"Saved quality vs. speed plot to {output_file}")
+
+    return fig
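For orientation, here is a minimal sketch of how these new plotting helpers could be called once the 0.4.1 wheel is installed. The import path follows the file listing above; the trial scores are invented purely for illustration and do not come from a real benchmark run.

```python
# Hypothetical data: invented trial scores, not real benchmark output.
from local_deep_research.benchmarks.metrics.visualization import (
    plot_optimization_history,
    plot_quality_vs_speed,
)

trial_values = [0.42, 0.55, 0.51, 0.63, 0.60]
best_values = [0.42, 0.55, 0.55, 0.63, 0.63]  # running best seen so far

# Each helper returns a matplotlib Figure, or None if matplotlib is missing.
plot_optimization_history(trial_values, best_values, output_file="history.png")
plot_quality_vs_speed(
    quality_scores=[0.60, 0.72, 0.68, 0.81],
    speed_scores=[0.90, 0.55, 0.70, 0.40],
    output_file="quality_vs_speed.png",
)
```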
local_deep_research/benchmarks/metrics.py (new file)
@@ -0,0 +1,11 @@
+"""
+Metrics calculation and report generation.
+
+This module is maintained for backward compatibility.
+New code should use the metrics package directly.
+"""
+
+from .metrics.calculation import calculate_metrics
+from .metrics.reporting import generate_report
+
+__all__ = ["calculate_metrics", "generate_report"]
local_deep_research/benchmarks/optimization/__init__.py (new file)
@@ -0,0 +1,32 @@
+"""
+Optimization submodule for parameter tuning in Local Deep Research.
+
+This module provides tools for finding optimal parameter configurations
+for the research system using Optuna and other optimization methods.
+"""
+
+from local_deep_research.benchmarks.optimization.api import (
+    optimize_for_efficiency,
+    optimize_for_quality,
+    optimize_for_speed,
+    optimize_parameters,
+)
+from local_deep_research.benchmarks.optimization.metrics import (
+    calculate_combined_score,
+    calculate_quality_metrics,
+    calculate_resource_metrics,
+    calculate_speed_metrics,
+)
+from local_deep_research.benchmarks.optimization.optuna_optimizer import OptunaOptimizer
+
+__all__ = [
+    "OptunaOptimizer",
+    "optimize_parameters",
+    "optimize_for_speed",
+    "optimize_for_quality",
+    "optimize_for_efficiency",
+    "calculate_quality_metrics",
+    "calculate_speed_metrics",
+    "calculate_resource_metrics",
+    "calculate_combined_score",
+]
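Because this `__init__` re-exports the high-level entry points, downstream code can import them straight from the subpackage rather than from the individual modules. A small usage sketch under that assumption (the query string below is a placeholder, not from the package):

```python
from local_deep_research.benchmarks.optimization import optimize_for_speed

# Returns a (best_parameters, best_score) tuple; placeholder query for illustration.
best_params, best_score = optimize_for_speed(
    "What are the main approaches to carbon capture?",
    n_trials=5,
)
print(best_params, best_score)
```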
local_deep_research/benchmarks/optimization/api.py (new file)
@@ -0,0 +1,274 @@
+"""
+API functions for optimization tasks in Local Deep Research.
+
+This module provides a simplified interface for parameter optimization
+without having to directly work with the optimizer classes.
+"""
+
+import logging
+import os
+from typing import Any, Callable, Dict, List, Optional, Tuple
+
+# No metrics imports needed here, they're used in the OptunaOptimizer
+from .optuna_optimizer import OptunaOptimizer
+
+logger = logging.getLogger(__name__)
+
+
+def optimize_parameters(
+    query: str,
+    param_space: Optional[Dict[str, Any]] = None,
+    output_dir: str = os.path.join("data", "optimization_results"),
+    model_name: Optional[str] = None,
+    provider: Optional[str] = None,
+    search_tool: Optional[str] = None,
+    temperature: float = 0.7,
+    n_trials: int = 30,
+    timeout: Optional[int] = None,
+    n_jobs: int = 1,
+    study_name: Optional[str] = None,
+    optimization_metrics: Optional[List[str]] = None,
+    metric_weights: Optional[Dict[str, float]] = None,
+    progress_callback: Optional[Callable[[int, int, Dict], None]] = None,
+    benchmark_weights: Optional[Dict[str, float]] = None,
+) -> Tuple[Dict[str, Any], float]:
+    """
+    Optimize parameters for Local Deep Research.
+
+    Args:
+        query: The research query to use for all experiments
+        param_space: Dictionary defining parameter search spaces (optional)
+        output_dir: Directory to save optimization results
+        model_name: Name of the LLM model to use
+        provider: LLM provider
+        search_tool: Search engine to use
+        temperature: LLM temperature
+        n_trials: Number of parameter combinations to try
+        timeout: Maximum seconds to run optimization (None for no limit)
+        n_jobs: Number of parallel jobs for optimization
+        study_name: Name of the Optuna study
+        optimization_metrics: List of metrics to optimize (default: ["quality", "speed"])
+        metric_weights: Dictionary of weights for each metric
+        progress_callback: Optional callback for progress updates
+        benchmark_weights: Dictionary mapping benchmark types to weights
+            (e.g., {"simpleqa": 0.6, "browsecomp": 0.4})
+            If None, only SimpleQA is used with weight 1.0
+
+    Returns:
+        Tuple of (best_parameters, best_score)
+    """
+    # Create optimizer
+    optimizer = OptunaOptimizer(
+        base_query=query,
+        output_dir=output_dir,
+        model_name=model_name,
+        provider=provider,
+        search_tool=search_tool,
+        temperature=temperature,
+        n_trials=n_trials,
+        timeout=timeout,
+        n_jobs=n_jobs,
+        study_name=study_name,
+        optimization_metrics=optimization_metrics,
+        metric_weights=metric_weights,
+        progress_callback=progress_callback,
+        benchmark_weights=benchmark_weights,
+    )
+
+    # Run optimization
+    return optimizer.optimize(param_space)
+
+
+def optimize_for_speed(
+    query: str,
+    n_trials: int = 20,
+    output_dir: str = os.path.join("data", "optimization_results"),
+    model_name: Optional[str] = None,
+    provider: Optional[str] = None,
+    search_tool: Optional[str] = None,
+    progress_callback: Optional[Callable[[int, int, Dict], None]] = None,
+    benchmark_weights: Optional[Dict[str, float]] = None,
+) -> Tuple[Dict[str, Any], float]:
+    """
+    Optimize parameters with a focus on speed performance.
+
+    Args:
+        query: The research query to use for all experiments
+        n_trials: Number of parameter combinations to try
+        output_dir: Directory to save optimization results
+        model_name: Name of the LLM model to use
+        provider: LLM provider
+        search_tool: Search engine to use
+        progress_callback: Optional callback for progress updates
+        benchmark_weights: Dictionary mapping benchmark types to weights
+            (e.g., {"simpleqa": 0.6, "browsecomp": 0.4})
+            If None, only SimpleQA is used with weight 1.0
+
+    Returns:
+        Tuple of (best_parameters, best_score)
+    """
+    # Focus on speed with reduced parameter space
+    param_space = {
+        "iterations": {
+            "type": "int",
+            "low": 1,
+            "high": 3,
+            "step": 1,
+        },
+        "questions_per_iteration": {
+            "type": "int",
+            "low": 1,
+            "high": 3,
+            "step": 1,
+        },
+        "search_strategy": {
+            "type": "categorical",
+            "choices": ["rapid", "parallel", "source_based"],
+        },
+    }
+
+    # Speed-focused weights
+    metric_weights = {"speed": 0.8, "quality": 0.2, "resource": 0.0}
+
+    return optimize_parameters(
+        query=query,
+        param_space=param_space,
+        output_dir=output_dir,
+        model_name=model_name,
+        provider=provider,
+        search_tool=search_tool,
+        n_trials=n_trials,
+        metric_weights=metric_weights,
+        optimization_metrics=["speed", "quality"],
+        progress_callback=progress_callback,
+        benchmark_weights=benchmark_weights,
+    )
+
+
+def optimize_for_quality(
+    query: str,
+    n_trials: int = 30,
+    output_dir: str = os.path.join("data", "optimization_results"),
+    model_name: Optional[str] = None,
+    provider: Optional[str] = None,
+    search_tool: Optional[str] = None,
+    progress_callback: Optional[Callable[[int, int, Dict], None]] = None,
+    benchmark_weights: Optional[Dict[str, float]] = None,
+) -> Tuple[Dict[str, Any], float]:
+    """
+    Optimize parameters with a focus on result quality.
+
+    Args:
+        query: The research query to use for all experiments
+        n_trials: Number of parameter combinations to try
+        output_dir: Directory to save optimization results
+        model_name: Name of the LLM model to use
+        provider: LLM provider
+        search_tool: Search engine to use
+        progress_callback: Optional callback for progress updates
+        benchmark_weights: Dictionary mapping benchmark types to weights
+            (e.g., {"simpleqa": 0.6, "browsecomp": 0.4})
+            If None, only SimpleQA is used with weight 1.0
+
+    Returns:
+        Tuple of (best_parameters, best_score)
+    """
+    # Quality-focused weights
+    metric_weights = {"quality": 0.9, "speed": 0.1, "resource": 0.0}
+
+    return optimize_parameters(
+        query=query,
+        output_dir=output_dir,
+        model_name=model_name,
+        provider=provider,
+        search_tool=search_tool,
+        n_trials=n_trials,
+        metric_weights=metric_weights,
+        optimization_metrics=["quality", "speed"],
+        progress_callback=progress_callback,
+        benchmark_weights=benchmark_weights,
+    )
+
+
+def optimize_for_efficiency(
+    query: str,
+    n_trials: int = 25,
+    output_dir: str = os.path.join("data", "optimization_results"),
+    model_name: Optional[str] = None,
+    provider: Optional[str] = None,
+    search_tool: Optional[str] = None,
+    progress_callback: Optional[Callable[[int, int, Dict], None]] = None,
+    benchmark_weights: Optional[Dict[str, float]] = None,
+) -> Tuple[Dict[str, Any], float]:
+    """
+    Optimize parameters with a focus on resource efficiency.
+
+    Args:
+        query: The research query to use for all experiments
+        n_trials: Number of parameter combinations to try
+        output_dir: Directory to save optimization results
+        model_name: Name of the LLM model to use
+        provider: LLM provider
+        search_tool: Search engine to use
+        progress_callback: Optional callback for progress updates
+        benchmark_weights: Dictionary mapping benchmark types to weights
+            (e.g., {"simpleqa": 0.6, "browsecomp": 0.4})
+            If None, only SimpleQA is used with weight 1.0
+
+    Returns:
+        Tuple of (best_parameters, best_score)
+    """
+    # Balance of quality, speed and resource usage
+    metric_weights = {"quality": 0.4, "speed": 0.3, "resource": 0.3}
+
+    return optimize_parameters(
+        query=query,
+        output_dir=output_dir,
+        model_name=model_name,
+        provider=provider,
+        search_tool=search_tool,
+        n_trials=n_trials,
+        metric_weights=metric_weights,
+        optimization_metrics=["quality", "speed", "resource"],
+        progress_callback=progress_callback,
+        benchmark_weights=benchmark_weights,
+    )
+
+
+def get_default_param_space() -> Dict[str, Any]:
+    """
+    Get the default parameter search space for optimization.
+
+    Returns:
+        Dictionary defining the default parameter search spaces
+    """
+    return {
+        "iterations": {
+            "type": "int",
+            "low": 1,
+            "high": 5,
+            "step": 1,
+        },
+        "questions_per_iteration": {
+            "type": "int",
+            "low": 1,
+            "high": 5,
+            "step": 1,
+        },
+        "search_strategy": {
+            "type": "categorical",
+            "choices": ["iterdrag", "standard", "rapid", "parallel", "source_based"],
+        },
+        "max_results": {
+            "type": "int",
+            "low": 10,
+            "high": 100,
+            "step": 10,
+        },
+        "max_filtered_results": {
+            "type": "int",
+            "low": 5,
+            "high": 50,
+            "step": 5,
+        },
+    }
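The parameter-space dictionaries above follow a simple schema ("type" plus either low/high/step or a list of choices), so a custom space can be passed straight to `optimize_parameters`. A hedged sketch with illustrative values only; the query string and weight split are made up for the example:

```python
from local_deep_research.benchmarks.optimization.api import (
    get_default_param_space,
    optimize_parameters,
)

# Narrow the default space to two parameters (values chosen only for illustration).
param_space = {
    "iterations": {"type": "int", "low": 1, "high": 3, "step": 1},
    "search_strategy": {"type": "categorical", "choices": ["rapid", "source_based"]},
}

best_params, best_score = optimize_parameters(
    query="How do solid-state batteries store energy?",  # placeholder query
    param_space=param_space,
    n_trials=10,
    metric_weights={"quality": 0.7, "speed": 0.3},
)

# The full default search space is also available for inspection:
print(get_default_param_space())
```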
local_deep_research/benchmarks/optimization/metrics.py (new file)
@@ -0,0 +1,20 @@
+"""
+Optimization metrics calculation.
+
+This module is maintained for backward compatibility.
+New code should use the unified metrics module.
+"""
+
+from ..metrics.calculation import (
+    calculate_combined_score,
+    calculate_quality_metrics,
+    calculate_resource_metrics,
+    calculate_speed_metrics,
+)
+
+__all__ = [
+    "calculate_quality_metrics",
+    "calculate_speed_metrics",
+    "calculate_resource_metrics",
+    "calculate_combined_score",
+]