local-deep-research 0.3.12__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- local_deep_research/__version__.py +1 -1
- local_deep_research/advanced_search_system/filters/base_filter.py +2 -3
- local_deep_research/advanced_search_system/filters/cross_engine_filter.py +4 -5
- local_deep_research/advanced_search_system/filters/journal_reputation_filter.py +298 -0
- local_deep_research/advanced_search_system/findings/repository.py +0 -3
- local_deep_research/advanced_search_system/strategies/base_strategy.py +1 -2
- local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +14 -18
- local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +4 -8
- local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +5 -6
- local_deep_research/advanced_search_system/strategies/source_based_strategy.py +2 -2
- local_deep_research/advanced_search_system/strategies/standard_strategy.py +9 -7
- local_deep_research/api/benchmark_functions.py +288 -0
- local_deep_research/api/research_functions.py +8 -4
- local_deep_research/benchmarks/README.md +162 -0
- local_deep_research/benchmarks/__init__.py +51 -0
- local_deep_research/benchmarks/benchmark_functions.py +353 -0
- local_deep_research/benchmarks/cli/__init__.py +16 -0
- local_deep_research/benchmarks/cli/benchmark_commands.py +338 -0
- local_deep_research/benchmarks/cli.py +347 -0
- local_deep_research/benchmarks/comparison/__init__.py +12 -0
- local_deep_research/benchmarks/comparison/evaluator.py +768 -0
- local_deep_research/benchmarks/datasets/__init__.py +53 -0
- local_deep_research/benchmarks/datasets/base.py +295 -0
- local_deep_research/benchmarks/datasets/browsecomp.py +116 -0
- local_deep_research/benchmarks/datasets/custom_dataset_template.py +98 -0
- local_deep_research/benchmarks/datasets/simpleqa.py +74 -0
- local_deep_research/benchmarks/datasets/utils.py +116 -0
- local_deep_research/benchmarks/datasets.py +31 -0
- local_deep_research/benchmarks/efficiency/__init__.py +14 -0
- local_deep_research/benchmarks/efficiency/resource_monitor.py +367 -0
- local_deep_research/benchmarks/efficiency/speed_profiler.py +214 -0
- local_deep_research/benchmarks/evaluators/__init__.py +18 -0
- local_deep_research/benchmarks/evaluators/base.py +74 -0
- local_deep_research/benchmarks/evaluators/browsecomp.py +83 -0
- local_deep_research/benchmarks/evaluators/composite.py +121 -0
- local_deep_research/benchmarks/evaluators/simpleqa.py +271 -0
- local_deep_research/benchmarks/graders.py +410 -0
- local_deep_research/benchmarks/metrics/README.md +80 -0
- local_deep_research/benchmarks/metrics/__init__.py +24 -0
- local_deep_research/benchmarks/metrics/calculation.py +385 -0
- local_deep_research/benchmarks/metrics/reporting.py +155 -0
- local_deep_research/benchmarks/metrics/visualization.py +205 -0
- local_deep_research/benchmarks/metrics.py +11 -0
- local_deep_research/benchmarks/optimization/__init__.py +32 -0
- local_deep_research/benchmarks/optimization/api.py +274 -0
- local_deep_research/benchmarks/optimization/metrics.py +20 -0
- local_deep_research/benchmarks/optimization/optuna_optimizer.py +1163 -0
- local_deep_research/benchmarks/runners.py +434 -0
- local_deep_research/benchmarks/templates.py +65 -0
- local_deep_research/config/llm_config.py +26 -23
- local_deep_research/config/search_config.py +1 -5
- local_deep_research/defaults/default_settings.json +108 -7
- local_deep_research/search_system.py +16 -8
- local_deep_research/utilities/db_utils.py +3 -6
- local_deep_research/utilities/es_utils.py +441 -0
- local_deep_research/utilities/log_utils.py +36 -0
- local_deep_research/utilities/search_utilities.py +8 -9
- local_deep_research/web/app.py +7 -9
- local_deep_research/web/app_factory.py +9 -12
- local_deep_research/web/database/migrations.py +8 -5
- local_deep_research/web/database/models.py +20 -0
- local_deep_research/web/database/schema_upgrade.py +5 -8
- local_deep_research/web/models/database.py +15 -18
- local_deep_research/web/routes/benchmark_routes.py +427 -0
- local_deep_research/web/routes/research_routes.py +13 -17
- local_deep_research/web/routes/settings_routes.py +264 -67
- local_deep_research/web/services/research_service.py +47 -57
- local_deep_research/web/services/settings_manager.py +1 -4
- local_deep_research/web/services/settings_service.py +4 -6
- local_deep_research/web/static/css/styles.css +12 -0
- local_deep_research/web/static/js/components/logpanel.js +164 -155
- local_deep_research/web/static/js/components/research.js +44 -3
- local_deep_research/web/static/js/components/settings.js +27 -0
- local_deep_research/web/static/js/services/socket.js +47 -0
- local_deep_research/web_search_engines/default_search_engines.py +38 -0
- local_deep_research/web_search_engines/engines/meta_search_engine.py +100 -33
- local_deep_research/web_search_engines/engines/search_engine_arxiv.py +31 -17
- local_deep_research/web_search_engines/engines/search_engine_brave.py +8 -3
- local_deep_research/web_search_engines/engines/search_engine_elasticsearch.py +343 -0
- local_deep_research/web_search_engines/engines/search_engine_google_pse.py +14 -6
- local_deep_research/web_search_engines/engines/search_engine_local.py +19 -23
- local_deep_research/web_search_engines/engines/search_engine_local_all.py +9 -12
- local_deep_research/web_search_engines/engines/search_engine_searxng.py +12 -17
- local_deep_research/web_search_engines/engines/search_engine_serpapi.py +8 -4
- local_deep_research/web_search_engines/search_engine_base.py +22 -5
- local_deep_research/web_search_engines/search_engine_factory.py +32 -11
- local_deep_research/web_search_engines/search_engines_config.py +14 -1
- {local_deep_research-0.3.12.dist-info → local_deep_research-0.4.0.dist-info}/METADATA +10 -2
- {local_deep_research-0.3.12.dist-info → local_deep_research-0.4.0.dist-info}/RECORD +92 -49
- {local_deep_research-0.3.12.dist-info → local_deep_research-0.4.0.dist-info}/WHEEL +0 -0
- {local_deep_research-0.3.12.dist-info → local_deep_research-0.4.0.dist-info}/entry_points.txt +0 -0
- {local_deep_research-0.3.12.dist-info → local_deep_research-0.4.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,347 @@
|
|
1
|
+
"""
|
2
|
+
Command-line interface for benchmarking functionality.
|
3
|
+
|
4
|
+
This module provides a command-line interface for running parameter
|
5
|
+
optimization, comparison, and benchmarking tasks.
|
6
|
+
"""
|
7
|
+
|
8
|
+
import argparse
|
9
|
+
import logging
|
10
|
+
import os
|
11
|
+
import sys
|
12
|
+
from datetime import datetime
|
13
|
+
from typing import Any, Dict, List, Optional
|
14
|
+
|
15
|
+
from .comparison import compare_configurations
|
16
|
+
from .efficiency import ResourceMonitor, SpeedProfiler
|
17
|
+
from .optimization import optimize_parameters
|
18
|
+
|
19
|
+
# Logging is configured as a side effect of importing this module: INFO level
# with a timestamp / logger-name / level / message layout.
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

# Module-level logger shared by every command handler in this file.
logger = logging.getLogger(__name__)
|
24
|
+
|
25
|
+
|
26
|
+
def parse_args():
    """Build the benchmarking CLI parser and parse the process arguments.

    Three sub-commands are registered: ``optimize``, ``compare`` and
    ``profile``. Each takes a positional ``query`` plus its own options.
    The selected sub-command name is stored under ``command``; it is
    ``None`` when no sub-command is given, because the sub-parser group is
    not marked as required.

    Returns:
        argparse.Namespace: The parsed command-line arguments.
    """
    usage_examples = """
Examples:
# Run parameter optimization
python -m local_deep_research.benchmarks.cli optimize "What are the latest advancements in quantum computing?"

# Compare different configurations
python -m local_deep_research.benchmarks.cli compare "What are the effects of climate change?" --configs configs.json

# Run efficiency profiling
python -m local_deep_research.benchmarks.cli profile "How do neural networks work?"
"""

    def add_shared_options(sub, default_output_dir):
        # Every sub-command registers these four options back to back, in
        # this order; only the default output directory differs.
        sub.add_argument(
            "--output-dir",
            default=default_output_dir,
            help="Directory to save results",
        )
        sub.add_argument("--model", help="Model name for the LLM")
        sub.add_argument("--provider", help="Provider for the LLM")
        sub.add_argument("--search-tool", help="Search tool to use")

    root = argparse.ArgumentParser(
        description="Local Deep Research Benchmarking Tools",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples,
    )
    commands = root.add_subparsers(dest="command", help="Command to run")

    # --- optimize -----------------------------------------------------
    optimize_cmd = commands.add_parser("optimize", help="Optimize parameters")
    optimize_cmd.add_argument("query", help="Research query to optimize for")
    add_shared_options(optimize_cmd, "data/optimization_results")
    optimize_cmd.add_argument(
        "--temperature", type=float, default=0.7, help="LLM temperature"
    )
    optimize_cmd.add_argument(
        "--n-trials",
        type=int,
        default=30,
        help="Number of parameter combinations to try",
    )
    optimize_cmd.add_argument(
        "--timeout", type=int, help="Maximum seconds to run optimization"
    )
    optimize_cmd.add_argument(
        "--n-jobs",
        type=int,
        default=1,
        help="Number of parallel jobs for optimization",
    )
    optimize_cmd.add_argument("--study-name", help="Name of the Optuna study")
    optimize_cmd.add_argument(
        "--speed-focus", action="store_true", help="Focus optimization on speed"
    )
    optimize_cmd.add_argument(
        "--quality-focus",
        action="store_true",
        help="Focus optimization on quality",
    )

    # --- compare ------------------------------------------------------
    compare_cmd = commands.add_parser("compare", help="Compare configurations")
    compare_cmd.add_argument("query", help="Research query to compare with")
    compare_cmd.add_argument(
        "--configs",
        required=True,
        help="JSON file with configurations to compare",
    )
    add_shared_options(compare_cmd, "data/benchmark_results/comparison")
    compare_cmd.add_argument(
        "--repetitions",
        type=int,
        default=1,
        help="Number of repetitions for each configuration",
    )

    # --- profile ------------------------------------------------------
    profile_cmd = commands.add_parser("profile", help="Profile resource usage")
    profile_cmd.add_argument("query", help="Research query to profile")
    add_shared_options(profile_cmd, "data/benchmark_results/profiling")
    profile_cmd.add_argument(
        "--iterations", type=int, default=2, help="Number of search iterations"
    )
    profile_cmd.add_argument(
        "--questions", type=int, default=2, help="Questions per iteration"
    )
    profile_cmd.add_argument(
        "--strategy", default="iterdrag", help="Search strategy to use"
    )

    return root.parse_args()
|
123
|
+
|
124
|
+
|
125
|
+
def run_optimization(args):
    """Handle the ``optimize`` sub-command.

    Forwards the parsed options to ``optimize_parameters`` and prints the
    best parameters and score it returns.

    Args:
        args: Parsed arguments. Reads ``query``, ``output_dir``, ``model``,
            ``provider``, ``search_tool``, ``temperature``, ``n_trials``,
            ``timeout``, ``n_jobs``, ``study_name``, ``speed_focus`` and
            ``quality_focus``.

    Returns:
        int: Always ``0``; exceptions raised by ``optimize_parameters`` are
        not caught here.
    """
    logger.info(f"Starting parameter optimization for query: {args.query}")

    # Pick metric weights from the focus flags. When both flags are set the
    # speed focus wins; with neither set no weights are passed (None).
    if args.speed_focus:
        weights = {"speed": 0.8, "quality": 0.2}
    elif args.quality_focus:
        weights = {"quality": 0.8, "speed": 0.2}
    else:
        weights = None

    optimizer_options = {
        "query": args.query,
        "output_dir": args.output_dir,
        "model_name": args.model,
        "provider": args.provider,
        "search_tool": args.search_tool,
        "temperature": args.temperature,
        "n_trials": args.n_trials,
        "timeout": args.timeout,
        "n_jobs": args.n_jobs,
        "study_name": args.study_name,
        "metric_weights": weights,
    }
    best_params, best_score = optimize_parameters(**optimizer_options)

    # Report the outcome on stdout
    print("\nOptimization Results:")
    print("====================")
    print(f"Best Parameters: {best_params}")
    print(f"Best Score: {best_score:.4f}")
    print(f"Results saved to: {args.output_dir}")

    return 0
|
159
|
+
|
160
|
+
|
161
|
+
def run_comparison(args):
    """Handle the ``compare`` sub-command.

    Loads a JSON array of configurations from ``args.configs``, passes it to
    ``compare_configurations`` and prints a summary followed by the
    successful configurations ranked by overall score.

    Args:
        args: Parsed arguments. Reads ``query``, ``configs``, ``output_dir``,
            ``model``, ``provider``, ``search_tool`` and ``repetitions``.

    Returns:
        int: ``0`` after a completed comparison; ``1`` when the
        configurations file cannot be read or parsed, is not a JSON array,
        or is an empty array.
    """
    import json

    logger.info(f"Comparing configurations for query: {args.query}")

    # Load configurations from file. JSON is read as UTF-8 explicitly so the
    # result does not depend on the platform's default encoding. Only I/O and
    # decoding/parsing failures are handled here (JSONDecodeError and
    # UnicodeDecodeError are both ValueError subclasses).
    try:
        with open(args.configs, "r", encoding="utf-8") as f:
            configurations = json.load(f)
    except (OSError, ValueError) as e:
        logger.error(f"Error loading configurations file: {str(e)}")
        return 1

    # Validate the shape of the loaded document
    if not isinstance(configurations, list):
        logger.error("Configurations file must contain a JSON array")
        return 1

    if not configurations:
        logger.error("No configurations found in the file")
        return 1

    # Run comparison
    results = compare_configurations(
        query=args.query,
        configurations=configurations,
        output_dir=args.output_dir,
        model_name=args.model,
        provider=args.provider,
        search_tool=args.search_tool,
        repetitions=args.repetitions,
    )

    # Print summary
    print("\nComparison Results:")
    print("==================")
    print(f"Configurations tested: {results['configurations_tested']}")
    print(f"Successful configurations: {results['successful_configurations']}")
    print(f"Failed configurations: {results['failed_configurations']}")

    # Print ranking. The list is sorted here so the printed order matches the
    # "by Overall Score" heading regardless of the order returned by
    # compare_configurations; the sort is stable, so an already-ranked list
    # is left untouched.
    print("\nRanking by Overall Score:")
    successful = [r for r in results["results"] if r.get("success", False)]
    ranked = sorted(
        successful, key=lambda r: r.get("overall_score", 0), reverse=True
    )
    for position, result in enumerate(ranked, start=1):
        print(
            f"{position}. {result['name']}: {result.get('overall_score', 0):.4f}"
        )

    print(f"\nResults saved to: {results.get('report_path', args.output_dir)}")

    return 0
|
211
|
+
|
212
|
+
|
213
|
+
def run_profiling(args):
    """Handle the ``profile`` sub-command.

    Runs one ``AdvancedSearchSystem.analyze_topic`` call for ``args.query``
    while a ``SpeedProfiler`` and a ``ResourceMonitor`` are active, prints a
    timing and resource summary, and writes the details to a timestamped
    JSON file inside ``args.output_dir``.

    Both profiling tools are stopped exactly once, whether the measured work
    succeeds, raises, or is interrupted.

    Args:
        args: Parsed arguments. Reads ``query``, ``output_dir``, ``model``,
            ``provider``, ``search_tool``, ``iterations``, ``questions`` and
            ``strategy``.

    Returns:
        int: ``0`` when profiling and saving succeed; ``1`` when an
        ``Exception`` is raised during setup, analysis, reporting or saving
        (the error is logged).
    """
    import json

    from local_deep_research.config.llm_config import get_llm
    from local_deep_research.config.search_config import get_search
    from local_deep_research.search_system import AdvancedSearchSystem

    logger.info(f"Profiling resource usage for query: {args.query}")

    # Create output directory
    os.makedirs(args.output_dir, exist_ok=True)

    # Initialize profiling tools
    speed_profiler = SpeedProfiler()
    resource_monitor = ResourceMonitor(sampling_interval=0.5)

    # Start profiling
    speed_profiler.start()
    resource_monitor.start()

    try:
        try:
            # Initialize system
            with speed_profiler.timer("initialization"):
                # Get LLM
                llm = get_llm(model_name=args.model, provider=args.provider)

                # Get search engine (left as None when no tool was requested)
                search = None
                if args.search_tool:
                    search = get_search(args.search_tool, llm_instance=llm)

                # Create search system
                system = AdvancedSearchSystem(llm=llm, search=search)
                system.max_iterations = args.iterations
                system.questions_per_iteration = args.questions
                system.strategy_name = args.strategy

            # Run analysis
            with speed_profiler.timer("analysis"):
                results = system.analyze_topic(args.query)
        finally:
            # Stop profiling exactly once, also on KeyboardInterrupt or
            # SystemExit. The nested finally guarantees the resource monitor
            # is stopped even if stopping the speed profiler raises.
            try:
                speed_profiler.stop()
            finally:
                resource_monitor.stop()

        # Get profiling results
        timing_results = speed_profiler.get_summary()
        resource_results = resource_monitor.get_combined_stats()

        # Print summary
        print("\nProfiling Results:")
        print("=================")

        # Timing summary
        print("\nTiming Summary:")
        total_duration = timing_results.get("total_duration", 0)
        print(f"Total execution time: {total_duration:.2f} seconds")

        # Component breakdown: every "<component>_duration" entry except the
        # total, shown with its share of the total time.
        print("\nComponent Breakdown:")
        for name, value in timing_results.items():
            if name != "total_duration" and name.endswith("_duration"):
                component = name.replace("_duration", "")
                duration = value
                # Guard against division by zero when no total was recorded
                percent = (
                    (duration / total_duration * 100) if total_duration > 0 else 0
                )
                print(f"- {component}: {duration:.2f}s ({percent:.1f}%)")

        # Resource summary
        print("\nResource Usage Summary:")
        print(
            f"Peak memory: {resource_results.get('process_memory_max_mb', 0):.1f} MB"
        )
        print(
            f"Average memory: {resource_results.get('process_memory_avg_mb', 0):.1f} MB"
        )
        print(f"Peak CPU: {resource_results.get('process_cpu_max', 0):.1f}%")
        print(f"Average CPU: {resource_results.get('process_cpu_avg', 0):.1f}%")

        # Save results
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        results_file = os.path.join(
            args.output_dir, f"profiling_results_{timestamp}.json"
        )

        with open(results_file, "w", encoding="utf-8") as f:
            json.dump(
                {
                    "query": args.query,
                    "configuration": {
                        "model": args.model,
                        "provider": args.provider,
                        "search_tool": args.search_tool,
                        "iterations": args.iterations,
                        "questions_per_iteration": args.questions,
                        "strategy": args.strategy,
                    },
                    "timing_results": timing_results,
                    "resource_results": resource_results,
                    "findings_count": len(results.get("findings", [])),
                    "knowledge_length": len(results.get("current_knowledge", "")),
                    "timestamp": timestamp,
                },
                f,
                indent=2,
            )

        print(f"\nDetailed results saved to: {results_file}")

        return 0

    except Exception as e:
        # The profilers have already been stopped by the inner finally, so
        # only the failure itself is reported here.
        logger.error(f"Error during profiling: {str(e)}")
        return 1
|
329
|
+
|
330
|
+
|
331
|
+
def main():
    """Parse the command line and dispatch to the matching command handler.

    Returns:
        int: The handler's return value, or ``1`` when no sub-command was
        given (a hint is printed in that case).
    """
    args = parse_args()

    # Sub-command name -> handler function
    handlers = {
        "optimize": run_optimization,
        "compare": run_comparison,
        "profile": run_profiling,
    }

    handler = handlers.get(args.command)
    if handler is None:
        print("Please specify a command. Use --help for more information.")
        return 1

    return handler(args)
|
344
|
+
|
345
|
+
|
346
|
+
# Script entry point: the value returned by main() becomes the exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)
|
@@ -0,0 +1,12 @@
|
|
1
|
+
"""
|
2
|
+
Comparison submodule for evaluating different configurations of Local Deep Research.
|
3
|
+
|
4
|
+
This module provides tools for comparing the performance of different
|
5
|
+
parameters, models, and search engines.
|
6
|
+
"""
|
7
|
+
|
8
|
+
from local_deep_research.benchmarks.comparison.evaluator import compare_configurations
|
9
|
+
|
10
|
+
# Public API of the comparison subpackage.
__all__ = ["compare_configurations"]
|