local-deep-research 0.3.12__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff compares the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between these versions as they appear in their respective public registries.
Files changed (92)
  1. local_deep_research/__version__.py +1 -1
  2. local_deep_research/advanced_search_system/filters/base_filter.py +2 -3
  3. local_deep_research/advanced_search_system/filters/cross_engine_filter.py +4 -5
  4. local_deep_research/advanced_search_system/filters/journal_reputation_filter.py +298 -0
  5. local_deep_research/advanced_search_system/findings/repository.py +0 -3
  6. local_deep_research/advanced_search_system/strategies/base_strategy.py +1 -2
  7. local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +14 -18
  8. local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +4 -8
  9. local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +5 -6
  10. local_deep_research/advanced_search_system/strategies/source_based_strategy.py +2 -2
  11. local_deep_research/advanced_search_system/strategies/standard_strategy.py +9 -7
  12. local_deep_research/api/benchmark_functions.py +288 -0
  13. local_deep_research/api/research_functions.py +8 -4
  14. local_deep_research/benchmarks/README.md +162 -0
  15. local_deep_research/benchmarks/__init__.py +51 -0
  16. local_deep_research/benchmarks/benchmark_functions.py +353 -0
  17. local_deep_research/benchmarks/cli/__init__.py +16 -0
  18. local_deep_research/benchmarks/cli/benchmark_commands.py +338 -0
  19. local_deep_research/benchmarks/cli.py +347 -0
  20. local_deep_research/benchmarks/comparison/__init__.py +12 -0
  21. local_deep_research/benchmarks/comparison/evaluator.py +768 -0
  22. local_deep_research/benchmarks/datasets/__init__.py +53 -0
  23. local_deep_research/benchmarks/datasets/base.py +295 -0
  24. local_deep_research/benchmarks/datasets/browsecomp.py +116 -0
  25. local_deep_research/benchmarks/datasets/custom_dataset_template.py +98 -0
  26. local_deep_research/benchmarks/datasets/simpleqa.py +74 -0
  27. local_deep_research/benchmarks/datasets/utils.py +116 -0
  28. local_deep_research/benchmarks/datasets.py +31 -0
  29. local_deep_research/benchmarks/efficiency/__init__.py +14 -0
  30. local_deep_research/benchmarks/efficiency/resource_monitor.py +367 -0
  31. local_deep_research/benchmarks/efficiency/speed_profiler.py +214 -0
  32. local_deep_research/benchmarks/evaluators/__init__.py +18 -0
  33. local_deep_research/benchmarks/evaluators/base.py +74 -0
  34. local_deep_research/benchmarks/evaluators/browsecomp.py +83 -0
  35. local_deep_research/benchmarks/evaluators/composite.py +121 -0
  36. local_deep_research/benchmarks/evaluators/simpleqa.py +271 -0
  37. local_deep_research/benchmarks/graders.py +410 -0
  38. local_deep_research/benchmarks/metrics/README.md +80 -0
  39. local_deep_research/benchmarks/metrics/__init__.py +24 -0
  40. local_deep_research/benchmarks/metrics/calculation.py +385 -0
  41. local_deep_research/benchmarks/metrics/reporting.py +155 -0
  42. local_deep_research/benchmarks/metrics/visualization.py +205 -0
  43. local_deep_research/benchmarks/metrics.py +11 -0
  44. local_deep_research/benchmarks/optimization/__init__.py +32 -0
  45. local_deep_research/benchmarks/optimization/api.py +274 -0
  46. local_deep_research/benchmarks/optimization/metrics.py +20 -0
  47. local_deep_research/benchmarks/optimization/optuna_optimizer.py +1163 -0
  48. local_deep_research/benchmarks/runners.py +434 -0
  49. local_deep_research/benchmarks/templates.py +65 -0
  50. local_deep_research/config/llm_config.py +26 -23
  51. local_deep_research/config/search_config.py +1 -5
  52. local_deep_research/defaults/default_settings.json +108 -7
  53. local_deep_research/search_system.py +16 -8
  54. local_deep_research/utilities/db_utils.py +3 -6
  55. local_deep_research/utilities/es_utils.py +441 -0
  56. local_deep_research/utilities/log_utils.py +36 -0
  57. local_deep_research/utilities/search_utilities.py +8 -9
  58. local_deep_research/web/app.py +7 -9
  59. local_deep_research/web/app_factory.py +9 -12
  60. local_deep_research/web/database/migrations.py +8 -5
  61. local_deep_research/web/database/models.py +20 -0
  62. local_deep_research/web/database/schema_upgrade.py +5 -8
  63. local_deep_research/web/models/database.py +15 -18
  64. local_deep_research/web/routes/benchmark_routes.py +427 -0
  65. local_deep_research/web/routes/research_routes.py +13 -17
  66. local_deep_research/web/routes/settings_routes.py +264 -67
  67. local_deep_research/web/services/research_service.py +47 -57
  68. local_deep_research/web/services/settings_manager.py +1 -4
  69. local_deep_research/web/services/settings_service.py +4 -6
  70. local_deep_research/web/static/css/styles.css +12 -0
  71. local_deep_research/web/static/js/components/logpanel.js +164 -155
  72. local_deep_research/web/static/js/components/research.js +44 -3
  73. local_deep_research/web/static/js/components/settings.js +27 -0
  74. local_deep_research/web/static/js/services/socket.js +47 -0
  75. local_deep_research/web_search_engines/default_search_engines.py +38 -0
  76. local_deep_research/web_search_engines/engines/meta_search_engine.py +100 -33
  77. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +31 -17
  78. local_deep_research/web_search_engines/engines/search_engine_brave.py +8 -3
  79. local_deep_research/web_search_engines/engines/search_engine_elasticsearch.py +343 -0
  80. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +14 -6
  81. local_deep_research/web_search_engines/engines/search_engine_local.py +19 -23
  82. local_deep_research/web_search_engines/engines/search_engine_local_all.py +9 -12
  83. local_deep_research/web_search_engines/engines/search_engine_searxng.py +12 -17
  84. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +8 -4
  85. local_deep_research/web_search_engines/search_engine_base.py +22 -5
  86. local_deep_research/web_search_engines/search_engine_factory.py +32 -11
  87. local_deep_research/web_search_engines/search_engines_config.py +14 -1
  88. {local_deep_research-0.3.12.dist-info → local_deep_research-0.4.0.dist-info}/METADATA +10 -2
  89. {local_deep_research-0.3.12.dist-info → local_deep_research-0.4.0.dist-info}/RECORD +92 -49
  90. {local_deep_research-0.3.12.dist-info → local_deep_research-0.4.0.dist-info}/WHEEL +0 -0
  91. {local_deep_research-0.3.12.dist-info → local_deep_research-0.4.0.dist-info}/entry_points.txt +0 -0
  92. {local_deep_research-0.3.12.dist-info → local_deep_research-0.4.0.dist-info}/licenses/LICENSE +0 -0
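The single hunk below appears to correspond to the new local_deep_research/benchmarks/comparison/evaluator.py, the only entry above that adds exactly 768 lines. Based on the compare_configurations signature defined in that hunk, a minimal invocation might look like the sketch below. The import path is inferred from the file list rather than confirmed elsewhere, and the query, configuration names, and parameter values are purely illustrative:

from local_deep_research.benchmarks.comparison.evaluator import compare_configurations

# Illustrative configurations; keys such as "iterations" and
# "questions_per_iteration" are the ones read (with defaults) by
# _evaluate_single_configuration in the hunk below.
configurations = [
    {"name": "Fast", "iterations": 1, "questions_per_iteration": 1},
    {"name": "Thorough", "iterations": 3, "questions_per_iteration": 3, "search_strategy": "iterdrag"},
]

report = compare_configurations(
    query="What are the main drivers of battery cost decline?",  # illustrative query
    configurations=configurations,
    output_dir="comparison_results",
    repetitions=2,
    metric_weights={"quality": 0.6, "speed": 0.4, "resource": 0.0},
)
print(report["report_path"])  # path of the JSON report written by compare_configurations

Each configuration is evaluated repetitions times, averaged with _calculate_average_metrics, scored with calculate_combined_score, and written out as a JSON report plus PNG charts under output_dir/visualizations.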
@@ -0,0 +1,768 @@
+ """
+ Configuration comparison for Local Deep Research.
+
+ This module provides functions for comparing different parameter configurations
+ and evaluating their performance across various metrics.
+ """
+
+ import json
+ import logging
+ import os
+ import time
+ from datetime import datetime
+ from typing import Dict, List, Optional, Any, Tuple
+
+ import numpy as np
+ import matplotlib.pyplot as plt
+
+ from local_deep_research.config.llm_config import get_llm
+ from local_deep_research.config.search_config import get_search
+ from local_deep_research.search_system import AdvancedSearchSystem
+ from local_deep_research.benchmarks.efficiency.speed_profiler import SpeedProfiler
+ from local_deep_research.benchmarks.efficiency.resource_monitor import ResourceMonitor
+ from local_deep_research.benchmarks.optimization.metrics import (
+     calculate_quality_metrics,
+     calculate_speed_metrics,
+     calculate_resource_metrics,
+     calculate_combined_score
+ )
+
+ logger = logging.getLogger(__name__)
+
+
+ def compare_configurations(
+     query: str,
+     configurations: List[Dict[str, Any]],
+     output_dir: str = "comparison_results",
+     model_name: Optional[str] = None,
+     provider: Optional[str] = None,
+     search_tool: Optional[str] = None,
+     repetitions: int = 1,
+     metric_weights: Optional[Dict[str, float]] = None,
+ ) -> Dict[str, Any]:
+     """
+     Compare multiple parameter configurations.
+
+     Args:
+         query: Research query to use for evaluation
+         configurations: List of parameter configurations to compare
+         output_dir: Directory to save comparison results
+         model_name: Name of the LLM model to use
+         provider: LLM provider
+         search_tool: Search engine to use
+         repetitions: Number of repetitions for each configuration
+         metric_weights: Dictionary of weights for each metric type
+
+     Returns:
+         Dictionary with comparison results
+     """
+     os.makedirs(output_dir, exist_ok=True)
+
+     # Default metric weights if not provided
+     if metric_weights is None:
+         metric_weights = {
+             "quality": 0.6,
+             "speed": 0.4,
+             "resource": 0.0 # Disabled by default
+         }
+
+     # Verify valid configurations
+     if not configurations:
+         logger.error("No configurations provided for comparison")
+         return {"error": "No configurations provided"}
+
+     # Results storage
+     results = []
+
+     # Process each configuration
+     for i, config in enumerate(configurations):
+         logger.info(f"Evaluating configuration {i+1}/{len(configurations)}: {config}")
+
+         # Name for this configuration
+         config_name = config.get("name", f"Configuration {i+1}")
+
+         # Results for all repetitions of this configuration
+         config_results = []
+
+         # Run multiple repetitions
+         for rep in range(repetitions):
+             logger.info(f"Starting repetition {rep+1}/{repetitions} for {config_name}")
+
+             try:
+                 # Run the configuration
+                 result = _evaluate_single_configuration(
+                     query=query,
+                     config=config,
+                     model_name=model_name,
+                     provider=provider,
+                     search_tool=search_tool
+                 )
+
+                 config_results.append(result)
+                 logger.info(f"Completed repetition {rep+1} for {config_name}")
+
+             except Exception as e:
+                 logger.error(f"Error in {config_name}, repetition {rep+1}: {str(e)}")
+                 # Add error info but continue with other configurations
+                 config_results.append({
+                     "error": str(e),
+                     "success": False
+                 })
+
+         # Calculate aggregate metrics across repetitions
+         if config_results:
+             # Filter out failed runs
+             successful_runs = [r for r in config_results if r.get("success", False)]
+
+             if successful_runs:
+                 # Calculate average metrics
+                 avg_metrics = _calculate_average_metrics(successful_runs)
+
+                 # Calculate overall score
+                 overall_score = calculate_combined_score(
+                     quality_metrics=avg_metrics.get("quality_metrics", {}),
+                     speed_metrics=avg_metrics.get("speed_metrics", {}),
+                     resource_metrics=avg_metrics.get("resource_metrics", {}),
+                     weights=metric_weights
+                 )
+
+                 result_summary = {
+                     "name": config_name,
+                     "configuration": config,
+                     "success": True,
+                     "runs_completed": len(successful_runs),
+                     "runs_failed": len(config_results) - len(successful_runs),
+                     "avg_metrics": avg_metrics,
+                     "overall_score": overall_score,
+                     "individual_results": config_results
+                 }
+             else:
+                 # All runs failed
+                 result_summary = {
+                     "name": config_name,
+                     "configuration": config,
+                     "success": False,
+                     "runs_completed": 0,
+                     "runs_failed": len(config_results),
+                     "error": "All runs failed",
+                     "individual_results": config_results
+                 }
+
+             results.append(result_summary)
+
+     # Sort results by overall score (if available)
+     sorted_results = sorted(
+         [r for r in results if r.get("success", False)],
+         key=lambda x: x.get("overall_score", 0),
+         reverse=True
+     )
+
+     # Add failed configurations at the end
+     sorted_results.extend([r for r in results if not r.get("success", False)])
+
+     # Create comparison report
+     comparison_report = {
+         "query": query,
+         "configurations_tested": len(configurations),
+         "successful_configurations": len([r for r in results if r.get("success", False)]),
+         "failed_configurations": len([r for r in results if not r.get("success", False)]),
+         "repetitions": repetitions,
+         "metric_weights": metric_weights,
+         "timestamp": datetime.now().isoformat(),
+         "results": sorted_results
+     }
+
+     # Save results to file
+     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+     result_file = os.path.join(output_dir, f"comparison_results_{timestamp}.json")
+
+     with open(result_file, "w") as f:
+         json.dump(comparison_report, f, indent=2)
+
+     # Generate visualizations
+     visualizations_dir = os.path.join(output_dir, "visualizations")
+     os.makedirs(visualizations_dir, exist_ok=True)
+
+     _create_comparison_visualizations(
+         comparison_report,
+         output_dir=visualizations_dir,
+         timestamp=timestamp
+     )
+
+     logger.info(f"Comparison completed. Results saved to {result_file}")
+
+     # Add report path to the result
+     comparison_report["report_path"] = result_file
+
+     return comparison_report
+
+
+ def _evaluate_single_configuration(
+     query: str,
+     config: Dict[str, Any],
+     model_name: Optional[str] = None,
+     provider: Optional[str] = None,
+     search_tool: Optional[str] = None,
+ ) -> Dict[str, Any]:
+     """
+     Evaluate a single configuration.
+
+     Args:
+         query: Research query to evaluate
+         config: Configuration parameters
+         model_name: Name of the LLM model to use
+         provider: LLM provider
+         search_tool: Search engine to use
+
+     Returns:
+         Dictionary with evaluation results
+     """
+     # Extract configuration parameters
+     config_model_name = config.get("model_name", model_name)
+     config_provider = config.get("provider", provider)
+     config_search_tool = config.get("search_tool", search_tool)
+     config_iterations = config.get("iterations", 2)
+     config_questions_per_iteration = config.get("questions_per_iteration", 2)
+     config_search_strategy = config.get("search_strategy", "iterdrag")
+     config_max_results = config.get("max_results", 50)
+     config_max_filtered_results = config.get("max_filtered_results", 20)
+
+     # Initialize profiling tools
+     speed_profiler = SpeedProfiler()
+     resource_monitor = ResourceMonitor(sampling_interval=0.5)
+
+     # Start profiling
+     speed_profiler.start()
+     resource_monitor.start()
+
+     try:
+         # Get LLM
+         with speed_profiler.timer("llm_initialization"):
+             llm = get_llm(
+                 temperature=config.get("temperature", 0.7),
+                 model_name=config_model_name,
+                 provider=config_provider
+             )
+
+         # Set up search engine if specified
+         with speed_profiler.timer("search_initialization"):
+             search = None
+             if config_search_tool:
+                 search = get_search(
+                     config_search_tool,
+                     llm_instance=llm,
+                     max_results=config_max_results,
+                     max_filtered_results=config_max_filtered_results
+                 )
+
+         # Create search system
+         system = AdvancedSearchSystem(llm=llm, search=search)
+         system.max_iterations = config_iterations
+         system.questions_per_iteration = config_questions_per_iteration
+         system.strategy_name = config_search_strategy
+
+         # Run the analysis
+         with speed_profiler.timer("analysis"):
+             results = system.analyze_topic(query)
+
+         # Stop profiling
+         speed_profiler.stop()
+         resource_monitor.stop()
+
+         # Calculate metrics
+         quality_metrics = calculate_quality_metrics(
+             results=results,
+             system_info={"all_links_of_system": getattr(system, "all_links_of_system", [])}
+         )
+
+         speed_metrics = calculate_speed_metrics(
+             timing_info=speed_profiler.get_summary(),
+             system_info={
+                 "iterations": config_iterations,
+                 "questions_per_iteration": config_questions_per_iteration,
+                 "results": results
+             }
+         )
+
+         resource_metrics = calculate_resource_metrics(
+             resource_info=resource_monitor.get_combined_stats(),
+             system_info={
+                 "iterations": config_iterations,
+                 "questions_per_iteration": config_questions_per_iteration,
+                 "results": results
+             }
+         )
+
+         # Return comprehensive results
+         return {
+             "query": query,
+             "config": config,
+             "success": True,
+             "findings_count": len(results.get("findings", [])),
+             "knowledge_length": len(results.get("current_knowledge", "")),
+             "quality_metrics": quality_metrics,
+             "speed_metrics": speed_metrics,
+             "resource_metrics": resource_metrics,
+             "timing_details": speed_profiler.get_timings(),
+             "resource_details": resource_monitor.get_combined_stats()
+         }
+
+     except Exception as e:
+         # Stop profiling on error
+         speed_profiler.stop()
+         resource_monitor.stop()
+
+         # Log the error
+         logger.error(f"Error evaluating configuration: {str(e)}")
+
+         # Return error information
+         return {
+             "query": query,
+             "config": config,
+             "success": False,
+             "error": str(e),
+             "timing_details": speed_profiler.get_timings(),
+             "resource_details": resource_monitor.get_combined_stats()
+         }
+
+
+ def _calculate_average_metrics(results: List[Dict[str, Any]]) -> Dict[str, Any]:
+     """
+     Calculate average metrics across multiple runs.
+
+     Args:
+         results: List of individual run results
+
+     Returns:
+         Dictionary with averaged metrics
+     """
+     # Check if there are any successful results
+     if not results:
+         return {}
+
+     # Initialize average metrics
+     avg_metrics = {
+         "quality_metrics": {},
+         "speed_metrics": {},
+         "resource_metrics": {}
+     }
+
+     # Quality metrics
+     quality_keys = set()
+     for result in results:
+         quality_metrics = result.get("quality_metrics", {})
+         quality_keys.update(quality_metrics.keys())
+
+     for key in quality_keys:
+         values = [r.get("quality_metrics", {}).get(key) for r in results]
+         values = [v for v in values if v is not None]
+         if values:
+             avg_metrics["quality_metrics"][key] = sum(values) / len(values)
+
+     # Speed metrics
+     speed_keys = set()
+     for result in results:
+         speed_metrics = result.get("speed_metrics", {})
+         speed_keys.update(speed_metrics.keys())
+
+     for key in speed_keys:
+         values = [r.get("speed_metrics", {}).get(key) for r in results]
+         values = [v for v in values if v is not None]
+         if values:
+             avg_metrics["speed_metrics"][key] = sum(values) / len(values)
+
+     # Resource metrics
+     resource_keys = set()
+     for result in results:
+         resource_metrics = result.get("resource_metrics", {})
+         resource_keys.update(resource_metrics.keys())
+
+     for key in resource_keys:
+         values = [r.get("resource_metrics", {}).get(key) for r in results]
+         values = [v for v in values if v is not None]
+         if values:
+             avg_metrics["resource_metrics"][key] = sum(values) / len(values)
+
+     return avg_metrics
+
+
+ def _create_comparison_visualizations(
+     comparison_report: Dict[str, Any],
+     output_dir: str,
+     timestamp: str
+ ):
+     """
+     Create visualizations for the comparison results.
+
+     Args:
+         comparison_report: Comparison report dictionary
+         output_dir: Directory to save visualizations
+         timestamp: Timestamp string for filenames
+     """
+     # Check if there are successful results
+     successful_results = [
+         r for r in comparison_report.get("results", [])
+         if r.get("success", False)
+     ]
+
+     if not successful_results:
+         logger.warning("No successful configurations to visualize")
+         return
+
+     # Extract configuration names
+     config_names = [r.get("name", f"Config {i+1}") for i, r in enumerate(successful_results)]
+
+     # 1. Overall score comparison
+     plt.figure(figsize=(12, 6))
+     scores = [r.get("overall_score", 0) for r in successful_results]
+
+     # Create horizontal bar chart
+     plt.barh(config_names, scores, color='skyblue')
+     plt.xlabel('Overall Score')
+     plt.ylabel('Configuration')
+     plt.title('Configuration Performance Comparison')
+     plt.grid(axis='x', linestyle='--', alpha=0.7)
+     plt.tight_layout()
+     plt.savefig(os.path.join(output_dir, f"overall_score_comparison_{timestamp}.png"))
+     plt.close()
+
+     # 2. Quality metrics comparison
+     quality_metrics = ["overall_quality", "source_count", "lexical_diversity"]
+     _create_metric_comparison_chart(
+         successful_results,
+         config_names,
+         quality_metrics,
+         "quality_metrics",
+         "Quality Metrics Comparison",
+         os.path.join(output_dir, f"quality_metrics_comparison_{timestamp}.png")
+     )
+
+     # 3. Speed metrics comparison
+     speed_metrics = ["overall_speed", "total_duration", "duration_per_question"]
+     _create_metric_comparison_chart(
+         successful_results,
+         config_names,
+         speed_metrics,
+         "speed_metrics",
+         "Speed Metrics Comparison",
+         os.path.join(output_dir, f"speed_metrics_comparison_{timestamp}.png")
+     )
+
+     # 4. Resource metrics comparison
+     resource_metrics = ["overall_resource", "process_memory_max_mb", "system_cpu_avg"]
+     _create_metric_comparison_chart(
+         successful_results,
+         config_names,
+         resource_metrics,
+         "resource_metrics",
+         "Resource Usage Comparison",
+         os.path.join(output_dir, f"resource_metrics_comparison_{timestamp}.png")
+     )
+
+     # 5. Spider chart for multi-dimensional comparison
+     _create_spider_chart(
+         successful_results,
+         config_names,
+         os.path.join(output_dir, f"spider_chart_comparison_{timestamp}.png")
+     )
+
+     # 6. Pareto frontier chart for quality vs. speed
+     _create_pareto_chart(
+         successful_results,
+         os.path.join(output_dir, f"pareto_chart_comparison_{timestamp}.png")
+     )
+
+
+ def _create_metric_comparison_chart(
+     results: List[Dict[str, Any]],
+     config_names: List[str],
+     metric_keys: List[str],
+     metric_category: str,
+     title: str,
+     output_path: str
+ ):
+     """
+     Create a chart comparing specific metrics across configurations.
+
+     Args:
+         results: List of configuration results
+         config_names: Names of configurations
+         metric_keys: Keys of metrics to compare
+         metric_category: Category of metrics (quality_metrics, speed_metrics, etc.)
+         title: Chart title
+         output_path: Path to save the chart
+     """
+     # Create figure with multiple subplots (one per metric)
+     fig, axes = plt.subplots(len(metric_keys), 1, figsize=(12, 5 * len(metric_keys)))
+
+     # Handle case with only one metric
+     if len(metric_keys) == 1:
+         axes = [axes]
+
+     for i, metric_key in enumerate(metric_keys):
+         ax = axes[i]
+
+         # Get metric values
+         metric_values = []
+         for result in results:
+             metrics = result.get("avg_metrics", {}).get(metric_category, {})
+             value = metrics.get(metric_key)
+
+             # Handle time values for better visualization
+             if "duration" in metric_key and value is not None:
+                 # Convert to seconds if > 60 seconds, minutes if > 60 minutes
+                 if value > 3600:
+                     value = value / 3600 # Convert to hours
+                     metric_key += " (hours)"
+                 elif value > 60:
+                     value = value / 60 # Convert to minutes
+                     metric_key += " (minutes)"
+                 else:
+                     metric_key += " (seconds)"
+
+             metric_values.append(value if value is not None else 0)
+
+         # Create horizontal bar chart
+         bars = ax.barh(config_names, metric_values, color='lightblue')
+         ax.set_xlabel(metric_key.replace('_', ' ').title())
+         ax.set_title(f"{metric_key.replace('_', ' ').title()}")
+         ax.grid(axis='x', linestyle='--', alpha=0.7)
+
+         # Add value labels to bars
+         for bar in bars:
+             width = bar.get_width()
+             label_x_pos = width * 1.01
+             ax.text(label_x_pos, bar.get_y() + bar.get_height()/2, f'{width:.2f}',
+                     va='center')
+
+     plt.suptitle(title, fontsize=16)
+     plt.tight_layout()
+     plt.savefig(output_path)
+     plt.close()
+
+
+ def _create_spider_chart(
+     results: List[Dict[str, Any]],
+     config_names: List[str],
+     output_path: str
+ ):
+     """
+     Create a spider chart comparing metrics across configurations.
+
+     Args:
+         results: List of configuration results
+         config_names: Names of configurations
+         output_path: Path to save the chart
+     """
+     # Try to import the radar chart module
+     try:
+         from matplotlib.path import Path
+         from matplotlib.projections import register_projection
+         from matplotlib.projections.polar import PolarAxes
+         from matplotlib.spines import Spine
+
+         def radar_factory(num_vars, frame='circle'):
+             """Create a radar chart with `num_vars` axes."""
+             # Calculate evenly-spaced axis angles
+             theta = np.linspace(0, 2*np.pi, num_vars, endpoint=False)
+
+             class RadarAxes(PolarAxes):
+                 name = 'radar'
+
+                 def __init__(self, *args, **kwargs):
+                     super().__init__(*args, **kwargs)
+                     self.set_theta_zero_location('N')
+
+                 def fill(self, *args, closed=True, **kwargs):
+                     return super().fill(closed=closed, *args, **kwargs)
+
+                 def plot(self, *args, **kwargs):
+                     return super().plot(*args, **kwargs)
+
+                 def set_varlabels(self, labels):
+                     self.set_thetagrids(np.degrees(theta), labels)
+
+                 def _gen_axes_patch(self):
+                     if frame == 'circle':
+                         return Circle((0.5, 0.5), 0.5)
+                     elif frame == 'polygon':
+                         return RegularPolygon((0.5, 0.5), num_vars, radius=0.5, edgecolor="k")
+                     else:
+                         raise ValueError("Unknown value for 'frame': %s" % frame)
+
+                 def _gen_axes_spines(self):
+                     if frame == 'circle':
+                         return super()._gen_axes_spines()
+                     elif frame == 'polygon':
+                         spine_type = Spine.circular_spine
+                         verts = unit_poly_verts(num_vars)
+                         vertices = [(0.5, 0.5)] + verts
+                         codes = [Path.MOVETO] + [Path.LINETO] * num_vars + [Path.CLOSEPOLY]
+                         path = Path(vertices, codes)
+                         spine = Spine(self, spine_type, path)
+                         spine.set_transform(self.transAxes)
+                         return {'polar': spine}
+                     else:
+                         raise ValueError("Unknown value for 'frame': %s" % frame)
+
+             def unit_poly_verts(num_vars):
+                 """Return vertices of polygon for radar chart."""
+                 verts = []
+                 for i in range(num_vars):
+                     angle = theta[i]
+                     verts.append((0.5 * (1 + np.cos(angle)), 0.5 * (1 + np.sin(angle))))
+                 return verts
+
+             register_projection(RadarAxes)
+             return theta
+
+         # Select metrics for the spider chart
+         metrics = [
+             {"name": "Quality", "key": "quality_metrics.overall_quality"},
+             {"name": "Speed", "key": "speed_metrics.overall_speed"},
+             {"name": "Sources", "key": "quality_metrics.normalized_source_count"},
+             {"name": "Content", "key": "quality_metrics.normalized_knowledge_length"},
+             {"name": "Memory", "key": "resource_metrics.normalized_memory_usage", "invert": True},
+         ]
+
+         # Extract metric values
+         spoke_labels = [m["name"] for m in metrics]
+         num_vars = len(spoke_labels)
+         theta = radar_factory(num_vars)
+
+         fig, ax = plt.subplots(figsize=(10, 10), subplot_kw=dict(projection='radar'))
+
+         # Color map for different configurations
+         colors = plt.cm.viridis(np.linspace(0, 1, len(results)))
+
+         for i, result in enumerate(results):
+             values = []
+             for metric in metrics:
+                 # Extract metric value using the key path (e.g., "quality_metrics.overall_quality")
+                 key_parts = metric["key"].split(".")
+                 value = result.get("avg_metrics", {})
+                 for part in key_parts:
+                     value = value.get(part, 0) if isinstance(value, dict) else 0
+
+                 # Invert if needed (for metrics where lower is better)
+                 if metric.get("invert", False):
+                     value = 1.0 - value
+
+                 values.append(value)
+
+             # Plot this configuration
+             ax.plot(theta, values, color=colors[i], linewidth=2, label=config_names[i])
+             ax.fill(theta, values, color=colors[i], alpha=0.25)
+
+         # Set chart properties
+         ax.set_varlabels(spoke_labels)
+         plt.legend(loc='best', bbox_to_anchor=(0.5, 0.1))
+         plt.title('Multi-Dimensional Configuration Comparison', size=16, y=1.05)
+         plt.tight_layout()
+
+         # Save chart
+         plt.savefig(output_path)
+         plt.close()
+
+     except Exception as e:
+         logger.error(f"Error creating spider chart: {str(e)}")
+         # Create a text-based chart as fallback
+         plt.figure(figsize=(10, 6))
+         plt.text(0.5, 0.5, f"Spider chart could not be created: {str(e)}",
+                  horizontalalignment='center', verticalalignment='center')
+         plt.axis('off')
+         plt.savefig(output_path)
+         plt.close()
+
+
+ def _create_pareto_chart(
+     results: List[Dict[str, Any]],
+     output_path: str
+ ):
+     """
+     Create a Pareto frontier chart showing quality vs. speed tradeoff.
+
+     Args:
+         results: List of configuration results
+         output_path: Path to save the chart
+     """
+     # Extract quality and speed metrics
+     quality_scores = []
+     speed_scores = []
+     names = []
+
+     for result in results:
+         metrics = result.get("avg_metrics", {})
+         quality = metrics.get("quality_metrics", {}).get("overall_quality", 0)
+
+         # For speed, we use inverse of duration (so higher is better)
+         duration = metrics.get("speed_metrics", {}).get("total_duration", 1)
+         speed = 1.0 / max(duration, 0.001) # Avoid division by zero
+
+         quality_scores.append(quality)
+         speed_scores.append(speed)
+         names.append(result.get("name", "Configuration"))
+
+     # Create scatter plot
+     plt.figure(figsize=(10, 8))
+     plt.scatter(quality_scores, speed_scores, s=100, alpha=0.7)
+
+     # Add labels for each point
+     for i, name in enumerate(names):
+         plt.annotate(name,
+                      (quality_scores[i], speed_scores[i]),
+                      xytext=(5, 5),
+                      textcoords='offset points')
+
+     # Identify Pareto frontier
+     pareto_points = []
+     for i, (q, s) in enumerate(zip(quality_scores, speed_scores)):
+         is_pareto = True
+         for q2, s2 in zip(quality_scores, speed_scores):
+             if q2 > q and s2 > s: # Dominated
+                 is_pareto = False
+                 break
+         if is_pareto:
+             pareto_points.append(i)
+
+     # Highlight Pareto frontier
+     pareto_quality = [quality_scores[i] for i in pareto_points]
+     pareto_speed = [speed_scores[i] for i in pareto_points]
+
+     # Sort pareto points for line drawing
+     pareto_sorted = sorted(zip(pareto_quality, pareto_speed, pareto_points))
+     pareto_quality = [p[0] for p in pareto_sorted]
+     pareto_speed = [p[1] for p in pareto_sorted]
+     pareto_indices = [p[2] for p in pareto_sorted]
+
+     # Draw Pareto frontier line
+     plt.plot(pareto_quality, pareto_speed, 'r--', linewidth=2)
+
+     # Highlight Pareto optimal points
+     plt.scatter([quality_scores[i] for i in pareto_indices],
+                 [speed_scores[i] for i in pareto_indices],
+                 s=150, facecolors='none', edgecolors='r', linewidth=2)
+
+     # Add labels for Pareto optimal configurations
+     for i in pareto_indices:
+         plt.annotate(names[i],
+                      (quality_scores[i], speed_scores[i]),
+                      xytext=(8, 8),
+                      textcoords='offset points',
+                      bbox=dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.7))
+
+     # Set chart properties
+     plt.xlabel('Quality Score (higher is better)')
+     plt.ylabel('Speed Score (higher is better)')
+     plt.title('Quality vs. Speed Tradeoff (Pareto Frontier)', size=14)
+     plt.grid(True, linestyle='--', alpha=0.7)
+
+     # Add explanation
+     plt.figtext(0.5, 0.01,
+                 "Points on the red line are Pareto optimal configurations\n"
+                 "(no other configuration is better in both quality and speed)",
+                 ha='center', fontsize=10, bbox=dict(boxstyle='round', fc='white', alpha=0.7))
+
+     plt.tight_layout()
+     plt.savefig(output_path)
+     plt.close()
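
A note on the dominance test used in _create_pareto_chart above: a configuration stays on the frontier only if no other configuration is strictly better on both axes. A tiny standalone sketch with made-up scores (not taken from any real run) shows the same rule in isolation:

# Hypothetical (quality, speed) pairs for four configurations.
quality_scores = [0.80, 0.60, 0.90, 0.50]
speed_scores = [0.20, 0.70, 0.10, 0.60]

# Same dominance rule as _create_pareto_chart: drop a point only if some
# other point beats it on both quality and speed.
pareto_points = [
    i
    for i, (q, s) in enumerate(zip(quality_scores, speed_scores))
    if not any(q2 > q and s2 > s for q2, s2 in zip(quality_scores, speed_scores))
]
print(pareto_points)  # [0, 1, 2] -- the fourth configuration is dominated by the second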