local-deep-research 0.4.4__py3-none-any.whl → 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220) hide show
  1. local_deep_research/__init__.py +7 -0
  2. local_deep_research/__version__.py +1 -1
  3. local_deep_research/advanced_search_system/answer_decoding/__init__.py +5 -0
  4. local_deep_research/advanced_search_system/answer_decoding/browsecomp_answer_decoder.py +421 -0
  5. local_deep_research/advanced_search_system/candidate_exploration/README.md +219 -0
  6. local_deep_research/advanced_search_system/candidate_exploration/__init__.py +25 -0
  7. local_deep_research/advanced_search_system/candidate_exploration/adaptive_explorer.py +329 -0
  8. local_deep_research/advanced_search_system/candidate_exploration/base_explorer.py +341 -0
  9. local_deep_research/advanced_search_system/candidate_exploration/constraint_guided_explorer.py +436 -0
  10. local_deep_research/advanced_search_system/candidate_exploration/diversity_explorer.py +457 -0
  11. local_deep_research/advanced_search_system/candidate_exploration/parallel_explorer.py +250 -0
  12. local_deep_research/advanced_search_system/candidate_exploration/progressive_explorer.py +255 -0
  13. local_deep_research/advanced_search_system/candidates/__init__.py +5 -0
  14. local_deep_research/advanced_search_system/candidates/base_candidate.py +59 -0
  15. local_deep_research/advanced_search_system/constraint_checking/README.md +150 -0
  16. local_deep_research/advanced_search_system/constraint_checking/__init__.py +35 -0
  17. local_deep_research/advanced_search_system/constraint_checking/base_constraint_checker.py +122 -0
  18. local_deep_research/advanced_search_system/constraint_checking/constraint_checker.py +223 -0
  19. local_deep_research/advanced_search_system/constraint_checking/constraint_satisfaction_tracker.py +387 -0
  20. local_deep_research/advanced_search_system/constraint_checking/dual_confidence_checker.py +424 -0
  21. local_deep_research/advanced_search_system/constraint_checking/evidence_analyzer.py +174 -0
  22. local_deep_research/advanced_search_system/constraint_checking/intelligent_constraint_relaxer.py +503 -0
  23. local_deep_research/advanced_search_system/constraint_checking/rejection_engine.py +143 -0
  24. local_deep_research/advanced_search_system/constraint_checking/strict_checker.py +259 -0
  25. local_deep_research/advanced_search_system/constraint_checking/threshold_checker.py +213 -0
  26. local_deep_research/advanced_search_system/constraints/__init__.py +6 -0
  27. local_deep_research/advanced_search_system/constraints/base_constraint.py +58 -0
  28. local_deep_research/advanced_search_system/constraints/constraint_analyzer.py +143 -0
  29. local_deep_research/advanced_search_system/evidence/__init__.py +12 -0
  30. local_deep_research/advanced_search_system/evidence/base_evidence.py +57 -0
  31. local_deep_research/advanced_search_system/evidence/evaluator.py +159 -0
  32. local_deep_research/advanced_search_system/evidence/requirements.py +122 -0
  33. local_deep_research/advanced_search_system/filters/base_filter.py +3 -1
  34. local_deep_research/advanced_search_system/filters/cross_engine_filter.py +8 -2
  35. local_deep_research/advanced_search_system/filters/journal_reputation_filter.py +43 -29
  36. local_deep_research/advanced_search_system/findings/repository.py +54 -17
  37. local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +3 -1
  38. local_deep_research/advanced_search_system/query_generation/adaptive_query_generator.py +405 -0
  39. local_deep_research/advanced_search_system/questions/__init__.py +16 -0
  40. local_deep_research/advanced_search_system/questions/atomic_fact_question.py +171 -0
  41. local_deep_research/advanced_search_system/questions/browsecomp_question.py +287 -0
  42. local_deep_research/advanced_search_system/questions/decomposition_question.py +13 -4
  43. local_deep_research/advanced_search_system/questions/entity_aware_question.py +184 -0
  44. local_deep_research/advanced_search_system/questions/standard_question.py +9 -3
  45. local_deep_research/advanced_search_system/search_optimization/cross_constraint_manager.py +624 -0
  46. local_deep_research/advanced_search_system/source_management/diversity_manager.py +613 -0
  47. local_deep_research/advanced_search_system/strategies/__init__.py +42 -0
  48. local_deep_research/advanced_search_system/strategies/adaptive_decomposition_strategy.py +564 -0
  49. local_deep_research/advanced_search_system/strategies/base_strategy.py +4 -4
  50. local_deep_research/advanced_search_system/strategies/browsecomp_entity_strategy.py +1031 -0
  51. local_deep_research/advanced_search_system/strategies/browsecomp_optimized_strategy.py +778 -0
  52. local_deep_research/advanced_search_system/strategies/concurrent_dual_confidence_strategy.py +446 -0
  53. local_deep_research/advanced_search_system/strategies/constrained_search_strategy.py +1348 -0
  54. local_deep_research/advanced_search_system/strategies/constraint_parallel_strategy.py +522 -0
  55. local_deep_research/advanced_search_system/strategies/direct_search_strategy.py +217 -0
  56. local_deep_research/advanced_search_system/strategies/dual_confidence_strategy.py +320 -0
  57. local_deep_research/advanced_search_system/strategies/dual_confidence_with_rejection.py +219 -0
  58. local_deep_research/advanced_search_system/strategies/early_stop_constrained_strategy.py +369 -0
  59. local_deep_research/advanced_search_system/strategies/entity_aware_source_strategy.py +140 -0
  60. local_deep_research/advanced_search_system/strategies/evidence_based_strategy.py +1248 -0
  61. local_deep_research/advanced_search_system/strategies/evidence_based_strategy_v2.py +1337 -0
  62. local_deep_research/advanced_search_system/strategies/focused_iteration_strategy.py +537 -0
  63. local_deep_research/advanced_search_system/strategies/improved_evidence_based_strategy.py +782 -0
  64. local_deep_research/advanced_search_system/strategies/iterative_reasoning_strategy.py +760 -0
  65. local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +55 -21
  66. local_deep_research/advanced_search_system/strategies/llm_driven_modular_strategy.py +865 -0
  67. local_deep_research/advanced_search_system/strategies/modular_strategy.py +1142 -0
  68. local_deep_research/advanced_search_system/strategies/parallel_constrained_strategy.py +506 -0
  69. local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +34 -16
  70. local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +29 -9
  71. local_deep_research/advanced_search_system/strategies/recursive_decomposition_strategy.py +492 -0
  72. local_deep_research/advanced_search_system/strategies/smart_decomposition_strategy.py +284 -0
  73. local_deep_research/advanced_search_system/strategies/smart_query_strategy.py +515 -0
  74. local_deep_research/advanced_search_system/strategies/source_based_strategy.py +48 -24
  75. local_deep_research/advanced_search_system/strategies/standard_strategy.py +34 -14
  76. local_deep_research/advanced_search_system/tools/base_tool.py +7 -2
  77. local_deep_research/api/benchmark_functions.py +6 -2
  78. local_deep_research/api/research_functions.py +10 -4
  79. local_deep_research/benchmarks/__init__.py +9 -7
  80. local_deep_research/benchmarks/benchmark_functions.py +6 -2
  81. local_deep_research/benchmarks/cli/benchmark_commands.py +27 -10
  82. local_deep_research/benchmarks/cli.py +38 -13
  83. local_deep_research/benchmarks/comparison/__init__.py +4 -2
  84. local_deep_research/benchmarks/comparison/evaluator.py +316 -239
  85. local_deep_research/benchmarks/datasets/__init__.py +1 -1
  86. local_deep_research/benchmarks/datasets/base.py +91 -72
  87. local_deep_research/benchmarks/datasets/browsecomp.py +54 -33
  88. local_deep_research/benchmarks/datasets/custom_dataset_template.py +19 -19
  89. local_deep_research/benchmarks/datasets/simpleqa.py +14 -14
  90. local_deep_research/benchmarks/datasets/utils.py +48 -29
  91. local_deep_research/benchmarks/datasets.py +4 -11
  92. local_deep_research/benchmarks/efficiency/__init__.py +8 -4
  93. local_deep_research/benchmarks/efficiency/resource_monitor.py +223 -171
  94. local_deep_research/benchmarks/efficiency/speed_profiler.py +62 -48
  95. local_deep_research/benchmarks/evaluators/browsecomp.py +3 -1
  96. local_deep_research/benchmarks/evaluators/composite.py +6 -2
  97. local_deep_research/benchmarks/evaluators/simpleqa.py +36 -13
  98. local_deep_research/benchmarks/graders.py +32 -10
  99. local_deep_research/benchmarks/metrics/README.md +1 -1
  100. local_deep_research/benchmarks/metrics/calculation.py +25 -10
  101. local_deep_research/benchmarks/metrics/reporting.py +7 -3
  102. local_deep_research/benchmarks/metrics/visualization.py +42 -23
  103. local_deep_research/benchmarks/metrics.py +1 -1
  104. local_deep_research/benchmarks/optimization/__init__.py +3 -1
  105. local_deep_research/benchmarks/optimization/api.py +7 -1
  106. local_deep_research/benchmarks/optimization/optuna_optimizer.py +75 -26
  107. local_deep_research/benchmarks/runners.py +48 -15
  108. local_deep_research/citation_handler.py +65 -92
  109. local_deep_research/citation_handlers/__init__.py +15 -0
  110. local_deep_research/citation_handlers/base_citation_handler.py +70 -0
  111. local_deep_research/citation_handlers/forced_answer_citation_handler.py +179 -0
  112. local_deep_research/citation_handlers/precision_extraction_handler.py +550 -0
  113. local_deep_research/citation_handlers/standard_citation_handler.py +80 -0
  114. local_deep_research/config/llm_config.py +271 -169
  115. local_deep_research/config/search_config.py +14 -5
  116. local_deep_research/defaults/__init__.py +0 -1
  117. local_deep_research/metrics/__init__.py +13 -0
  118. local_deep_research/metrics/database.py +58 -0
  119. local_deep_research/metrics/db_models.py +115 -0
  120. local_deep_research/metrics/migrate_add_provider_to_token_usage.py +148 -0
  121. local_deep_research/metrics/migrate_call_stack_tracking.py +105 -0
  122. local_deep_research/metrics/migrate_enhanced_tracking.py +75 -0
  123. local_deep_research/metrics/migrate_research_ratings.py +31 -0
  124. local_deep_research/metrics/models.py +61 -0
  125. local_deep_research/metrics/pricing/__init__.py +12 -0
  126. local_deep_research/metrics/pricing/cost_calculator.py +237 -0
  127. local_deep_research/metrics/pricing/pricing_cache.py +143 -0
  128. local_deep_research/metrics/pricing/pricing_fetcher.py +240 -0
  129. local_deep_research/metrics/query_utils.py +51 -0
  130. local_deep_research/metrics/search_tracker.py +380 -0
  131. local_deep_research/metrics/token_counter.py +1078 -0
  132. local_deep_research/migrate_db.py +3 -1
  133. local_deep_research/report_generator.py +22 -8
  134. local_deep_research/search_system.py +390 -9
  135. local_deep_research/test_migration.py +15 -5
  136. local_deep_research/utilities/db_utils.py +7 -4
  137. local_deep_research/utilities/es_utils.py +115 -104
  138. local_deep_research/utilities/llm_utils.py +15 -5
  139. local_deep_research/utilities/log_utils.py +151 -0
  140. local_deep_research/utilities/search_cache.py +387 -0
  141. local_deep_research/utilities/search_utilities.py +14 -6
  142. local_deep_research/utilities/threading_utils.py +92 -0
  143. local_deep_research/utilities/url_utils.py +6 -0
  144. local_deep_research/web/api.py +347 -0
  145. local_deep_research/web/app.py +13 -17
  146. local_deep_research/web/app_factory.py +71 -66
  147. local_deep_research/web/database/migrate_to_ldr_db.py +12 -4
  148. local_deep_research/web/database/migrations.py +20 -3
  149. local_deep_research/web/database/models.py +74 -25
  150. local_deep_research/web/database/schema_upgrade.py +49 -29
  151. local_deep_research/web/models/database.py +63 -83
  152. local_deep_research/web/routes/api_routes.py +56 -22
  153. local_deep_research/web/routes/benchmark_routes.py +4 -1
  154. local_deep_research/web/routes/globals.py +22 -0
  155. local_deep_research/web/routes/history_routes.py +71 -46
  156. local_deep_research/web/routes/metrics_routes.py +1155 -0
  157. local_deep_research/web/routes/research_routes.py +192 -54
  158. local_deep_research/web/routes/settings_routes.py +156 -55
  159. local_deep_research/web/services/research_service.py +412 -251
  160. local_deep_research/web/services/resource_service.py +36 -11
  161. local_deep_research/web/services/settings_manager.py +55 -17
  162. local_deep_research/web/services/settings_service.py +12 -4
  163. local_deep_research/web/services/socket_service.py +295 -188
  164. local_deep_research/web/static/css/custom_dropdown.css +180 -0
  165. local_deep_research/web/static/css/styles.css +39 -1
  166. local_deep_research/web/static/js/components/detail.js +633 -267
  167. local_deep_research/web/static/js/components/details.js +751 -0
  168. local_deep_research/web/static/js/components/fallback/formatting.js +11 -11
  169. local_deep_research/web/static/js/components/fallback/ui.js +23 -23
  170. local_deep_research/web/static/js/components/history.js +76 -76
  171. local_deep_research/web/static/js/components/logpanel.js +61 -13
  172. local_deep_research/web/static/js/components/progress.js +13 -2
  173. local_deep_research/web/static/js/components/research.js +99 -12
  174. local_deep_research/web/static/js/components/results.js +239 -106
  175. local_deep_research/web/static/js/main.js +40 -40
  176. local_deep_research/web/static/js/services/audio.js +1 -1
  177. local_deep_research/web/static/js/services/formatting.js +11 -11
  178. local_deep_research/web/static/js/services/keyboard.js +157 -0
  179. local_deep_research/web/static/js/services/pdf.js +80 -80
  180. local_deep_research/web/static/sounds/README.md +1 -1
  181. local_deep_research/web/templates/base.html +1 -0
  182. local_deep_research/web/templates/components/log_panel.html +7 -1
  183. local_deep_research/web/templates/components/mobile_nav.html +1 -1
  184. local_deep_research/web/templates/components/sidebar.html +3 -0
  185. local_deep_research/web/templates/pages/cost_analytics.html +1245 -0
  186. local_deep_research/web/templates/pages/details.html +325 -24
  187. local_deep_research/web/templates/pages/history.html +1 -1
  188. local_deep_research/web/templates/pages/metrics.html +1929 -0
  189. local_deep_research/web/templates/pages/progress.html +2 -2
  190. local_deep_research/web/templates/pages/research.html +53 -17
  191. local_deep_research/web/templates/pages/results.html +12 -1
  192. local_deep_research/web/templates/pages/star_reviews.html +803 -0
  193. local_deep_research/web/utils/formatters.py +9 -3
  194. local_deep_research/web_search_engines/default_search_engines.py +5 -3
  195. local_deep_research/web_search_engines/engines/full_search.py +8 -2
  196. local_deep_research/web_search_engines/engines/meta_search_engine.py +59 -20
  197. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +19 -6
  198. local_deep_research/web_search_engines/engines/search_engine_brave.py +6 -2
  199. local_deep_research/web_search_engines/engines/search_engine_ddg.py +3 -1
  200. local_deep_research/web_search_engines/engines/search_engine_elasticsearch.py +81 -58
  201. local_deep_research/web_search_engines/engines/search_engine_github.py +46 -15
  202. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +16 -6
  203. local_deep_research/web_search_engines/engines/search_engine_guardian.py +39 -15
  204. local_deep_research/web_search_engines/engines/search_engine_local.py +58 -25
  205. local_deep_research/web_search_engines/engines/search_engine_local_all.py +15 -5
  206. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +63 -21
  207. local_deep_research/web_search_engines/engines/search_engine_searxng.py +37 -11
  208. local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +27 -9
  209. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +12 -4
  210. local_deep_research/web_search_engines/engines/search_engine_wayback.py +31 -10
  211. local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +12 -3
  212. local_deep_research/web_search_engines/search_engine_base.py +83 -35
  213. local_deep_research/web_search_engines/search_engine_factory.py +25 -8
  214. local_deep_research/web_search_engines/search_engines_config.py +9 -3
  215. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/METADATA +7 -1
  216. local_deep_research-0.5.2.dist-info/RECORD +265 -0
  217. local_deep_research-0.4.4.dist-info/RECORD +0 -177
  218. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/WHEEL +0 -0
  219. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/entry_points.txt +0 -0
  220. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/licenses/LICENSE +0 -0
@@ -8,24 +8,28 @@ and evaluating their performance across various metrics.
8
8
  import json
9
9
  import logging
10
10
  import os
11
- import time
12
11
  from datetime import datetime
13
- from typing import Dict, List, Optional, Any, Tuple
12
+ from typing import Any, Dict, List, Optional
14
13
 
15
- import numpy as np
16
14
  import matplotlib.pyplot as plt
15
+ from matplotlib.patches import Circle, RegularPolygon
16
+ import numpy as np
17
17
 
18
- from local_deep_research.config.llm_config import get_llm
19
- from local_deep_research.config.search_config import get_search
20
- from local_deep_research.search_system import AdvancedSearchSystem
21
- from local_deep_research.benchmarks.efficiency.speed_profiler import SpeedProfiler
22
- from local_deep_research.benchmarks.efficiency.resource_monitor import ResourceMonitor
18
+ from local_deep_research.benchmarks.efficiency.resource_monitor import (
19
+ ResourceMonitor,
20
+ )
21
+ from local_deep_research.benchmarks.efficiency.speed_profiler import (
22
+ SpeedProfiler,
23
+ )
23
24
  from local_deep_research.benchmarks.optimization.metrics import (
25
+ calculate_combined_score,
24
26
  calculate_quality_metrics,
25
- calculate_speed_metrics,
26
27
  calculate_resource_metrics,
27
- calculate_combined_score
28
+ calculate_speed_metrics,
28
29
  )
30
+ from local_deep_research.config.llm_config import get_llm
31
+ from local_deep_research.config.search_config import get_search
32
+ from local_deep_research.search_system import AdvancedSearchSystem
29
33
 
30
34
  logger = logging.getLogger(__name__)
31
35
 
@@ -42,7 +46,7 @@ def compare_configurations(
42
46
  ) -> Dict[str, Any]:
43
47
  """
44
48
  Compare multiple parameter configurations.
45
-
49
+
46
50
  Args:
47
51
  query: Research query to use for evaluation
48
52
  configurations: List of parameter configurations to compare
@@ -52,42 +56,46 @@ def compare_configurations(
52
56
  search_tool: Search engine to use
53
57
  repetitions: Number of repetitions for each configuration
54
58
  metric_weights: Dictionary of weights for each metric type
55
-
59
+
56
60
  Returns:
57
61
  Dictionary with comparison results
58
62
  """
59
63
  os.makedirs(output_dir, exist_ok=True)
60
-
64
+
61
65
  # Default metric weights if not provided
62
66
  if metric_weights is None:
63
67
  metric_weights = {
64
68
  "quality": 0.6,
65
69
  "speed": 0.4,
66
- "resource": 0.0 # Disabled by default
70
+ "resource": 0.0, # Disabled by default
67
71
  }
68
-
72
+
69
73
  # Verify valid configurations
70
74
  if not configurations:
71
75
  logger.error("No configurations provided for comparison")
72
76
  return {"error": "No configurations provided"}
73
-
77
+
74
78
  # Results storage
75
79
  results = []
76
-
80
+
77
81
  # Process each configuration
78
82
  for i, config in enumerate(configurations):
79
- logger.info(f"Evaluating configuration {i+1}/{len(configurations)}: {config}")
80
-
83
+ logger.info(
84
+ f"Evaluating configuration {i + 1}/{len(configurations)}: {config}"
85
+ )
86
+
81
87
  # Name for this configuration
82
- config_name = config.get("name", f"Configuration {i+1}")
83
-
88
+ config_name = config.get("name", f"Configuration {i + 1}")
89
+
84
90
  # Results for all repetitions of this configuration
85
91
  config_results = []
86
-
92
+
87
93
  # Run multiple repetitions
88
94
  for rep in range(repetitions):
89
- logger.info(f"Starting repetition {rep+1}/{repetitions} for {config_name}")
90
-
95
+ logger.info(
96
+ f"Starting repetition {rep + 1}/{repetitions} for {config_name}"
97
+ )
98
+
91
99
  try:
92
100
  # Run the configuration
93
101
  result = _evaluate_single_configuration(
@@ -95,37 +103,38 @@ def compare_configurations(
95
103
  config=config,
96
104
  model_name=model_name,
97
105
  provider=provider,
98
- search_tool=search_tool
106
+ search_tool=search_tool,
99
107
  )
100
-
108
+
101
109
  config_results.append(result)
102
- logger.info(f"Completed repetition {rep+1} for {config_name}")
103
-
110
+ logger.info(f"Completed repetition {rep + 1} for {config_name}")
111
+
104
112
  except Exception as e:
105
- logger.error(f"Error in {config_name}, repetition {rep+1}: {str(e)}")
113
+ logger.error(
114
+ f"Error in {config_name}, repetition {rep + 1}: {str(e)}"
115
+ )
106
116
  # Add error info but continue with other configurations
107
- config_results.append({
108
- "error": str(e),
109
- "success": False
110
- })
111
-
117
+ config_results.append({"error": str(e), "success": False})
118
+
112
119
  # Calculate aggregate metrics across repetitions
113
120
  if config_results:
114
121
  # Filter out failed runs
115
- successful_runs = [r for r in config_results if r.get("success", False)]
116
-
122
+ successful_runs = [
123
+ r for r in config_results if r.get("success", False)
124
+ ]
125
+
117
126
  if successful_runs:
118
127
  # Calculate average metrics
119
128
  avg_metrics = _calculate_average_metrics(successful_runs)
120
-
129
+
121
130
  # Calculate overall score
122
131
  overall_score = calculate_combined_score(
123
132
  quality_metrics=avg_metrics.get("quality_metrics", {}),
124
133
  speed_metrics=avg_metrics.get("speed_metrics", {}),
125
134
  resource_metrics=avg_metrics.get("resource_metrics", {}),
126
- weights=metric_weights
135
+ weights=metric_weights,
127
136
  )
128
-
137
+
129
138
  result_summary = {
130
139
  "name": config_name,
131
140
  "configuration": config,
@@ -134,7 +143,7 @@ def compare_configurations(
134
143
  "runs_failed": len(config_results) - len(successful_runs),
135
144
  "avg_metrics": avg_metrics,
136
145
  "overall_score": overall_score,
137
- "individual_results": config_results
146
+ "individual_results": config_results,
138
147
  }
139
148
  else:
140
149
  # All runs failed
@@ -145,55 +154,59 @@ def compare_configurations(
145
154
  "runs_completed": 0,
146
155
  "runs_failed": len(config_results),
147
156
  "error": "All runs failed",
148
- "individual_results": config_results
157
+ "individual_results": config_results,
149
158
  }
150
-
159
+
151
160
  results.append(result_summary)
152
-
161
+
153
162
  # Sort results by overall score (if available)
154
163
  sorted_results = sorted(
155
164
  [r for r in results if r.get("success", False)],
156
165
  key=lambda x: x.get("overall_score", 0),
157
- reverse=True
166
+ reverse=True,
158
167
  )
159
-
168
+
160
169
  # Add failed configurations at the end
161
170
  sorted_results.extend([r for r in results if not r.get("success", False)])
162
-
171
+
163
172
  # Create comparison report
164
173
  comparison_report = {
165
174
  "query": query,
166
175
  "configurations_tested": len(configurations),
167
- "successful_configurations": len([r for r in results if r.get("success", False)]),
168
- "failed_configurations": len([r for r in results if not r.get("success", False)]),
176
+ "successful_configurations": len(
177
+ [r for r in results if r.get("success", False)]
178
+ ),
179
+ "failed_configurations": len(
180
+ [r for r in results if not r.get("success", False)]
181
+ ),
169
182
  "repetitions": repetitions,
170
183
  "metric_weights": metric_weights,
171
184
  "timestamp": datetime.now().isoformat(),
172
- "results": sorted_results
185
+ "results": sorted_results,
173
186
  }
174
-
187
+
175
188
  # Save results to file
176
189
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
177
- result_file = os.path.join(output_dir, f"comparison_results_{timestamp}.json")
178
-
190
+ result_file = os.path.join(
191
+ output_dir, f"comparison_results_{timestamp}.json"
192
+ )
193
+
179
194
  with open(result_file, "w") as f:
180
195
  json.dump(comparison_report, f, indent=2)
181
-
196
+
182
197
  # Generate visualizations
183
198
  visualizations_dir = os.path.join(output_dir, "visualizations")
184
199
  os.makedirs(visualizations_dir, exist_ok=True)
185
-
200
+
186
201
  _create_comparison_visualizations(
187
- comparison_report,
188
- output_dir=visualizations_dir,
189
- timestamp=timestamp
202
+ comparison_report, output_dir=visualizations_dir, timestamp=timestamp
190
203
  )
191
-
204
+
192
205
  logger.info(f"Comparison completed. Results saved to {result_file}")
193
-
206
+
194
207
  # Add report path to the result
195
208
  comparison_report["report_path"] = result_file
196
-
209
+
197
210
  return comparison_report
198
211
 
199
212
 
@@ -206,14 +219,14 @@ def _evaluate_single_configuration(
206
219
  ) -> Dict[str, Any]:
207
220
  """
208
221
  Evaluate a single configuration.
209
-
222
+
210
223
  Args:
211
224
  query: Research query to evaluate
212
225
  config: Configuration parameters
213
226
  model_name: Name of the LLM model to use
214
227
  provider: LLM provider
215
228
  search_tool: Search engine to use
216
-
229
+
217
230
  Returns:
218
231
  Dictionary with evaluation results
219
232
  """
@@ -226,73 +239,77 @@ def _evaluate_single_configuration(
226
239
  config_search_strategy = config.get("search_strategy", "iterdrag")
227
240
  config_max_results = config.get("max_results", 50)
228
241
  config_max_filtered_results = config.get("max_filtered_results", 20)
229
-
242
+
230
243
  # Initialize profiling tools
231
244
  speed_profiler = SpeedProfiler()
232
245
  resource_monitor = ResourceMonitor(sampling_interval=0.5)
233
-
246
+
234
247
  # Start profiling
235
248
  speed_profiler.start()
236
249
  resource_monitor.start()
237
-
250
+
238
251
  try:
239
252
  # Get LLM
240
253
  with speed_profiler.timer("llm_initialization"):
241
254
  llm = get_llm(
242
255
  temperature=config.get("temperature", 0.7),
243
256
  model_name=config_model_name,
244
- provider=config_provider
257
+ provider=config_provider,
245
258
  )
246
-
259
+
247
260
  # Set up search engine if specified
248
261
  with speed_profiler.timer("search_initialization"):
249
262
  search = None
250
263
  if config_search_tool:
251
264
  search = get_search(
252
- config_search_tool,
265
+ config_search_tool,
253
266
  llm_instance=llm,
254
267
  max_results=config_max_results,
255
- max_filtered_results=config_max_filtered_results
268
+ max_filtered_results=config_max_filtered_results,
256
269
  )
257
-
270
+
258
271
  # Create search system
259
272
  system = AdvancedSearchSystem(llm=llm, search=search)
260
273
  system.max_iterations = config_iterations
261
274
  system.questions_per_iteration = config_questions_per_iteration
262
275
  system.strategy_name = config_search_strategy
263
-
276
+
264
277
  # Run the analysis
265
278
  with speed_profiler.timer("analysis"):
266
279
  results = system.analyze_topic(query)
267
-
280
+
268
281
  # Stop profiling
269
282
  speed_profiler.stop()
270
283
  resource_monitor.stop()
271
-
284
+
272
285
  # Calculate metrics
273
286
  quality_metrics = calculate_quality_metrics(
274
287
  results=results,
275
- system_info={"all_links_of_system": getattr(system, "all_links_of_system", [])}
288
+ system_info={
289
+ "all_links_of_system": getattr(
290
+ system, "all_links_of_system", []
291
+ )
292
+ },
276
293
  )
277
-
294
+
278
295
  speed_metrics = calculate_speed_metrics(
279
296
  timing_info=speed_profiler.get_summary(),
280
297
  system_info={
281
298
  "iterations": config_iterations,
282
299
  "questions_per_iteration": config_questions_per_iteration,
283
- "results": results
284
- }
300
+ "results": results,
301
+ },
285
302
  )
286
-
303
+
287
304
  resource_metrics = calculate_resource_metrics(
288
305
  resource_info=resource_monitor.get_combined_stats(),
289
306
  system_info={
290
307
  "iterations": config_iterations,
291
308
  "questions_per_iteration": config_questions_per_iteration,
292
- "results": results
293
- }
309
+ "results": results,
310
+ },
294
311
  )
295
-
312
+
296
313
  # Return comprehensive results
297
314
  return {
298
315
  "query": query,
@@ -304,17 +321,17 @@ def _evaluate_single_configuration(
304
321
  "speed_metrics": speed_metrics,
305
322
  "resource_metrics": resource_metrics,
306
323
  "timing_details": speed_profiler.get_timings(),
307
- "resource_details": resource_monitor.get_combined_stats()
324
+ "resource_details": resource_monitor.get_combined_stats(),
308
325
  }
309
-
326
+
310
327
  except Exception as e:
311
328
  # Stop profiling on error
312
329
  speed_profiler.stop()
313
330
  resource_monitor.stop()
314
-
331
+
315
332
  # Log the error
316
333
  logger.error(f"Error evaluating configuration: {str(e)}")
317
-
334
+
318
335
  # Return error information
319
336
  return {
320
337
  "query": query,
@@ -322,78 +339,76 @@ def _evaluate_single_configuration(
322
339
  "success": False,
323
340
  "error": str(e),
324
341
  "timing_details": speed_profiler.get_timings(),
325
- "resource_details": resource_monitor.get_combined_stats()
342
+ "resource_details": resource_monitor.get_combined_stats(),
326
343
  }
327
344
 
328
345
 
329
346
  def _calculate_average_metrics(results: List[Dict[str, Any]]) -> Dict[str, Any]:
330
347
  """
331
348
  Calculate average metrics across multiple runs.
332
-
349
+
333
350
  Args:
334
351
  results: List of individual run results
335
-
352
+
336
353
  Returns:
337
354
  Dictionary with averaged metrics
338
355
  """
339
356
  # Check if there are any successful results
340
357
  if not results:
341
358
  return {}
342
-
359
+
343
360
  # Initialize average metrics
344
361
  avg_metrics = {
345
362
  "quality_metrics": {},
346
363
  "speed_metrics": {},
347
- "resource_metrics": {}
364
+ "resource_metrics": {},
348
365
  }
349
-
366
+
350
367
  # Quality metrics
351
368
  quality_keys = set()
352
369
  for result in results:
353
370
  quality_metrics = result.get("quality_metrics", {})
354
371
  quality_keys.update(quality_metrics.keys())
355
-
372
+
356
373
  for key in quality_keys:
357
374
  values = [r.get("quality_metrics", {}).get(key) for r in results]
358
375
  values = [v for v in values if v is not None]
359
376
  if values:
360
377
  avg_metrics["quality_metrics"][key] = sum(values) / len(values)
361
-
378
+
362
379
  # Speed metrics
363
380
  speed_keys = set()
364
381
  for result in results:
365
382
  speed_metrics = result.get("speed_metrics", {})
366
383
  speed_keys.update(speed_metrics.keys())
367
-
384
+
368
385
  for key in speed_keys:
369
386
  values = [r.get("speed_metrics", {}).get(key) for r in results]
370
387
  values = [v for v in values if v is not None]
371
388
  if values:
372
389
  avg_metrics["speed_metrics"][key] = sum(values) / len(values)
373
-
390
+
374
391
  # Resource metrics
375
392
  resource_keys = set()
376
393
  for result in results:
377
394
  resource_metrics = result.get("resource_metrics", {})
378
395
  resource_keys.update(resource_metrics.keys())
379
-
396
+
380
397
  for key in resource_keys:
381
398
  values = [r.get("resource_metrics", {}).get(key) for r in results]
382
399
  values = [v for v in values if v is not None]
383
400
  if values:
384
401
  avg_metrics["resource_metrics"][key] = sum(values) / len(values)
385
-
402
+
386
403
  return avg_metrics
387
404
 
388
405
 
389
406
  def _create_comparison_visualizations(
390
- comparison_report: Dict[str, Any],
391
- output_dir: str,
392
- timestamp: str
407
+ comparison_report: Dict[str, Any], output_dir: str, timestamp: str
393
408
  ):
394
409
  """
395
410
  Create visualizations for the comparison results.
396
-
411
+
397
412
  Args:
398
413
  comparison_report: Comparison report dictionary
399
414
  output_dir: Directory to save visualizations
@@ -401,75 +416,87 @@ def _create_comparison_visualizations(
401
416
  """
402
417
  # Check if there are successful results
403
418
  successful_results = [
404
- r for r in comparison_report.get("results", [])
419
+ r
420
+ for r in comparison_report.get("results", [])
405
421
  if r.get("success", False)
406
422
  ]
407
-
423
+
408
424
  if not successful_results:
409
425
  logger.warning("No successful configurations to visualize")
410
426
  return
411
-
427
+
412
428
  # Extract configuration names
413
- config_names = [r.get("name", f"Config {i+1}") for i, r in enumerate(successful_results)]
414
-
429
+ config_names = [
430
+ r.get("name", f"Config {i + 1}")
431
+ for i, r in enumerate(successful_results)
432
+ ]
433
+
415
434
  # 1. Overall score comparison
416
435
  plt.figure(figsize=(12, 6))
417
436
  scores = [r.get("overall_score", 0) for r in successful_results]
418
-
437
+
419
438
  # Create horizontal bar chart
420
- plt.barh(config_names, scores, color='skyblue')
421
- plt.xlabel('Overall Score')
422
- plt.ylabel('Configuration')
423
- plt.title('Configuration Performance Comparison')
424
- plt.grid(axis='x', linestyle='--', alpha=0.7)
439
+ plt.barh(config_names, scores, color="skyblue")
440
+ plt.xlabel("Overall Score")
441
+ plt.ylabel("Configuration")
442
+ plt.title("Configuration Performance Comparison")
443
+ plt.grid(axis="x", linestyle="--", alpha=0.7)
425
444
  plt.tight_layout()
426
- plt.savefig(os.path.join(output_dir, f"overall_score_comparison_{timestamp}.png"))
445
+ plt.savefig(
446
+ os.path.join(output_dir, f"overall_score_comparison_{timestamp}.png")
447
+ )
427
448
  plt.close()
428
-
449
+
429
450
  # 2. Quality metrics comparison
430
451
  quality_metrics = ["overall_quality", "source_count", "lexical_diversity"]
431
452
  _create_metric_comparison_chart(
432
- successful_results,
433
- config_names,
434
- quality_metrics,
453
+ successful_results,
454
+ config_names,
455
+ quality_metrics,
435
456
  "quality_metrics",
436
457
  "Quality Metrics Comparison",
437
- os.path.join(output_dir, f"quality_metrics_comparison_{timestamp}.png")
458
+ os.path.join(output_dir, f"quality_metrics_comparison_{timestamp}.png"),
438
459
  )
439
-
460
+
440
461
  # 3. Speed metrics comparison
441
462
  speed_metrics = ["overall_speed", "total_duration", "duration_per_question"]
442
463
  _create_metric_comparison_chart(
443
- successful_results,
444
- config_names,
445
- speed_metrics,
464
+ successful_results,
465
+ config_names,
466
+ speed_metrics,
446
467
  "speed_metrics",
447
468
  "Speed Metrics Comparison",
448
- os.path.join(output_dir, f"speed_metrics_comparison_{timestamp}.png")
469
+ os.path.join(output_dir, f"speed_metrics_comparison_{timestamp}.png"),
449
470
  )
450
-
471
+
451
472
  # 4. Resource metrics comparison
452
- resource_metrics = ["overall_resource", "process_memory_max_mb", "system_cpu_avg"]
473
+ resource_metrics = [
474
+ "overall_resource",
475
+ "process_memory_max_mb",
476
+ "system_cpu_avg",
477
+ ]
453
478
  _create_metric_comparison_chart(
454
- successful_results,
455
- config_names,
456
- resource_metrics,
479
+ successful_results,
480
+ config_names,
481
+ resource_metrics,
457
482
  "resource_metrics",
458
483
  "Resource Usage Comparison",
459
- os.path.join(output_dir, f"resource_metrics_comparison_{timestamp}.png")
484
+ os.path.join(
485
+ output_dir, f"resource_metrics_comparison_{timestamp}.png"
486
+ ),
460
487
  )
461
-
488
+
462
489
  # 5. Spider chart for multi-dimensional comparison
463
490
  _create_spider_chart(
464
491
  successful_results,
465
492
  config_names,
466
- os.path.join(output_dir, f"spider_chart_comparison_{timestamp}.png")
493
+ os.path.join(output_dir, f"spider_chart_comparison_{timestamp}.png"),
467
494
  )
468
-
495
+
469
496
  # 6. Pareto frontier chart for quality vs. speed
470
497
  _create_pareto_chart(
471
498
  successful_results,
472
- os.path.join(output_dir, f"pareto_chart_comparison_{timestamp}.png")
499
+ os.path.join(output_dir, f"pareto_chart_comparison_{timestamp}.png"),
473
500
  )
474
501
 
475
502
 
@@ -479,11 +506,11 @@ def _create_metric_comparison_chart(
479
506
  metric_keys: List[str],
480
507
  metric_category: str,
481
508
  title: str,
482
- output_path: str
509
+ output_path: str,
483
510
  ):
484
511
  """
485
512
  Create a chart comparing specific metrics across configurations.
486
-
513
+
487
514
  Args:
488
515
  results: List of configuration results
489
516
  config_names: Names of configurations
@@ -493,21 +520,23 @@ def _create_metric_comparison_chart(
493
520
  output_path: Path to save the chart
494
521
  """
495
522
  # Create figure with multiple subplots (one per metric)
496
- fig, axes = plt.subplots(len(metric_keys), 1, figsize=(12, 5 * len(metric_keys)))
497
-
523
+ fig, axes = plt.subplots(
524
+ len(metric_keys), 1, figsize=(12, 5 * len(metric_keys))
525
+ )
526
+
498
527
  # Handle case with only one metric
499
528
  if len(metric_keys) == 1:
500
529
  axes = [axes]
501
-
530
+
502
531
  for i, metric_key in enumerate(metric_keys):
503
532
  ax = axes[i]
504
-
533
+
505
534
  # Get metric values
506
535
  metric_values = []
507
536
  for result in results:
508
537
  metrics = result.get("avg_metrics", {}).get(metric_category, {})
509
538
  value = metrics.get(metric_key)
510
-
539
+
511
540
  # Handle time values for better visualization
512
541
  if "duration" in metric_key and value is not None:
513
542
  # Convert to seconds if > 60 seconds, minutes if > 60 minutes
@@ -519,22 +548,26 @@ def _create_metric_comparison_chart(
519
548
  metric_key += " (minutes)"
520
549
  else:
521
550
  metric_key += " (seconds)"
522
-
551
+
523
552
  metric_values.append(value if value is not None else 0)
524
-
553
+
525
554
  # Create horizontal bar chart
526
- bars = ax.barh(config_names, metric_values, color='lightblue')
527
- ax.set_xlabel(metric_key.replace('_', ' ').title())
555
+ bars = ax.barh(config_names, metric_values, color="lightblue")
556
+ ax.set_xlabel(metric_key.replace("_", " ").title())
528
557
  ax.set_title(f"{metric_key.replace('_', ' ').title()}")
529
- ax.grid(axis='x', linestyle='--', alpha=0.7)
530
-
558
+ ax.grid(axis="x", linestyle="--", alpha=0.7)
559
+
531
560
  # Add value labels to bars
532
561
  for bar in bars:
533
562
  width = bar.get_width()
534
563
  label_x_pos = width * 1.01
535
- ax.text(label_x_pos, bar.get_y() + bar.get_height()/2, f'{width:.2f}',
536
- va='center')
537
-
564
+ ax.text(
565
+ label_x_pos,
566
+ bar.get_y() + bar.get_height() / 2,
567
+ f"{width:.2f}",
568
+ va="center",
569
+ )
570
+
538
571
  plt.suptitle(title, fontsize=16)
539
572
  plt.tight_layout()
540
573
  plt.savefig(output_path)
@@ -542,13 +575,11 @@ def _create_metric_comparison_chart(
542
575
 
543
576
 
544
577
  def _create_spider_chart(
545
- results: List[Dict[str, Any]],
546
- config_names: List[str],
547
- output_path: str
578
+ results: List[Dict[str, Any]], config_names: List[str], output_path: str
548
579
  ):
549
580
  """
550
581
  Create a spider chart comparing metrics across configurations.
551
-
582
+
552
583
  Args:
553
584
  results: List of configuration results
554
585
  config_names: Names of configurations
@@ -560,81 +591,105 @@ def _create_spider_chart(
560
591
  from matplotlib.projections import register_projection
561
592
  from matplotlib.projections.polar import PolarAxes
562
593
  from matplotlib.spines import Spine
563
-
564
- def radar_factory(num_vars, frame='circle'):
594
+
595
+ def radar_factory(num_vars, frame="circle"):
565
596
  """Create a radar chart with `num_vars` axes."""
566
597
  # Calculate evenly-spaced axis angles
567
- theta = np.linspace(0, 2*np.pi, num_vars, endpoint=False)
568
-
598
+ theta = np.linspace(0, 2 * np.pi, num_vars, endpoint=False)
599
+
569
600
  class RadarAxes(PolarAxes):
570
- name = 'radar'
571
-
601
+ name = "radar"
602
+
572
603
  def __init__(self, *args, **kwargs):
573
604
  super().__init__(*args, **kwargs)
574
- self.set_theta_zero_location('N')
575
-
605
+ self.set_theta_zero_location("N")
606
+
576
607
  def fill(self, *args, closed=True, **kwargs):
577
608
  return super().fill(closed=closed, *args, **kwargs)
578
-
609
+
579
610
  def plot(self, *args, **kwargs):
580
611
  return super().plot(*args, **kwargs)
581
-
612
+
582
613
  def set_varlabels(self, labels):
583
614
  self.set_thetagrids(np.degrees(theta), labels)
584
-
615
+
585
616
  def _gen_axes_patch(self):
586
- if frame == 'circle':
617
+ if frame == "circle":
587
618
  return Circle((0.5, 0.5), 0.5)
588
- elif frame == 'polygon':
589
- return RegularPolygon((0.5, 0.5), num_vars, radius=0.5, edgecolor="k")
619
+ elif frame == "polygon":
620
+ return RegularPolygon(
621
+ (0.5, 0.5), num_vars, radius=0.5, edgecolor="k"
622
+ )
590
623
  else:
591
- raise ValueError("Unknown value for 'frame': %s" % frame)
592
-
624
+ raise ValueError(
625
+ "Unknown value for 'frame': %s" % frame
626
+ )
627
+
593
628
  def _gen_axes_spines(self):
594
- if frame == 'circle':
629
+ if frame == "circle":
595
630
  return super()._gen_axes_spines()
596
- elif frame == 'polygon':
631
+ elif frame == "polygon":
597
632
  spine_type = Spine.circular_spine
598
633
  verts = unit_poly_verts(num_vars)
599
634
  vertices = [(0.5, 0.5)] + verts
600
- codes = [Path.MOVETO] + [Path.LINETO] * num_vars + [Path.CLOSEPOLY]
635
+ codes = (
636
+ [Path.MOVETO]
637
+ + [Path.LINETO] * num_vars
638
+ + [Path.CLOSEPOLY]
639
+ )
601
640
  path = Path(vertices, codes)
602
641
  spine = Spine(self, spine_type, path)
603
642
  spine.set_transform(self.transAxes)
604
- return {'polar': spine}
643
+ return {"polar": spine}
605
644
  else:
606
- raise ValueError("Unknown value for 'frame': %s" % frame)
607
-
645
+ raise ValueError(
646
+ "Unknown value for 'frame': %s" % frame
647
+ )
648
+
608
649
  def unit_poly_verts(num_vars):
609
650
  """Return vertices of polygon for radar chart."""
610
651
  verts = []
611
652
  for i in range(num_vars):
612
653
  angle = theta[i]
613
- verts.append((0.5 * (1 + np.cos(angle)), 0.5 * (1 + np.sin(angle))))
654
+ verts.append(
655
+ (0.5 * (1 + np.cos(angle)), 0.5 * (1 + np.sin(angle)))
656
+ )
614
657
  return verts
615
-
658
+
616
659
  register_projection(RadarAxes)
617
660
  return theta
618
-
661
+
619
662
  # Select metrics for the spider chart
620
663
  metrics = [
621
664
  {"name": "Quality", "key": "quality_metrics.overall_quality"},
622
665
  {"name": "Speed", "key": "speed_metrics.overall_speed"},
623
- {"name": "Sources", "key": "quality_metrics.normalized_source_count"},
624
- {"name": "Content", "key": "quality_metrics.normalized_knowledge_length"},
625
- {"name": "Memory", "key": "resource_metrics.normalized_memory_usage", "invert": True},
666
+ {
667
+ "name": "Sources",
668
+ "key": "quality_metrics.normalized_source_count",
669
+ },
670
+ {
671
+ "name": "Content",
672
+ "key": "quality_metrics.normalized_knowledge_length",
673
+ },
674
+ {
675
+ "name": "Memory",
676
+ "key": "resource_metrics.normalized_memory_usage",
677
+ "invert": True,
678
+ },
626
679
  ]
627
-
680
+
628
681
  # Extract metric values
629
682
  spoke_labels = [m["name"] for m in metrics]
630
683
  num_vars = len(spoke_labels)
631
684
  theta = radar_factory(num_vars)
632
-
633
- fig, ax = plt.subplots(figsize=(10, 10), subplot_kw=dict(projection='radar'))
634
-
685
+
686
+ fig, ax = plt.subplots(
687
+ figsize=(10, 10), subplot_kw=dict(projection="radar")
688
+ )
689
+
635
690
  # Color map for different configurations
636
691
  colors = plt.cm.viridis(np.linspace(0, 1, len(results)))
637
-
692
+
638
693
  for i, result in enumerate(results):
639
694
  values = []
640
695
  for metric in metrics:
@@ -643,45 +698,53 @@ def _create_spider_chart(
643
698
  value = result.get("avg_metrics", {})
644
699
  for part in key_parts:
645
700
  value = value.get(part, 0) if isinstance(value, dict) else 0
646
-
701
+
647
702
  # Invert if needed (for metrics where lower is better)
648
703
  if metric.get("invert", False):
649
704
  value = 1.0 - value
650
-
705
+
651
706
  values.append(value)
652
-
707
+
653
708
  # Plot this configuration
654
- ax.plot(theta, values, color=colors[i], linewidth=2, label=config_names[i])
709
+ ax.plot(
710
+ theta,
711
+ values,
712
+ color=colors[i],
713
+ linewidth=2,
714
+ label=config_names[i],
715
+ )
655
716
  ax.fill(theta, values, color=colors[i], alpha=0.25)
656
-
717
+
657
718
  # Set chart properties
658
719
  ax.set_varlabels(spoke_labels)
659
- plt.legend(loc='best', bbox_to_anchor=(0.5, 0.1))
660
- plt.title('Multi-Dimensional Configuration Comparison', size=16, y=1.05)
720
+ plt.legend(loc="best", bbox_to_anchor=(0.5, 0.1))
721
+ plt.title("Multi-Dimensional Configuration Comparison", size=16, y=1.05)
661
722
  plt.tight_layout()
662
-
723
+
663
724
  # Save chart
664
725
  plt.savefig(output_path)
665
726
  plt.close()
666
-
727
+
667
728
  except Exception as e:
668
729
  logger.error(f"Error creating spider chart: {str(e)}")
669
730
  # Create a text-based chart as fallback
670
731
  plt.figure(figsize=(10, 6))
671
- plt.text(0.5, 0.5, f"Spider chart could not be created: {str(e)}",
672
- horizontalalignment='center', verticalalignment='center')
673
- plt.axis('off')
732
+ plt.text(
733
+ 0.5,
734
+ 0.5,
735
+ f"Spider chart could not be created: {str(e)}",
736
+ horizontalalignment="center",
737
+ verticalalignment="center",
738
+ )
739
+ plt.axis("off")
674
740
  plt.savefig(output_path)
675
741
  plt.close()
676
742
 
677
743
 
678
- def _create_pareto_chart(
679
- results: List[Dict[str, Any]],
680
- output_path: str
681
- ):
744
+ def _create_pareto_chart(results: List[Dict[str, Any]], output_path: str):
682
745
  """
683
746
  Create a Pareto frontier chart showing quality vs. speed tradeoff.
684
-
747
+
685
748
  Args:
686
749
  results: List of configuration results
687
750
  output_path: Path to save the chart
@@ -690,30 +753,32 @@ def _create_pareto_chart(
690
753
  quality_scores = []
691
754
  speed_scores = []
692
755
  names = []
693
-
756
+
694
757
  for result in results:
695
758
  metrics = result.get("avg_metrics", {})
696
759
  quality = metrics.get("quality_metrics", {}).get("overall_quality", 0)
697
-
760
+
698
761
  # For speed, we use inverse of duration (so higher is better)
699
762
  duration = metrics.get("speed_metrics", {}).get("total_duration", 1)
700
763
  speed = 1.0 / max(duration, 0.001) # Avoid division by zero
701
-
764
+
702
765
  quality_scores.append(quality)
703
766
  speed_scores.append(speed)
704
767
  names.append(result.get("name", "Configuration"))
705
-
768
+
706
769
  # Create scatter plot
707
770
  plt.figure(figsize=(10, 8))
708
771
  plt.scatter(quality_scores, speed_scores, s=100, alpha=0.7)
709
-
772
+
710
773
  # Add labels for each point
711
774
  for i, name in enumerate(names):
712
- plt.annotate(name,
713
- (quality_scores[i], speed_scores[i]),
714
- xytext=(5, 5),
715
- textcoords='offset points')
716
-
775
+ plt.annotate(
776
+ name,
777
+ (quality_scores[i], speed_scores[i]),
778
+ xytext=(5, 5),
779
+ textcoords="offset points",
780
+ )
781
+
717
782
  # Identify Pareto frontier
718
783
  pareto_points = []
719
784
  for i, (q, s) in enumerate(zip(quality_scores, speed_scores)):
@@ -724,45 +789,57 @@ def _create_pareto_chart(
724
789
  break
725
790
  if is_pareto:
726
791
  pareto_points.append(i)
727
-
792
+
728
793
  # Highlight Pareto frontier
729
794
  pareto_quality = [quality_scores[i] for i in pareto_points]
730
795
  pareto_speed = [speed_scores[i] for i in pareto_points]
731
-
796
+
732
797
  # Sort pareto points for line drawing
733
798
  pareto_sorted = sorted(zip(pareto_quality, pareto_speed, pareto_points))
734
799
  pareto_quality = [p[0] for p in pareto_sorted]
735
800
  pareto_speed = [p[1] for p in pareto_sorted]
736
801
  pareto_indices = [p[2] for p in pareto_sorted]
737
-
802
+
738
803
  # Draw Pareto frontier line
739
- plt.plot(pareto_quality, pareto_speed, 'r--', linewidth=2)
740
-
804
+ plt.plot(pareto_quality, pareto_speed, "r--", linewidth=2)
805
+
741
806
  # Highlight Pareto optimal points
742
- plt.scatter([quality_scores[i] for i in pareto_indices],
743
- [speed_scores[i] for i in pareto_indices],
744
- s=150, facecolors='none', edgecolors='r', linewidth=2)
745
-
807
+ plt.scatter(
808
+ [quality_scores[i] for i in pareto_indices],
809
+ [speed_scores[i] for i in pareto_indices],
810
+ s=150,
811
+ facecolors="none",
812
+ edgecolors="r",
813
+ linewidth=2,
814
+ )
815
+
746
816
  # Add labels for Pareto optimal configurations
747
817
  for i in pareto_indices:
748
- plt.annotate(names[i],
749
- (quality_scores[i], speed_scores[i]),
750
- xytext=(8, 8),
751
- textcoords='offset points',
752
- bbox=dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.7))
753
-
818
+ plt.annotate(
819
+ names[i],
820
+ (quality_scores[i], speed_scores[i]),
821
+ xytext=(8, 8),
822
+ textcoords="offset points",
823
+ bbox=dict(boxstyle="round,pad=0.5", fc="yellow", alpha=0.7),
824
+ )
825
+
754
826
  # Set chart properties
755
- plt.xlabel('Quality Score (higher is better)')
756
- plt.ylabel('Speed Score (higher is better)')
757
- plt.title('Quality vs. Speed Tradeoff (Pareto Frontier)', size=14)
758
- plt.grid(True, linestyle='--', alpha=0.7)
759
-
827
+ plt.xlabel("Quality Score (higher is better)")
828
+ plt.ylabel("Speed Score (higher is better)")
829
+ plt.title("Quality vs. Speed Tradeoff (Pareto Frontier)", size=14)
830
+ plt.grid(True, linestyle="--", alpha=0.7)
831
+
760
832
  # Add explanation
761
- plt.figtext(0.5, 0.01,
762
- "Points on the red line are Pareto optimal configurations\n"
763
- "(no other configuration is better in both quality and speed)",
764
- ha='center', fontsize=10, bbox=dict(boxstyle='round', fc='white', alpha=0.7))
765
-
833
+ plt.figtext(
834
+ 0.5,
835
+ 0.01,
836
+ "Points on the red line are Pareto optimal configurations\n"
837
+ "(no other configuration is better in both quality and speed)",
838
+ ha="center",
839
+ fontsize=10,
840
+ bbox=dict(boxstyle="round", fc="white", alpha=0.7),
841
+ )
842
+
766
843
  plt.tight_layout()
767
844
  plt.savefig(output_path)
768
- plt.close()
845
+ plt.close()