local-deep-research 0.4.4__py3-none-any.whl → 0.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- local_deep_research/__init__.py +7 -0
- local_deep_research/__version__.py +1 -1
- local_deep_research/advanced_search_system/answer_decoding/__init__.py +5 -0
- local_deep_research/advanced_search_system/answer_decoding/browsecomp_answer_decoder.py +421 -0
- local_deep_research/advanced_search_system/candidate_exploration/README.md +219 -0
- local_deep_research/advanced_search_system/candidate_exploration/__init__.py +25 -0
- local_deep_research/advanced_search_system/candidate_exploration/adaptive_explorer.py +329 -0
- local_deep_research/advanced_search_system/candidate_exploration/base_explorer.py +341 -0
- local_deep_research/advanced_search_system/candidate_exploration/constraint_guided_explorer.py +436 -0
- local_deep_research/advanced_search_system/candidate_exploration/diversity_explorer.py +457 -0
- local_deep_research/advanced_search_system/candidate_exploration/parallel_explorer.py +250 -0
- local_deep_research/advanced_search_system/candidate_exploration/progressive_explorer.py +255 -0
- local_deep_research/advanced_search_system/candidates/__init__.py +5 -0
- local_deep_research/advanced_search_system/candidates/base_candidate.py +59 -0
- local_deep_research/advanced_search_system/constraint_checking/README.md +150 -0
- local_deep_research/advanced_search_system/constraint_checking/__init__.py +35 -0
- local_deep_research/advanced_search_system/constraint_checking/base_constraint_checker.py +122 -0
- local_deep_research/advanced_search_system/constraint_checking/constraint_checker.py +223 -0
- local_deep_research/advanced_search_system/constraint_checking/constraint_satisfaction_tracker.py +387 -0
- local_deep_research/advanced_search_system/constraint_checking/dual_confidence_checker.py +424 -0
- local_deep_research/advanced_search_system/constraint_checking/evidence_analyzer.py +174 -0
- local_deep_research/advanced_search_system/constraint_checking/intelligent_constraint_relaxer.py +503 -0
- local_deep_research/advanced_search_system/constraint_checking/rejection_engine.py +143 -0
- local_deep_research/advanced_search_system/constraint_checking/strict_checker.py +259 -0
- local_deep_research/advanced_search_system/constraint_checking/threshold_checker.py +213 -0
- local_deep_research/advanced_search_system/constraints/__init__.py +6 -0
- local_deep_research/advanced_search_system/constraints/base_constraint.py +58 -0
- local_deep_research/advanced_search_system/constraints/constraint_analyzer.py +143 -0
- local_deep_research/advanced_search_system/evidence/__init__.py +12 -0
- local_deep_research/advanced_search_system/evidence/base_evidence.py +57 -0
- local_deep_research/advanced_search_system/evidence/evaluator.py +159 -0
- local_deep_research/advanced_search_system/evidence/requirements.py +122 -0
- local_deep_research/advanced_search_system/filters/base_filter.py +3 -1
- local_deep_research/advanced_search_system/filters/cross_engine_filter.py +8 -2
- local_deep_research/advanced_search_system/filters/journal_reputation_filter.py +43 -29
- local_deep_research/advanced_search_system/findings/repository.py +54 -17
- local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +3 -1
- local_deep_research/advanced_search_system/query_generation/adaptive_query_generator.py +405 -0
- local_deep_research/advanced_search_system/questions/__init__.py +16 -0
- local_deep_research/advanced_search_system/questions/atomic_fact_question.py +171 -0
- local_deep_research/advanced_search_system/questions/browsecomp_question.py +287 -0
- local_deep_research/advanced_search_system/questions/decomposition_question.py +13 -4
- local_deep_research/advanced_search_system/questions/entity_aware_question.py +184 -0
- local_deep_research/advanced_search_system/questions/standard_question.py +9 -3
- local_deep_research/advanced_search_system/search_optimization/cross_constraint_manager.py +624 -0
- local_deep_research/advanced_search_system/source_management/diversity_manager.py +613 -0
- local_deep_research/advanced_search_system/strategies/__init__.py +42 -0
- local_deep_research/advanced_search_system/strategies/adaptive_decomposition_strategy.py +564 -0
- local_deep_research/advanced_search_system/strategies/base_strategy.py +4 -4
- local_deep_research/advanced_search_system/strategies/browsecomp_entity_strategy.py +1031 -0
- local_deep_research/advanced_search_system/strategies/browsecomp_optimized_strategy.py +778 -0
- local_deep_research/advanced_search_system/strategies/concurrent_dual_confidence_strategy.py +446 -0
- local_deep_research/advanced_search_system/strategies/constrained_search_strategy.py +1348 -0
- local_deep_research/advanced_search_system/strategies/constraint_parallel_strategy.py +522 -0
- local_deep_research/advanced_search_system/strategies/direct_search_strategy.py +217 -0
- local_deep_research/advanced_search_system/strategies/dual_confidence_strategy.py +320 -0
- local_deep_research/advanced_search_system/strategies/dual_confidence_with_rejection.py +219 -0
- local_deep_research/advanced_search_system/strategies/early_stop_constrained_strategy.py +369 -0
- local_deep_research/advanced_search_system/strategies/entity_aware_source_strategy.py +140 -0
- local_deep_research/advanced_search_system/strategies/evidence_based_strategy.py +1248 -0
- local_deep_research/advanced_search_system/strategies/evidence_based_strategy_v2.py +1337 -0
- local_deep_research/advanced_search_system/strategies/focused_iteration_strategy.py +537 -0
- local_deep_research/advanced_search_system/strategies/improved_evidence_based_strategy.py +782 -0
- local_deep_research/advanced_search_system/strategies/iterative_reasoning_strategy.py +760 -0
- local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +55 -21
- local_deep_research/advanced_search_system/strategies/llm_driven_modular_strategy.py +865 -0
- local_deep_research/advanced_search_system/strategies/modular_strategy.py +1142 -0
- local_deep_research/advanced_search_system/strategies/parallel_constrained_strategy.py +506 -0
- local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +34 -16
- local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +29 -9
- local_deep_research/advanced_search_system/strategies/recursive_decomposition_strategy.py +492 -0
- local_deep_research/advanced_search_system/strategies/smart_decomposition_strategy.py +284 -0
- local_deep_research/advanced_search_system/strategies/smart_query_strategy.py +515 -0
- local_deep_research/advanced_search_system/strategies/source_based_strategy.py +48 -24
- local_deep_research/advanced_search_system/strategies/standard_strategy.py +34 -14
- local_deep_research/advanced_search_system/tools/base_tool.py +7 -2
- local_deep_research/api/benchmark_functions.py +6 -2
- local_deep_research/api/research_functions.py +10 -4
- local_deep_research/benchmarks/__init__.py +9 -7
- local_deep_research/benchmarks/benchmark_functions.py +6 -2
- local_deep_research/benchmarks/cli/benchmark_commands.py +27 -10
- local_deep_research/benchmarks/cli.py +38 -13
- local_deep_research/benchmarks/comparison/__init__.py +4 -2
- local_deep_research/benchmarks/comparison/evaluator.py +316 -239
- local_deep_research/benchmarks/datasets/__init__.py +1 -1
- local_deep_research/benchmarks/datasets/base.py +91 -72
- local_deep_research/benchmarks/datasets/browsecomp.py +54 -33
- local_deep_research/benchmarks/datasets/custom_dataset_template.py +19 -19
- local_deep_research/benchmarks/datasets/simpleqa.py +14 -14
- local_deep_research/benchmarks/datasets/utils.py +48 -29
- local_deep_research/benchmarks/datasets.py +4 -11
- local_deep_research/benchmarks/efficiency/__init__.py +8 -4
- local_deep_research/benchmarks/efficiency/resource_monitor.py +223 -171
- local_deep_research/benchmarks/efficiency/speed_profiler.py +62 -48
- local_deep_research/benchmarks/evaluators/browsecomp.py +3 -1
- local_deep_research/benchmarks/evaluators/composite.py +6 -2
- local_deep_research/benchmarks/evaluators/simpleqa.py +36 -13
- local_deep_research/benchmarks/graders.py +32 -10
- local_deep_research/benchmarks/metrics/README.md +1 -1
- local_deep_research/benchmarks/metrics/calculation.py +25 -10
- local_deep_research/benchmarks/metrics/reporting.py +7 -3
- local_deep_research/benchmarks/metrics/visualization.py +42 -23
- local_deep_research/benchmarks/metrics.py +1 -1
- local_deep_research/benchmarks/optimization/__init__.py +3 -1
- local_deep_research/benchmarks/optimization/api.py +7 -1
- local_deep_research/benchmarks/optimization/optuna_optimizer.py +75 -26
- local_deep_research/benchmarks/runners.py +48 -15
- local_deep_research/citation_handler.py +65 -92
- local_deep_research/citation_handlers/__init__.py +15 -0
- local_deep_research/citation_handlers/base_citation_handler.py +70 -0
- local_deep_research/citation_handlers/forced_answer_citation_handler.py +179 -0
- local_deep_research/citation_handlers/precision_extraction_handler.py +550 -0
- local_deep_research/citation_handlers/standard_citation_handler.py +80 -0
- local_deep_research/config/llm_config.py +271 -169
- local_deep_research/config/search_config.py +14 -5
- local_deep_research/defaults/__init__.py +0 -1
- local_deep_research/metrics/__init__.py +13 -0
- local_deep_research/metrics/database.py +58 -0
- local_deep_research/metrics/db_models.py +115 -0
- local_deep_research/metrics/migrate_add_provider_to_token_usage.py +148 -0
- local_deep_research/metrics/migrate_call_stack_tracking.py +105 -0
- local_deep_research/metrics/migrate_enhanced_tracking.py +75 -0
- local_deep_research/metrics/migrate_research_ratings.py +31 -0
- local_deep_research/metrics/models.py +61 -0
- local_deep_research/metrics/pricing/__init__.py +12 -0
- local_deep_research/metrics/pricing/cost_calculator.py +237 -0
- local_deep_research/metrics/pricing/pricing_cache.py +143 -0
- local_deep_research/metrics/pricing/pricing_fetcher.py +240 -0
- local_deep_research/metrics/query_utils.py +51 -0
- local_deep_research/metrics/search_tracker.py +380 -0
- local_deep_research/metrics/token_counter.py +1078 -0
- local_deep_research/migrate_db.py +3 -1
- local_deep_research/report_generator.py +22 -8
- local_deep_research/search_system.py +390 -9
- local_deep_research/test_migration.py +15 -5
- local_deep_research/utilities/db_utils.py +7 -4
- local_deep_research/utilities/es_utils.py +115 -104
- local_deep_research/utilities/llm_utils.py +15 -5
- local_deep_research/utilities/log_utils.py +151 -0
- local_deep_research/utilities/search_cache.py +387 -0
- local_deep_research/utilities/search_utilities.py +14 -6
- local_deep_research/utilities/threading_utils.py +92 -0
- local_deep_research/utilities/url_utils.py +6 -0
- local_deep_research/web/api.py +347 -0
- local_deep_research/web/app.py +13 -17
- local_deep_research/web/app_factory.py +71 -66
- local_deep_research/web/database/migrate_to_ldr_db.py +12 -4
- local_deep_research/web/database/migrations.py +20 -3
- local_deep_research/web/database/models.py +74 -25
- local_deep_research/web/database/schema_upgrade.py +49 -29
- local_deep_research/web/models/database.py +63 -83
- local_deep_research/web/routes/api_routes.py +56 -22
- local_deep_research/web/routes/benchmark_routes.py +4 -1
- local_deep_research/web/routes/globals.py +22 -0
- local_deep_research/web/routes/history_routes.py +71 -46
- local_deep_research/web/routes/metrics_routes.py +1155 -0
- local_deep_research/web/routes/research_routes.py +192 -54
- local_deep_research/web/routes/settings_routes.py +156 -55
- local_deep_research/web/services/research_service.py +412 -251
- local_deep_research/web/services/resource_service.py +36 -11
- local_deep_research/web/services/settings_manager.py +55 -17
- local_deep_research/web/services/settings_service.py +12 -4
- local_deep_research/web/services/socket_service.py +295 -188
- local_deep_research/web/static/css/custom_dropdown.css +180 -0
- local_deep_research/web/static/css/styles.css +39 -1
- local_deep_research/web/static/js/components/detail.js +633 -267
- local_deep_research/web/static/js/components/details.js +751 -0
- local_deep_research/web/static/js/components/fallback/formatting.js +11 -11
- local_deep_research/web/static/js/components/fallback/ui.js +23 -23
- local_deep_research/web/static/js/components/history.js +76 -76
- local_deep_research/web/static/js/components/logpanel.js +61 -13
- local_deep_research/web/static/js/components/progress.js +13 -2
- local_deep_research/web/static/js/components/research.js +99 -12
- local_deep_research/web/static/js/components/results.js +239 -106
- local_deep_research/web/static/js/main.js +40 -40
- local_deep_research/web/static/js/services/audio.js +1 -1
- local_deep_research/web/static/js/services/formatting.js +11 -11
- local_deep_research/web/static/js/services/keyboard.js +157 -0
- local_deep_research/web/static/js/services/pdf.js +80 -80
- local_deep_research/web/static/sounds/README.md +1 -1
- local_deep_research/web/templates/base.html +1 -0
- local_deep_research/web/templates/components/log_panel.html +7 -1
- local_deep_research/web/templates/components/mobile_nav.html +1 -1
- local_deep_research/web/templates/components/sidebar.html +3 -0
- local_deep_research/web/templates/pages/cost_analytics.html +1245 -0
- local_deep_research/web/templates/pages/details.html +325 -24
- local_deep_research/web/templates/pages/history.html +1 -1
- local_deep_research/web/templates/pages/metrics.html +1929 -0
- local_deep_research/web/templates/pages/progress.html +2 -2
- local_deep_research/web/templates/pages/research.html +53 -17
- local_deep_research/web/templates/pages/results.html +12 -1
- local_deep_research/web/templates/pages/star_reviews.html +803 -0
- local_deep_research/web/utils/formatters.py +9 -3
- local_deep_research/web_search_engines/default_search_engines.py +5 -3
- local_deep_research/web_search_engines/engines/full_search.py +8 -2
- local_deep_research/web_search_engines/engines/meta_search_engine.py +59 -20
- local_deep_research/web_search_engines/engines/search_engine_arxiv.py +19 -6
- local_deep_research/web_search_engines/engines/search_engine_brave.py +6 -2
- local_deep_research/web_search_engines/engines/search_engine_ddg.py +3 -1
- local_deep_research/web_search_engines/engines/search_engine_elasticsearch.py +81 -58
- local_deep_research/web_search_engines/engines/search_engine_github.py +46 -15
- local_deep_research/web_search_engines/engines/search_engine_google_pse.py +16 -6
- local_deep_research/web_search_engines/engines/search_engine_guardian.py +39 -15
- local_deep_research/web_search_engines/engines/search_engine_local.py +58 -25
- local_deep_research/web_search_engines/engines/search_engine_local_all.py +15 -5
- local_deep_research/web_search_engines/engines/search_engine_pubmed.py +63 -21
- local_deep_research/web_search_engines/engines/search_engine_searxng.py +37 -11
- local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +27 -9
- local_deep_research/web_search_engines/engines/search_engine_serpapi.py +12 -4
- local_deep_research/web_search_engines/engines/search_engine_wayback.py +31 -10
- local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +12 -3
- local_deep_research/web_search_engines/search_engine_base.py +83 -35
- local_deep_research/web_search_engines/search_engine_factory.py +25 -8
- local_deep_research/web_search_engines/search_engines_config.py +9 -3
- {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/METADATA +7 -1
- local_deep_research-0.5.2.dist-info/RECORD +265 -0
- local_deep_research-0.4.4.dist-info/RECORD +0 -177
- {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/WHEEL +0 -0
- {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/entry_points.txt +0 -0
- {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/licenses/LICENSE +0 -0
@@ -8,24 +8,28 @@ and evaluating their performance across various metrics.
|
|
8
8
|
import json
|
9
9
|
import logging
|
10
10
|
import os
|
11
|
-
import time
|
12
11
|
from datetime import datetime
|
13
|
-
from typing import Dict, List, Optional
|
12
|
+
from typing import Any, Dict, List, Optional
|
14
13
|
|
15
|
-
import numpy as np
|
16
14
|
import matplotlib.pyplot as plt
|
15
|
+
from matplotlib.patches import Circle, RegularPolygon
|
16
|
+
import numpy as np
|
17
17
|
|
18
|
-
from local_deep_research.
|
19
|
-
|
20
|
-
|
21
|
-
from local_deep_research.benchmarks.efficiency.speed_profiler import
|
22
|
-
|
18
|
+
from local_deep_research.benchmarks.efficiency.resource_monitor import (
|
19
|
+
ResourceMonitor,
|
20
|
+
)
|
21
|
+
from local_deep_research.benchmarks.efficiency.speed_profiler import (
|
22
|
+
SpeedProfiler,
|
23
|
+
)
|
23
24
|
from local_deep_research.benchmarks.optimization.metrics import (
|
25
|
+
calculate_combined_score,
|
24
26
|
calculate_quality_metrics,
|
25
|
-
calculate_speed_metrics,
|
26
27
|
calculate_resource_metrics,
|
27
|
-
|
28
|
+
calculate_speed_metrics,
|
28
29
|
)
|
30
|
+
from local_deep_research.config.llm_config import get_llm
|
31
|
+
from local_deep_research.config.search_config import get_search
|
32
|
+
from local_deep_research.search_system import AdvancedSearchSystem
|
29
33
|
|
30
34
|
logger = logging.getLogger(__name__)
|
31
35
|
|
@@ -42,7 +46,7 @@ def compare_configurations(
|
|
42
46
|
) -> Dict[str, Any]:
|
43
47
|
"""
|
44
48
|
Compare multiple parameter configurations.
|
45
|
-
|
49
|
+
|
46
50
|
Args:
|
47
51
|
query: Research query to use for evaluation
|
48
52
|
configurations: List of parameter configurations to compare
|
@@ -52,42 +56,46 @@ def compare_configurations(
|
|
52
56
|
search_tool: Search engine to use
|
53
57
|
repetitions: Number of repetitions for each configuration
|
54
58
|
metric_weights: Dictionary of weights for each metric type
|
55
|
-
|
59
|
+
|
56
60
|
Returns:
|
57
61
|
Dictionary with comparison results
|
58
62
|
"""
|
59
63
|
os.makedirs(output_dir, exist_ok=True)
|
60
|
-
|
64
|
+
|
61
65
|
# Default metric weights if not provided
|
62
66
|
if metric_weights is None:
|
63
67
|
metric_weights = {
|
64
68
|
"quality": 0.6,
|
65
69
|
"speed": 0.4,
|
66
|
-
"resource": 0.0 # Disabled by default
|
70
|
+
"resource": 0.0, # Disabled by default
|
67
71
|
}
|
68
|
-
|
72
|
+
|
69
73
|
# Verify valid configurations
|
70
74
|
if not configurations:
|
71
75
|
logger.error("No configurations provided for comparison")
|
72
76
|
return {"error": "No configurations provided"}
|
73
|
-
|
77
|
+
|
74
78
|
# Results storage
|
75
79
|
results = []
|
76
|
-
|
80
|
+
|
77
81
|
# Process each configuration
|
78
82
|
for i, config in enumerate(configurations):
|
79
|
-
logger.info(
|
80
|
-
|
83
|
+
logger.info(
|
84
|
+
f"Evaluating configuration {i + 1}/{len(configurations)}: {config}"
|
85
|
+
)
|
86
|
+
|
81
87
|
# Name for this configuration
|
82
|
-
config_name = config.get("name", f"Configuration {i+1}")
|
83
|
-
|
88
|
+
config_name = config.get("name", f"Configuration {i + 1}")
|
89
|
+
|
84
90
|
# Results for all repetitions of this configuration
|
85
91
|
config_results = []
|
86
|
-
|
92
|
+
|
87
93
|
# Run multiple repetitions
|
88
94
|
for rep in range(repetitions):
|
89
|
-
logger.info(
|
90
|
-
|
95
|
+
logger.info(
|
96
|
+
f"Starting repetition {rep + 1}/{repetitions} for {config_name}"
|
97
|
+
)
|
98
|
+
|
91
99
|
try:
|
92
100
|
# Run the configuration
|
93
101
|
result = _evaluate_single_configuration(
|
@@ -95,37 +103,38 @@ def compare_configurations(
|
|
95
103
|
config=config,
|
96
104
|
model_name=model_name,
|
97
105
|
provider=provider,
|
98
|
-
search_tool=search_tool
|
106
|
+
search_tool=search_tool,
|
99
107
|
)
|
100
|
-
|
108
|
+
|
101
109
|
config_results.append(result)
|
102
|
-
logger.info(f"Completed repetition {rep+1} for {config_name}")
|
103
|
-
|
110
|
+
logger.info(f"Completed repetition {rep + 1} for {config_name}")
|
111
|
+
|
104
112
|
except Exception as e:
|
105
|
-
logger.error(
|
113
|
+
logger.error(
|
114
|
+
f"Error in {config_name}, repetition {rep + 1}: {str(e)}"
|
115
|
+
)
|
106
116
|
# Add error info but continue with other configurations
|
107
|
-
config_results.append({
|
108
|
-
|
109
|
-
"success": False
|
110
|
-
})
|
111
|
-
|
117
|
+
config_results.append({"error": str(e), "success": False})
|
118
|
+
|
112
119
|
# Calculate aggregate metrics across repetitions
|
113
120
|
if config_results:
|
114
121
|
# Filter out failed runs
|
115
|
-
successful_runs = [
|
116
|
-
|
122
|
+
successful_runs = [
|
123
|
+
r for r in config_results if r.get("success", False)
|
124
|
+
]
|
125
|
+
|
117
126
|
if successful_runs:
|
118
127
|
# Calculate average metrics
|
119
128
|
avg_metrics = _calculate_average_metrics(successful_runs)
|
120
|
-
|
129
|
+
|
121
130
|
# Calculate overall score
|
122
131
|
overall_score = calculate_combined_score(
|
123
132
|
quality_metrics=avg_metrics.get("quality_metrics", {}),
|
124
133
|
speed_metrics=avg_metrics.get("speed_metrics", {}),
|
125
134
|
resource_metrics=avg_metrics.get("resource_metrics", {}),
|
126
|
-
weights=metric_weights
|
135
|
+
weights=metric_weights,
|
127
136
|
)
|
128
|
-
|
137
|
+
|
129
138
|
result_summary = {
|
130
139
|
"name": config_name,
|
131
140
|
"configuration": config,
|
@@ -134,7 +143,7 @@ def compare_configurations(
|
|
134
143
|
"runs_failed": len(config_results) - len(successful_runs),
|
135
144
|
"avg_metrics": avg_metrics,
|
136
145
|
"overall_score": overall_score,
|
137
|
-
"individual_results": config_results
|
146
|
+
"individual_results": config_results,
|
138
147
|
}
|
139
148
|
else:
|
140
149
|
# All runs failed
|
@@ -145,55 +154,59 @@ def compare_configurations(
|
|
145
154
|
"runs_completed": 0,
|
146
155
|
"runs_failed": len(config_results),
|
147
156
|
"error": "All runs failed",
|
148
|
-
"individual_results": config_results
|
157
|
+
"individual_results": config_results,
|
149
158
|
}
|
150
|
-
|
159
|
+
|
151
160
|
results.append(result_summary)
|
152
|
-
|
161
|
+
|
153
162
|
# Sort results by overall score (if available)
|
154
163
|
sorted_results = sorted(
|
155
164
|
[r for r in results if r.get("success", False)],
|
156
165
|
key=lambda x: x.get("overall_score", 0),
|
157
|
-
reverse=True
|
166
|
+
reverse=True,
|
158
167
|
)
|
159
|
-
|
168
|
+
|
160
169
|
# Add failed configurations at the end
|
161
170
|
sorted_results.extend([r for r in results if not r.get("success", False)])
|
162
|
-
|
171
|
+
|
163
172
|
# Create comparison report
|
164
173
|
comparison_report = {
|
165
174
|
"query": query,
|
166
175
|
"configurations_tested": len(configurations),
|
167
|
-
"successful_configurations": len(
|
168
|
-
|
176
|
+
"successful_configurations": len(
|
177
|
+
[r for r in results if r.get("success", False)]
|
178
|
+
),
|
179
|
+
"failed_configurations": len(
|
180
|
+
[r for r in results if not r.get("success", False)]
|
181
|
+
),
|
169
182
|
"repetitions": repetitions,
|
170
183
|
"metric_weights": metric_weights,
|
171
184
|
"timestamp": datetime.now().isoformat(),
|
172
|
-
"results": sorted_results
|
185
|
+
"results": sorted_results,
|
173
186
|
}
|
174
|
-
|
187
|
+
|
175
188
|
# Save results to file
|
176
189
|
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
177
|
-
result_file = os.path.join(
|
178
|
-
|
190
|
+
result_file = os.path.join(
|
191
|
+
output_dir, f"comparison_results_{timestamp}.json"
|
192
|
+
)
|
193
|
+
|
179
194
|
with open(result_file, "w") as f:
|
180
195
|
json.dump(comparison_report, f, indent=2)
|
181
|
-
|
196
|
+
|
182
197
|
# Generate visualizations
|
183
198
|
visualizations_dir = os.path.join(output_dir, "visualizations")
|
184
199
|
os.makedirs(visualizations_dir, exist_ok=True)
|
185
|
-
|
200
|
+
|
186
201
|
_create_comparison_visualizations(
|
187
|
-
comparison_report,
|
188
|
-
output_dir=visualizations_dir,
|
189
|
-
timestamp=timestamp
|
202
|
+
comparison_report, output_dir=visualizations_dir, timestamp=timestamp
|
190
203
|
)
|
191
|
-
|
204
|
+
|
192
205
|
logger.info(f"Comparison completed. Results saved to {result_file}")
|
193
|
-
|
206
|
+
|
194
207
|
# Add report path to the result
|
195
208
|
comparison_report["report_path"] = result_file
|
196
|
-
|
209
|
+
|
197
210
|
return comparison_report
|
198
211
|
|
199
212
|
|
@@ -206,14 +219,14 @@ def _evaluate_single_configuration(
|
|
206
219
|
) -> Dict[str, Any]:
|
207
220
|
"""
|
208
221
|
Evaluate a single configuration.
|
209
|
-
|
222
|
+
|
210
223
|
Args:
|
211
224
|
query: Research query to evaluate
|
212
225
|
config: Configuration parameters
|
213
226
|
model_name: Name of the LLM model to use
|
214
227
|
provider: LLM provider
|
215
228
|
search_tool: Search engine to use
|
216
|
-
|
229
|
+
|
217
230
|
Returns:
|
218
231
|
Dictionary with evaluation results
|
219
232
|
"""
|
@@ -226,73 +239,77 @@ def _evaluate_single_configuration(
|
|
226
239
|
config_search_strategy = config.get("search_strategy", "iterdrag")
|
227
240
|
config_max_results = config.get("max_results", 50)
|
228
241
|
config_max_filtered_results = config.get("max_filtered_results", 20)
|
229
|
-
|
242
|
+
|
230
243
|
# Initialize profiling tools
|
231
244
|
speed_profiler = SpeedProfiler()
|
232
245
|
resource_monitor = ResourceMonitor(sampling_interval=0.5)
|
233
|
-
|
246
|
+
|
234
247
|
# Start profiling
|
235
248
|
speed_profiler.start()
|
236
249
|
resource_monitor.start()
|
237
|
-
|
250
|
+
|
238
251
|
try:
|
239
252
|
# Get LLM
|
240
253
|
with speed_profiler.timer("llm_initialization"):
|
241
254
|
llm = get_llm(
|
242
255
|
temperature=config.get("temperature", 0.7),
|
243
256
|
model_name=config_model_name,
|
244
|
-
provider=config_provider
|
257
|
+
provider=config_provider,
|
245
258
|
)
|
246
|
-
|
259
|
+
|
247
260
|
# Set up search engine if specified
|
248
261
|
with speed_profiler.timer("search_initialization"):
|
249
262
|
search = None
|
250
263
|
if config_search_tool:
|
251
264
|
search = get_search(
|
252
|
-
config_search_tool,
|
265
|
+
config_search_tool,
|
253
266
|
llm_instance=llm,
|
254
267
|
max_results=config_max_results,
|
255
|
-
max_filtered_results=config_max_filtered_results
|
268
|
+
max_filtered_results=config_max_filtered_results,
|
256
269
|
)
|
257
|
-
|
270
|
+
|
258
271
|
# Create search system
|
259
272
|
system = AdvancedSearchSystem(llm=llm, search=search)
|
260
273
|
system.max_iterations = config_iterations
|
261
274
|
system.questions_per_iteration = config_questions_per_iteration
|
262
275
|
system.strategy_name = config_search_strategy
|
263
|
-
|
276
|
+
|
264
277
|
# Run the analysis
|
265
278
|
with speed_profiler.timer("analysis"):
|
266
279
|
results = system.analyze_topic(query)
|
267
|
-
|
280
|
+
|
268
281
|
# Stop profiling
|
269
282
|
speed_profiler.stop()
|
270
283
|
resource_monitor.stop()
|
271
|
-
|
284
|
+
|
272
285
|
# Calculate metrics
|
273
286
|
quality_metrics = calculate_quality_metrics(
|
274
287
|
results=results,
|
275
|
-
system_info={
|
288
|
+
system_info={
|
289
|
+
"all_links_of_system": getattr(
|
290
|
+
system, "all_links_of_system", []
|
291
|
+
)
|
292
|
+
},
|
276
293
|
)
|
277
|
-
|
294
|
+
|
278
295
|
speed_metrics = calculate_speed_metrics(
|
279
296
|
timing_info=speed_profiler.get_summary(),
|
280
297
|
system_info={
|
281
298
|
"iterations": config_iterations,
|
282
299
|
"questions_per_iteration": config_questions_per_iteration,
|
283
|
-
"results": results
|
284
|
-
}
|
300
|
+
"results": results,
|
301
|
+
},
|
285
302
|
)
|
286
|
-
|
303
|
+
|
287
304
|
resource_metrics = calculate_resource_metrics(
|
288
305
|
resource_info=resource_monitor.get_combined_stats(),
|
289
306
|
system_info={
|
290
307
|
"iterations": config_iterations,
|
291
308
|
"questions_per_iteration": config_questions_per_iteration,
|
292
|
-
"results": results
|
293
|
-
}
|
309
|
+
"results": results,
|
310
|
+
},
|
294
311
|
)
|
295
|
-
|
312
|
+
|
296
313
|
# Return comprehensive results
|
297
314
|
return {
|
298
315
|
"query": query,
|
@@ -304,17 +321,17 @@ def _evaluate_single_configuration(
|
|
304
321
|
"speed_metrics": speed_metrics,
|
305
322
|
"resource_metrics": resource_metrics,
|
306
323
|
"timing_details": speed_profiler.get_timings(),
|
307
|
-
"resource_details": resource_monitor.get_combined_stats()
|
324
|
+
"resource_details": resource_monitor.get_combined_stats(),
|
308
325
|
}
|
309
|
-
|
326
|
+
|
310
327
|
except Exception as e:
|
311
328
|
# Stop profiling on error
|
312
329
|
speed_profiler.stop()
|
313
330
|
resource_monitor.stop()
|
314
|
-
|
331
|
+
|
315
332
|
# Log the error
|
316
333
|
logger.error(f"Error evaluating configuration: {str(e)}")
|
317
|
-
|
334
|
+
|
318
335
|
# Return error information
|
319
336
|
return {
|
320
337
|
"query": query,
|
@@ -322,78 +339,76 @@ def _evaluate_single_configuration(
|
|
322
339
|
"success": False,
|
323
340
|
"error": str(e),
|
324
341
|
"timing_details": speed_profiler.get_timings(),
|
325
|
-
"resource_details": resource_monitor.get_combined_stats()
|
342
|
+
"resource_details": resource_monitor.get_combined_stats(),
|
326
343
|
}
|
327
344
|
|
328
345
|
|
329
346
|
def _calculate_average_metrics(results: List[Dict[str, Any]]) -> Dict[str, Any]:
|
330
347
|
"""
|
331
348
|
Calculate average metrics across multiple runs.
|
332
|
-
|
349
|
+
|
333
350
|
Args:
|
334
351
|
results: List of individual run results
|
335
|
-
|
352
|
+
|
336
353
|
Returns:
|
337
354
|
Dictionary with averaged metrics
|
338
355
|
"""
|
339
356
|
# Check if there are any successful results
|
340
357
|
if not results:
|
341
358
|
return {}
|
342
|
-
|
359
|
+
|
343
360
|
# Initialize average metrics
|
344
361
|
avg_metrics = {
|
345
362
|
"quality_metrics": {},
|
346
363
|
"speed_metrics": {},
|
347
|
-
"resource_metrics": {}
|
364
|
+
"resource_metrics": {},
|
348
365
|
}
|
349
|
-
|
366
|
+
|
350
367
|
# Quality metrics
|
351
368
|
quality_keys = set()
|
352
369
|
for result in results:
|
353
370
|
quality_metrics = result.get("quality_metrics", {})
|
354
371
|
quality_keys.update(quality_metrics.keys())
|
355
|
-
|
372
|
+
|
356
373
|
for key in quality_keys:
|
357
374
|
values = [r.get("quality_metrics", {}).get(key) for r in results]
|
358
375
|
values = [v for v in values if v is not None]
|
359
376
|
if values:
|
360
377
|
avg_metrics["quality_metrics"][key] = sum(values) / len(values)
|
361
|
-
|
378
|
+
|
362
379
|
# Speed metrics
|
363
380
|
speed_keys = set()
|
364
381
|
for result in results:
|
365
382
|
speed_metrics = result.get("speed_metrics", {})
|
366
383
|
speed_keys.update(speed_metrics.keys())
|
367
|
-
|
384
|
+
|
368
385
|
for key in speed_keys:
|
369
386
|
values = [r.get("speed_metrics", {}).get(key) for r in results]
|
370
387
|
values = [v for v in values if v is not None]
|
371
388
|
if values:
|
372
389
|
avg_metrics["speed_metrics"][key] = sum(values) / len(values)
|
373
|
-
|
390
|
+
|
374
391
|
# Resource metrics
|
375
392
|
resource_keys = set()
|
376
393
|
for result in results:
|
377
394
|
resource_metrics = result.get("resource_metrics", {})
|
378
395
|
resource_keys.update(resource_metrics.keys())
|
379
|
-
|
396
|
+
|
380
397
|
for key in resource_keys:
|
381
398
|
values = [r.get("resource_metrics", {}).get(key) for r in results]
|
382
399
|
values = [v for v in values if v is not None]
|
383
400
|
if values:
|
384
401
|
avg_metrics["resource_metrics"][key] = sum(values) / len(values)
|
385
|
-
|
402
|
+
|
386
403
|
return avg_metrics
|
387
404
|
|
388
405
|
|
389
406
|
def _create_comparison_visualizations(
|
390
|
-
comparison_report: Dict[str, Any],
|
391
|
-
output_dir: str,
|
392
|
-
timestamp: str
|
407
|
+
comparison_report: Dict[str, Any], output_dir: str, timestamp: str
|
393
408
|
):
|
394
409
|
"""
|
395
410
|
Create visualizations for the comparison results.
|
396
|
-
|
411
|
+
|
397
412
|
Args:
|
398
413
|
comparison_report: Comparison report dictionary
|
399
414
|
output_dir: Directory to save visualizations
|
@@ -401,75 +416,87 @@ def _create_comparison_visualizations(
|
|
401
416
|
"""
|
402
417
|
# Check if there are successful results
|
403
418
|
successful_results = [
|
404
|
-
r
|
419
|
+
r
|
420
|
+
for r in comparison_report.get("results", [])
|
405
421
|
if r.get("success", False)
|
406
422
|
]
|
407
|
-
|
423
|
+
|
408
424
|
if not successful_results:
|
409
425
|
logger.warning("No successful configurations to visualize")
|
410
426
|
return
|
411
|
-
|
427
|
+
|
412
428
|
# Extract configuration names
|
413
|
-
config_names = [
|
414
|
-
|
429
|
+
config_names = [
|
430
|
+
r.get("name", f"Config {i + 1}")
|
431
|
+
for i, r in enumerate(successful_results)
|
432
|
+
]
|
433
|
+
|
415
434
|
# 1. Overall score comparison
|
416
435
|
plt.figure(figsize=(12, 6))
|
417
436
|
scores = [r.get("overall_score", 0) for r in successful_results]
|
418
|
-
|
437
|
+
|
419
438
|
# Create horizontal bar chart
|
420
|
-
plt.barh(config_names, scores, color=
|
421
|
-
plt.xlabel(
|
422
|
-
plt.ylabel(
|
423
|
-
plt.title(
|
424
|
-
plt.grid(axis=
|
439
|
+
plt.barh(config_names, scores, color="skyblue")
|
440
|
+
plt.xlabel("Overall Score")
|
441
|
+
plt.ylabel("Configuration")
|
442
|
+
plt.title("Configuration Performance Comparison")
|
443
|
+
plt.grid(axis="x", linestyle="--", alpha=0.7)
|
425
444
|
plt.tight_layout()
|
426
|
-
plt.savefig(
|
445
|
+
plt.savefig(
|
446
|
+
os.path.join(output_dir, f"overall_score_comparison_{timestamp}.png")
|
447
|
+
)
|
427
448
|
plt.close()
|
428
|
-
|
449
|
+
|
429
450
|
# 2. Quality metrics comparison
|
430
451
|
quality_metrics = ["overall_quality", "source_count", "lexical_diversity"]
|
431
452
|
_create_metric_comparison_chart(
|
432
|
-
successful_results,
|
433
|
-
config_names,
|
434
|
-
quality_metrics,
|
453
|
+
successful_results,
|
454
|
+
config_names,
|
455
|
+
quality_metrics,
|
435
456
|
"quality_metrics",
|
436
457
|
"Quality Metrics Comparison",
|
437
|
-
os.path.join(output_dir, f"quality_metrics_comparison_{timestamp}.png")
|
458
|
+
os.path.join(output_dir, f"quality_metrics_comparison_{timestamp}.png"),
|
438
459
|
)
|
439
|
-
|
460
|
+
|
440
461
|
# 3. Speed metrics comparison
|
441
462
|
speed_metrics = ["overall_speed", "total_duration", "duration_per_question"]
|
442
463
|
_create_metric_comparison_chart(
|
443
|
-
successful_results,
|
444
|
-
config_names,
|
445
|
-
speed_metrics,
|
464
|
+
successful_results,
|
465
|
+
config_names,
|
466
|
+
speed_metrics,
|
446
467
|
"speed_metrics",
|
447
468
|
"Speed Metrics Comparison",
|
448
|
-
os.path.join(output_dir, f"speed_metrics_comparison_{timestamp}.png")
|
469
|
+
os.path.join(output_dir, f"speed_metrics_comparison_{timestamp}.png"),
|
449
470
|
)
|
450
|
-
|
471
|
+
|
451
472
|
# 4. Resource metrics comparison
|
452
|
-
resource_metrics = [
|
473
|
+
resource_metrics = [
|
474
|
+
"overall_resource",
|
475
|
+
"process_memory_max_mb",
|
476
|
+
"system_cpu_avg",
|
477
|
+
]
|
453
478
|
_create_metric_comparison_chart(
|
454
|
-
successful_results,
|
455
|
-
config_names,
|
456
|
-
resource_metrics,
|
479
|
+
successful_results,
|
480
|
+
config_names,
|
481
|
+
resource_metrics,
|
457
482
|
"resource_metrics",
|
458
483
|
"Resource Usage Comparison",
|
459
|
-
os.path.join(
|
484
|
+
os.path.join(
|
485
|
+
output_dir, f"resource_metrics_comparison_{timestamp}.png"
|
486
|
+
),
|
460
487
|
)
|
461
|
-
|
488
|
+
|
462
489
|
# 5. Spider chart for multi-dimensional comparison
|
463
490
|
_create_spider_chart(
|
464
491
|
successful_results,
|
465
492
|
config_names,
|
466
|
-
os.path.join(output_dir, f"spider_chart_comparison_{timestamp}.png")
|
493
|
+
os.path.join(output_dir, f"spider_chart_comparison_{timestamp}.png"),
|
467
494
|
)
|
468
|
-
|
495
|
+
|
469
496
|
# 6. Pareto frontier chart for quality vs. speed
|
470
497
|
_create_pareto_chart(
|
471
498
|
successful_results,
|
472
|
-
os.path.join(output_dir, f"pareto_chart_comparison_{timestamp}.png")
|
499
|
+
os.path.join(output_dir, f"pareto_chart_comparison_{timestamp}.png"),
|
473
500
|
)
|
474
501
|
|
475
502
|
|
@@ -479,11 +506,11 @@ def _create_metric_comparison_chart(
|
|
479
506
|
metric_keys: List[str],
|
480
507
|
metric_category: str,
|
481
508
|
title: str,
|
482
|
-
output_path: str
|
509
|
+
output_path: str,
|
483
510
|
):
|
484
511
|
"""
|
485
512
|
Create a chart comparing specific metrics across configurations.
|
486
|
-
|
513
|
+
|
487
514
|
Args:
|
488
515
|
results: List of configuration results
|
489
516
|
config_names: Names of configurations
|
@@ -493,21 +520,23 @@ def _create_metric_comparison_chart(
|
|
493
520
|
output_path: Path to save the chart
|
494
521
|
"""
|
495
522
|
# Create figure with multiple subplots (one per metric)
|
496
|
-
fig, axes = plt.subplots(
|
497
|
-
|
523
|
+
fig, axes = plt.subplots(
|
524
|
+
len(metric_keys), 1, figsize=(12, 5 * len(metric_keys))
|
525
|
+
)
|
526
|
+
|
498
527
|
# Handle case with only one metric
|
499
528
|
if len(metric_keys) == 1:
|
500
529
|
axes = [axes]
|
501
|
-
|
530
|
+
|
502
531
|
for i, metric_key in enumerate(metric_keys):
|
503
532
|
ax = axes[i]
|
504
|
-
|
533
|
+
|
505
534
|
# Get metric values
|
506
535
|
metric_values = []
|
507
536
|
for result in results:
|
508
537
|
metrics = result.get("avg_metrics", {}).get(metric_category, {})
|
509
538
|
value = metrics.get(metric_key)
|
510
|
-
|
539
|
+
|
511
540
|
# Handle time values for better visualization
|
512
541
|
if "duration" in metric_key and value is not None:
|
513
542
|
# Convert to seconds if > 60 seconds, minutes if > 60 minutes
|
@@ -519,22 +548,26 @@ def _create_metric_comparison_chart(
|
|
519
548
|
metric_key += " (minutes)"
|
520
549
|
else:
|
521
550
|
metric_key += " (seconds)"
|
522
|
-
|
551
|
+
|
523
552
|
metric_values.append(value if value is not None else 0)
|
524
|
-
|
553
|
+
|
525
554
|
# Create horizontal bar chart
|
526
|
-
bars = ax.barh(config_names, metric_values, color=
|
527
|
-
ax.set_xlabel(metric_key.replace(
|
555
|
+
bars = ax.barh(config_names, metric_values, color="lightblue")
|
556
|
+
ax.set_xlabel(metric_key.replace("_", " ").title())
|
528
557
|
ax.set_title(f"{metric_key.replace('_', ' ').title()}")
|
529
|
-
ax.grid(axis=
|
530
|
-
|
558
|
+
ax.grid(axis="x", linestyle="--", alpha=0.7)
|
559
|
+
|
531
560
|
# Add value labels to bars
|
532
561
|
for bar in bars:
|
533
562
|
width = bar.get_width()
|
534
563
|
label_x_pos = width * 1.01
|
535
|
-
ax.text(
|
536
|
-
|
537
|
-
|
564
|
+
ax.text(
|
565
|
+
label_x_pos,
|
566
|
+
bar.get_y() + bar.get_height() / 2,
|
567
|
+
f"{width:.2f}",
|
568
|
+
va="center",
|
569
|
+
)
|
570
|
+
|
538
571
|
plt.suptitle(title, fontsize=16)
|
539
572
|
plt.tight_layout()
|
540
573
|
plt.savefig(output_path)
|
@@ -542,13 +575,11 @@ def _create_metric_comparison_chart(
|
|
542
575
|
|
543
576
|
|
544
577
|
def _create_spider_chart(
|
545
|
-
results: List[Dict[str, Any]],
|
546
|
-
config_names: List[str],
|
547
|
-
output_path: str
|
578
|
+
results: List[Dict[str, Any]], config_names: List[str], output_path: str
|
548
579
|
):
|
549
580
|
"""
|
550
581
|
Create a spider chart comparing metrics across configurations.
|
551
|
-
|
582
|
+
|
552
583
|
Args:
|
553
584
|
results: List of configuration results
|
554
585
|
config_names: Names of configurations
|
@@ -560,81 +591,105 @@ def _create_spider_chart(
|
|
560
591
|
from matplotlib.projections import register_projection
|
561
592
|
from matplotlib.projections.polar import PolarAxes
|
562
593
|
from matplotlib.spines import Spine
|
563
|
-
|
564
|
-
def radar_factory(num_vars, frame=
|
594
|
+
|
595
|
+
def radar_factory(num_vars, frame="circle"):
|
565
596
|
"""Create a radar chart with `num_vars` axes."""
|
566
597
|
# Calculate evenly-spaced axis angles
|
567
|
-
theta = np.linspace(0, 2*np.pi, num_vars, endpoint=False)
|
568
|
-
|
598
|
+
theta = np.linspace(0, 2 * np.pi, num_vars, endpoint=False)
|
599
|
+
|
569
600
|
class RadarAxes(PolarAxes):
|
570
|
-
name =
|
571
|
-
|
601
|
+
name = "radar"
|
602
|
+
|
572
603
|
def __init__(self, *args, **kwargs):
|
573
604
|
super().__init__(*args, **kwargs)
|
574
|
-
self.set_theta_zero_location(
|
575
|
-
|
605
|
+
self.set_theta_zero_location("N")
|
606
|
+
|
576
607
|
def fill(self, *args, closed=True, **kwargs):
|
577
608
|
return super().fill(closed=closed, *args, **kwargs)
|
578
|
-
|
609
|
+
|
579
610
|
def plot(self, *args, **kwargs):
|
580
611
|
return super().plot(*args, **kwargs)
|
581
|
-
|
612
|
+
|
582
613
|
def set_varlabels(self, labels):
|
583
614
|
self.set_thetagrids(np.degrees(theta), labels)
|
584
|
-
|
615
|
+
|
585
616
|
def _gen_axes_patch(self):
|
586
|
-
if frame ==
|
617
|
+
if frame == "circle":
|
587
618
|
return Circle((0.5, 0.5), 0.5)
|
588
|
-
elif frame ==
|
589
|
-
return RegularPolygon(
|
619
|
+
elif frame == "polygon":
|
620
|
+
return RegularPolygon(
|
621
|
+
(0.5, 0.5), num_vars, radius=0.5, edgecolor="k"
|
622
|
+
)
|
590
623
|
else:
|
591
|
-
raise ValueError(
|
592
|
-
|
624
|
+
raise ValueError(
|
625
|
+
"Unknown value for 'frame': %s" % frame
|
626
|
+
)
|
627
|
+
|
593
628
|
def _gen_axes_spines(self):
|
594
|
-
if frame ==
|
629
|
+
if frame == "circle":
|
595
630
|
return super()._gen_axes_spines()
|
596
|
-
elif frame ==
|
631
|
+
elif frame == "polygon":
|
597
632
|
spine_type = Spine.circular_spine
|
598
633
|
verts = unit_poly_verts(num_vars)
|
599
634
|
vertices = [(0.5, 0.5)] + verts
|
600
|
-
codes =
|
635
|
+
codes = (
|
636
|
+
[Path.MOVETO]
|
637
|
+
+ [Path.LINETO] * num_vars
|
638
|
+
+ [Path.CLOSEPOLY]
|
639
|
+
)
|
601
640
|
path = Path(vertices, codes)
|
602
641
|
spine = Spine(self, spine_type, path)
|
603
642
|
spine.set_transform(self.transAxes)
|
604
|
-
return {
|
643
|
+
return {"polar": spine}
|
605
644
|
else:
|
606
|
-
raise ValueError(
|
607
|
-
|
645
|
+
raise ValueError(
|
646
|
+
"Unknown value for 'frame': %s" % frame
|
647
|
+
)
|
648
|
+
|
608
649
|
def unit_poly_verts(num_vars):
|
609
650
|
"""Return vertices of polygon for radar chart."""
|
610
651
|
verts = []
|
611
652
|
for i in range(num_vars):
|
612
653
|
angle = theta[i]
|
613
|
-
verts.append(
|
654
|
+
verts.append(
|
655
|
+
(0.5 * (1 + np.cos(angle)), 0.5 * (1 + np.sin(angle)))
|
656
|
+
)
|
614
657
|
return verts
|
615
|
-
|
658
|
+
|
616
659
|
register_projection(RadarAxes)
|
617
660
|
return theta
|
618
|
-
|
661
|
+
|
619
662
|
# Select metrics for the spider chart
|
620
663
|
metrics = [
|
621
664
|
{"name": "Quality", "key": "quality_metrics.overall_quality"},
|
622
665
|
{"name": "Speed", "key": "speed_metrics.overall_speed"},
|
623
|
-
{
|
624
|
-
|
625
|
-
|
666
|
+
{
|
667
|
+
"name": "Sources",
|
668
|
+
"key": "quality_metrics.normalized_source_count",
|
669
|
+
},
|
670
|
+
{
|
671
|
+
"name": "Content",
|
672
|
+
"key": "quality_metrics.normalized_knowledge_length",
|
673
|
+
},
|
674
|
+
{
|
675
|
+
"name": "Memory",
|
676
|
+
"key": "resource_metrics.normalized_memory_usage",
|
677
|
+
"invert": True,
|
678
|
+
},
|
626
679
|
]
|
627
|
-
|
680
|
+
|
628
681
|
# Extract metric values
|
629
682
|
spoke_labels = [m["name"] for m in metrics]
|
630
683
|
num_vars = len(spoke_labels)
|
631
684
|
theta = radar_factory(num_vars)
|
632
|
-
|
633
|
-
fig, ax = plt.subplots(
|
634
|
-
|
685
|
+
|
686
|
+
fig, ax = plt.subplots(
|
687
|
+
figsize=(10, 10), subplot_kw=dict(projection="radar")
|
688
|
+
)
|
689
|
+
|
635
690
|
# Color map for different configurations
|
636
691
|
colors = plt.cm.viridis(np.linspace(0, 1, len(results)))
|
637
|
-
|
692
|
+
|
638
693
|
for i, result in enumerate(results):
|
639
694
|
values = []
|
640
695
|
for metric in metrics:
|
@@ -643,45 +698,53 @@ def _create_spider_chart(
|
|
643
698
|
value = result.get("avg_metrics", {})
|
644
699
|
for part in key_parts:
|
645
700
|
value = value.get(part, 0) if isinstance(value, dict) else 0
|
646
|
-
|
701
|
+
|
647
702
|
# Invert if needed (for metrics where lower is better)
|
648
703
|
if metric.get("invert", False):
|
649
704
|
value = 1.0 - value
|
650
|
-
|
705
|
+
|
651
706
|
values.append(value)
|
652
|
-
|
707
|
+
|
653
708
|
# Plot this configuration
|
654
|
-
ax.plot(
|
709
|
+
ax.plot(
|
710
|
+
theta,
|
711
|
+
values,
|
712
|
+
color=colors[i],
|
713
|
+
linewidth=2,
|
714
|
+
label=config_names[i],
|
715
|
+
)
|
655
716
|
ax.fill(theta, values, color=colors[i], alpha=0.25)
|
656
|
-
|
717
|
+
|
657
718
|
# Set chart properties
|
658
719
|
ax.set_varlabels(spoke_labels)
|
659
|
-
plt.legend(loc=
|
660
|
-
plt.title(
|
720
|
+
plt.legend(loc="best", bbox_to_anchor=(0.5, 0.1))
|
721
|
+
plt.title("Multi-Dimensional Configuration Comparison", size=16, y=1.05)
|
661
722
|
plt.tight_layout()
|
662
|
-
|
723
|
+
|
663
724
|
# Save chart
|
664
725
|
plt.savefig(output_path)
|
665
726
|
plt.close()
|
666
|
-
|
727
|
+
|
667
728
|
except Exception as e:
|
668
729
|
logger.error(f"Error creating spider chart: {str(e)}")
|
669
730
|
# Create a text-based chart as fallback
|
670
731
|
plt.figure(figsize=(10, 6))
|
671
|
-
plt.text(
|
672
|
-
|
673
|
-
|
732
|
+
plt.text(
|
733
|
+
0.5,
|
734
|
+
0.5,
|
735
|
+
f"Spider chart could not be created: {str(e)}",
|
736
|
+
horizontalalignment="center",
|
737
|
+
verticalalignment="center",
|
738
|
+
)
|
739
|
+
plt.axis("off")
|
674
740
|
plt.savefig(output_path)
|
675
741
|
plt.close()
|
676
742
|
|
677
743
|
|
678
|
-
def _create_pareto_chart(
|
679
|
-
results: List[Dict[str, Any]],
|
680
|
-
output_path: str
|
681
|
-
):
|
744
|
+
def _create_pareto_chart(results: List[Dict[str, Any]], output_path: str):
|
682
745
|
"""
|
683
746
|
Create a Pareto frontier chart showing quality vs. speed tradeoff.
|
684
|
-
|
747
|
+
|
685
748
|
Args:
|
686
749
|
results: List of configuration results
|
687
750
|
output_path: Path to save the chart
|
@@ -690,30 +753,32 @@ def _create_pareto_chart(
|
|
690
753
|
quality_scores = []
|
691
754
|
speed_scores = []
|
692
755
|
names = []
|
693
|
-
|
756
|
+
|
694
757
|
for result in results:
|
695
758
|
metrics = result.get("avg_metrics", {})
|
696
759
|
quality = metrics.get("quality_metrics", {}).get("overall_quality", 0)
|
697
|
-
|
760
|
+
|
698
761
|
# For speed, we use inverse of duration (so higher is better)
|
699
762
|
duration = metrics.get("speed_metrics", {}).get("total_duration", 1)
|
700
763
|
speed = 1.0 / max(duration, 0.001) # Avoid division by zero
|
701
|
-
|
764
|
+
|
702
765
|
quality_scores.append(quality)
|
703
766
|
speed_scores.append(speed)
|
704
767
|
names.append(result.get("name", "Configuration"))
|
705
|
-
|
768
|
+
|
706
769
|
# Create scatter plot
|
707
770
|
plt.figure(figsize=(10, 8))
|
708
771
|
plt.scatter(quality_scores, speed_scores, s=100, alpha=0.7)
|
709
|
-
|
772
|
+
|
710
773
|
# Add labels for each point
|
711
774
|
for i, name in enumerate(names):
|
712
|
-
plt.annotate(
|
713
|
-
|
714
|
-
|
715
|
-
|
716
|
-
|
775
|
+
plt.annotate(
|
776
|
+
name,
|
777
|
+
(quality_scores[i], speed_scores[i]),
|
778
|
+
xytext=(5, 5),
|
779
|
+
textcoords="offset points",
|
780
|
+
)
|
781
|
+
|
717
782
|
# Identify Pareto frontier
|
718
783
|
pareto_points = []
|
719
784
|
for i, (q, s) in enumerate(zip(quality_scores, speed_scores)):
|
@@ -724,45 +789,57 @@ def _create_pareto_chart(
|
|
724
789
|
break
|
725
790
|
if is_pareto:
|
726
791
|
pareto_points.append(i)
|
727
|
-
|
792
|
+
|
728
793
|
# Highlight Pareto frontier
|
729
794
|
pareto_quality = [quality_scores[i] for i in pareto_points]
|
730
795
|
pareto_speed = [speed_scores[i] for i in pareto_points]
|
731
|
-
|
796
|
+
|
732
797
|
# Sort pareto points for line drawing
|
733
798
|
pareto_sorted = sorted(zip(pareto_quality, pareto_speed, pareto_points))
|
734
799
|
pareto_quality = [p[0] for p in pareto_sorted]
|
735
800
|
pareto_speed = [p[1] for p in pareto_sorted]
|
736
801
|
pareto_indices = [p[2] for p in pareto_sorted]
|
737
|
-
|
802
|
+
|
738
803
|
# Draw Pareto frontier line
|
739
|
-
plt.plot(pareto_quality, pareto_speed,
|
740
|
-
|
804
|
+
plt.plot(pareto_quality, pareto_speed, "r--", linewidth=2)
|
805
|
+
|
741
806
|
# Highlight Pareto optimal points
|
742
|
-
plt.scatter(
|
743
|
-
|
744
|
-
|
745
|
-
|
807
|
+
plt.scatter(
|
808
|
+
[quality_scores[i] for i in pareto_indices],
|
809
|
+
[speed_scores[i] for i in pareto_indices],
|
810
|
+
s=150,
|
811
|
+
facecolors="none",
|
812
|
+
edgecolors="r",
|
813
|
+
linewidth=2,
|
814
|
+
)
|
815
|
+
|
746
816
|
# Add labels for Pareto optimal configurations
|
747
817
|
for i in pareto_indices:
|
748
|
-
plt.annotate(
|
749
|
-
|
750
|
-
|
751
|
-
|
752
|
-
|
753
|
-
|
818
|
+
plt.annotate(
|
819
|
+
names[i],
|
820
|
+
(quality_scores[i], speed_scores[i]),
|
821
|
+
xytext=(8, 8),
|
822
|
+
textcoords="offset points",
|
823
|
+
bbox=dict(boxstyle="round,pad=0.5", fc="yellow", alpha=0.7),
|
824
|
+
)
|
825
|
+
|
754
826
|
# Set chart properties
|
755
|
-
plt.xlabel(
|
756
|
-
plt.ylabel(
|
757
|
-
plt.title(
|
758
|
-
plt.grid(True, linestyle=
|
759
|
-
|
827
|
+
plt.xlabel("Quality Score (higher is better)")
|
828
|
+
plt.ylabel("Speed Score (higher is better)")
|
829
|
+
plt.title("Quality vs. Speed Tradeoff (Pareto Frontier)", size=14)
|
830
|
+
plt.grid(True, linestyle="--", alpha=0.7)
|
831
|
+
|
760
832
|
# Add explanation
|
761
|
-
plt.figtext(
|
762
|
-
|
763
|
-
|
764
|
-
|
765
|
-
|
833
|
+
plt.figtext(
|
834
|
+
0.5,
|
835
|
+
0.01,
|
836
|
+
"Points on the red line are Pareto optimal configurations\n"
|
837
|
+
"(no other configuration is better in both quality and speed)",
|
838
|
+
ha="center",
|
839
|
+
fontsize=10,
|
840
|
+
bbox=dict(boxstyle="round", fc="white", alpha=0.7),
|
841
|
+
)
|
842
|
+
|
766
843
|
plt.tight_layout()
|
767
844
|
plt.savefig(output_path)
|
768
|
-
plt.close()
|
845
|
+
plt.close()
|