PyEvoMotion 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
@@ -0,0 +1,316 @@
+ #!/usr/bin/env python3
+ """
+ Script to analyze model selection accuracy from test5 regression results.
+
+ This script analyzes the out_regression_results.json files from both linear and powerlaw
+ test datasets to compute accuracy metrics and create visualizations.
+
+ Success criteria:
+ - Linear datasets: success when "selected" field is "linear"
+ - Powerlaw datasets: success when "selected" field is "power_law"
+ """
+
+ import json
+ import os
+ import glob
+ import pandas as pd
+ import matplotlib.pyplot as plt
+ import numpy as np
+ from pathlib import Path
+ from typing import Dict, List
+
+
+ def load_regression_results(directory: str) -> List[Dict]:
+     """Load all regression results from a directory."""
+     results = []
+     pattern = os.path.join(directory, "**", "*out_regression_results.json")
+
+     for file_path in glob.glob(pattern, recursive=True):
+         try:
+             with open(file_path, 'r') as f:
+                 data = json.load(f)
+                 # Extract the model selection info
+                 model_selection = data.get("scaled var number of substitutions model", {}).get("model_selection", {})
+                 results.append({
+                     'file': file_path,
+                     'selected_model': model_selection.get("selected", "unknown"),
+                     'linear_AIC': model_selection.get("linear_AIC", None),
+                     'power_law_AIC': model_selection.get("power_law_AIC", None),
+                     'delta_AIC_linear': model_selection.get("delta_AIC_linear", None),
+                     'delta_AIC_power_law': model_selection.get("delta_AIC_power_law", None),
+                     'akaike_weight_linear': model_selection.get("akaike_weight_linear", None),
+                     'akaike_weight_power_law': model_selection.get("akaike_weight_power_law", None)
+                 })
+         except Exception as e:
+             print(f"Error loading {file_path}: {e}")
+
+     return results
+
+
+ def analyze_model_selection_accuracy():
+     """Analyze model selection accuracy and create visualizations."""
+
+     # Define paths
+     base_path = Path(__file__).parent.parent / "tests" / "data" / "test5"
+     linear_dir = base_path / "linear" / "output"
+     powerlaw_dir = base_path / "powerlaw" / "output"
+
+     print("Loading regression results...")
+
+     # Load results from both directories
+     linear_results = load_regression_results(str(linear_dir))
+     powerlaw_results = load_regression_results(str(powerlaw_dir))
+
+     print(f"Loaded {len(linear_results)} linear results")
+     print(f"Loaded {len(powerlaw_results)} powerlaw results")
+
+     # Analyze linear dataset results
+     linear_success = sum(1 for r in linear_results if r['selected_model'] == 'linear')
+     linear_failure = len(linear_results) - linear_success
+
+     # Analyze powerlaw dataset results
+     powerlaw_success = sum(1 for r in powerlaw_results if r['selected_model'] == 'power_law')
+     powerlaw_failure = len(powerlaw_results) - powerlaw_success
+
+     # Create summary table
+     summary_data = {
+         'Dataset Type': ['Linear', 'Powerlaw'],
+         'Total Tests': [len(linear_results), len(powerlaw_results)],
+         'Successes': [linear_success, powerlaw_success],
+         'Failures': [linear_failure, powerlaw_failure],
+         'Success Rate': [linear_success/len(linear_results) if linear_results else 0,
+                          powerlaw_success/len(powerlaw_results) if powerlaw_results else 0]
+     }
+
+     df = pd.DataFrame(summary_data)
+     print("\nModel Selection Accuracy Summary:")
+     print("=" * 50)
+     print(df.to_string(index=False, float_format='%.3f'))
+
+     # Calculate overall accuracy metrics
+     total_tests = len(linear_results) + len(powerlaw_results)
+     total_successes = linear_success + powerlaw_success
+     overall_accuracy = total_successes / total_tests if total_tests > 0 else 0
+
+     # Calculate precision and recall for each model type
+     # For linear: TP = linear_success, FP = powerlaw_failure, FN = linear_failure, TN = powerlaw_success
+     linear_tp = linear_success
+     linear_fp = powerlaw_failure  # Powerlaw datasets incorrectly classified as linear
+     linear_fn = linear_failure  # Linear datasets incorrectly classified as powerlaw
+     linear_tn = powerlaw_success  # Powerlaw datasets correctly classified as powerlaw
+
+     # For powerlaw: TP = powerlaw_success, FP = linear_failure, FN = powerlaw_failure, TN = linear_success
+     powerlaw_tp = powerlaw_success
+     powerlaw_fp = linear_failure  # Linear datasets incorrectly classified as powerlaw
+     powerlaw_fn = powerlaw_failure  # Powerlaw datasets incorrectly classified as linear
+     powerlaw_tn = linear_success  # Linear datasets correctly classified as linear
+
+     # Calculate metrics
+     linear_precision = linear_tp / (linear_tp + linear_fp) if (linear_tp + linear_fp) > 0 else 0
+     linear_recall = linear_tp / (linear_tp + linear_fn) if (linear_tp + linear_fn) > 0 else 0
+     linear_specificity = linear_tn / (linear_tn + linear_fp) if (linear_tn + linear_fp) > 0 else 0
+
+     powerlaw_precision = powerlaw_tp / (powerlaw_tp + powerlaw_fp) if (powerlaw_tp + powerlaw_fp) > 0 else 0
+     powerlaw_recall = powerlaw_tp / (powerlaw_tp + powerlaw_fn) if (powerlaw_tp + powerlaw_fn) > 0 else 0
+     powerlaw_specificity = powerlaw_tn / (powerlaw_tn + powerlaw_fp) if (powerlaw_tn + powerlaw_fp) > 0 else 0
+
+     # F1 scores
+     linear_f1 = 2 * (linear_precision * linear_recall) / (linear_precision + linear_recall) if (linear_precision + linear_recall) > 0 else 0
+     powerlaw_f1 = 2 * (powerlaw_precision * powerlaw_recall) / (powerlaw_precision + powerlaw_recall) if (powerlaw_precision + powerlaw_recall) > 0 else 0
+
+     print(f"\nOverall Accuracy: {overall_accuracy:.3f} ({total_successes}/{total_tests})")
+     print("\nDetailed Metrics:")
+     print("=" * 50)
+
+     metrics_data = {
+         'Model Type': ['Linear', 'Powerlaw'],
+         'Precision': [linear_precision, powerlaw_precision],
+         'Recall (Sensitivity)': [linear_recall, powerlaw_recall],
+         'Specificity': [linear_specificity, powerlaw_specificity],
+         'F1-Score': [linear_f1, powerlaw_f1]
+     }
+
+     metrics_df = pd.DataFrame(metrics_data)
+     print(metrics_df.to_string(index=False, float_format='%.3f'))
+
+     # Create confusion matrix data (rows: actual class, columns: predicted class)
+     confusion_matrix = np.array([
+         [linear_tp, linear_fn],  # Actual linear: predicted linear, predicted powerlaw
+         [linear_fp, linear_tn]   # Actual powerlaw: predicted linear, predicted powerlaw
+     ])
+
+     print("\nConfusion Matrix:")
+     print("=" * 30)
+     print("                 Predicted")
+     print("                 Linear  Powerlaw")
+     print(f"Actual Linear     {linear_tp:3d}      {linear_fn:3d}")
+     print(f"       Powerlaw   {linear_fp:3d}      {linear_tn:3d}")
+
+     # Create visualizations
+     create_bar_chart(summary_data, overall_accuracy)
+     create_confusion_matrix_heatmap(confusion_matrix)
+     create_metrics_comparison(metrics_data)
+
+     # Save detailed results
+     save_detailed_results(linear_results, powerlaw_results, summary_data, metrics_data, overall_accuracy)
+
+     return df, metrics_df, overall_accuracy
+
+
+ def create_bar_chart(summary_data: Dict, overall_accuracy: float):
+     """Create a bar chart showing success rates."""
+
+     fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))
+
+     # Bar chart for success/failure counts
+     x = np.arange(len(summary_data['Dataset Type']))
+     width = 0.35
+
+     bars1 = ax1.bar(x - width/2, summary_data['Successes'], width, label='Successes', color='green', alpha=0.7)
+     bars2 = ax1.bar(x + width/2, summary_data['Failures'], width, label='Failures', color='red', alpha=0.7)
+
+     ax1.set_xlabel('Dataset Type')
+     ax1.set_ylabel('Number of Tests')
+     ax1.set_title('Model Selection Results by Dataset Type')
+     ax1.set_xticks(x)
+     ax1.set_xticklabels(summary_data['Dataset Type'])
+     ax1.legend()
+     ax1.grid(True, alpha=0.3)
+
+     # Add value labels on bars
+     for bar in bars1:
+         height = bar.get_height()
+         ax1.text(bar.get_x() + bar.get_width()/2., height + 0.1,
+                  f'{int(height)}', ha='center', va='bottom')
+
+     for bar in bars2:
+         height = bar.get_height()
+         ax1.text(bar.get_x() + bar.get_width()/2., height + 0.1,
+                  f'{int(height)}', ha='center', va='bottom')
+
+     # Success rate bar chart
+     bars3 = ax2.bar(summary_data['Dataset Type'], summary_data['Success Rate'],
+                     color=['blue', 'orange'], alpha=0.7)
+
+     # Add overall accuracy line
+     ax2.axhline(y=overall_accuracy, color='red', linestyle='--', linewidth=2,
+                 label=f'Overall Accuracy: {overall_accuracy:.3f}')
+
+     ax2.set_xlabel('Dataset Type')
+     ax2.set_ylabel('Success Rate')
+     ax2.set_title('Model Selection Success Rates')
+     ax2.set_ylim(0, 1)
+     ax2.legend()
+     ax2.grid(True, alpha=0.3)
+
+     # Add value labels on bars
+     for bar in bars3:
+         height = bar.get_height()
+         ax2.text(bar.get_x() + bar.get_width()/2., height + 0.01,
+                  f'{height:.3f}', ha='center', va='bottom')
+
+     plt.tight_layout()
+     plt.savefig('model_selection_accuracy_chart.pdf', dpi=300, bbox_inches='tight')
+
+
+ def create_confusion_matrix_heatmap(confusion_matrix: np.ndarray):
+     """Create a heatmap of the confusion matrix."""
+
+     fig, ax = plt.subplots(figsize=(8, 6))
+
+     im = ax.imshow(confusion_matrix, interpolation='nearest', cmap='Blues')
+     ax.figure.colorbar(im, ax=ax)
+
+     # Set ticks and labels
+     ax.set_xticks([0, 1])
+     ax.set_yticks([0, 1])
+     ax.set_xticklabels(['Linear', 'Powerlaw'])
+     ax.set_yticklabels(['Linear', 'Powerlaw'])
+
+     # Add text annotations
+     thresh = confusion_matrix.max() / 2.
+     for i in range(confusion_matrix.shape[0]):
+         for j in range(confusion_matrix.shape[1]):
+             ax.text(j, i, format(confusion_matrix[i, j], 'd'),
+                     ha="center", va="center",
+                     color="white" if confusion_matrix[i, j] > thresh else "black")
+
+     ax.set_xlabel('Predicted Label')
+     ax.set_ylabel('True Label')
+     ax.set_title('Confusion Matrix: Model Selection Results')
+
+     plt.tight_layout()
+     plt.savefig('share/confusion_matrix_heatmap.pdf', dpi=300, bbox_inches='tight')
+
+
+ def create_metrics_comparison(metrics_data: Dict):
+     """Create a comparison chart of different metrics."""
+
+     fig, ax = plt.subplots(figsize=(12, 8))
+
+     x = np.arange(len(metrics_data['Model Type']))
+     width = 0.2
+
+     metrics = ['Precision', 'Recall (Sensitivity)', 'Specificity', 'F1-Score']
+     colors = ['blue', 'green', 'orange', 'red']
+
+     for i, (metric, color) in enumerate(zip(metrics, colors)):
+         values = metrics_data[metric]
+         ax.bar(x + i * width, values, width, label=metric, color=color, alpha=0.7)
+
+     ax.set_xlabel('Model Type')
+     ax.set_ylabel('Score')
+     ax.set_title('Model Selection Performance Metrics Comparison')
+     ax.set_xticks(x + width * 1.5)
+     ax.set_xticklabels(metrics_data['Model Type'])
+     ax.legend()
+     ax.set_ylim(0, 1)
+     ax.grid(True, alpha=0.3)
+
+     # Add value labels on bars
+     for i, metric in enumerate(metrics):
+         values = metrics_data[metric]
+         for j, value in enumerate(values):
+             ax.text(j + i * width, value + 0.01, f'{value:.3f}',
+                     ha='center', va='bottom', fontsize=9)
+
+     plt.tight_layout()
+     plt.savefig('share/metrics_comparison_chart.pdf', dpi=300, bbox_inches='tight')
+
+
+ def save_detailed_results(linear_results: List[Dict], powerlaw_results: List[Dict],
+                           summary_data: Dict, metrics_data: Dict, overall_accuracy: float):
+     """Save detailed results to JSON file."""
+
+     results = {
+         'overall_accuracy': overall_accuracy,
+         'summary': summary_data,
+         'metrics': metrics_data,
+         'linear_results': linear_results,
+         'powerlaw_results': powerlaw_results,
+         'analysis_timestamp': pd.Timestamp.now().isoformat()
+     }
+
+     with open('model_selection_analysis_results.json', 'w') as f:
+         json.dump(results, f, indent=2, default=str)
+
+     print("Detailed results saved as 'model_selection_analysis_results.json'")
+
+
+ if __name__ == "__main__":
+     print("Model Selection Accuracy Analysis")
+     print("=" * 40)
+     print("Analyzing regression results from test5 datasets...")
+     print("Success criteria:")
+     print("- Linear datasets: success when 'selected' = 'linear'")
+     print("- Powerlaw datasets: success when 'selected' = 'power_law'")
+     print()
+
+     try:
+         summary_df, metrics_df, accuracy = analyze_model_selection_accuracy()
+         print(f"\nAnalysis complete! Overall accuracy: {accuracy:.3f}")
+
+     except Exception as e:
+         print(f"Error during analysis: {e}")
+         import traceback
+         traceback.print_exc()
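
Note on the expected input: the diff does not include sample data, but from the way load_regression_results reads the files, each out_regression_results.json under tests/data/test5/{linear,powerlaw}/output appears to contain a "scaled var number of substitutions model" block with a nested "model_selection" object. The snippet below is a minimal, hypothetical sketch of such a file; the file name and all numeric values are illustrative only and not taken from the package.

import json

# Hypothetical example input for load_regression_results; the key names mirror
# the fields the loader reads, the values are made up for illustration.
example = {
    "scaled var number of substitutions model": {
        "model_selection": {
            "selected": "power_law",
            "linear_AIC": 120.4,
            "power_law_AIC": 112.9,
            "delta_AIC_linear": 7.5,
            "delta_AIC_power_law": 0.0,
            "akaike_weight_linear": 0.023,
            "akaike_weight_power_law": 0.977
        }
    }
}

# The file name ends in "out_regression_results.json", so it matches the glob
# pattern used by the script.
with open("example_out_regression_results.json", "w") as f:
    json.dump(example, f, indent=2)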