PyEvoMotion 0.1.1__py3-none-any.whl → 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- PyEvoMotion/cli.py +87 -3
- PyEvoMotion/core/base.py +296 -20
- PyEvoMotion/core/core.py +73 -24
- {pyevomotion-0.1.1.dist-info → pyevomotion-0.1.2.dist-info}/METADATA +1 -1
- pyevomotion-0.1.2.dist-info/RECORD +35 -0
- share/analyze_model_selection_accuracy.py +316 -0
- share/analyze_test_runs.py +436 -0
- share/anomalous_diffusion.pdf +0 -0
- share/confusion_matrix_heatmap.pdf +0 -0
- share/figUK_plots.pdf +0 -0
- share/figUK_regression_results.json +54 -7
- share/figUK_run_args.json +1 -0
- share/figUK_stats.tsv +41 -41
- share/figUSA_plots.pdf +0 -0
- share/figUSA_regression_results.json +54 -7
- share/figUSA_run_args.json +1 -0
- share/figUSA_stats.tsv +34 -34
- share/generate_sequences_from_test5_data.py +107 -0
- share/manuscript_figure.py +450 -80
- share/run_parallel_analysis.py +196 -0
- share/synth_figure.pdf +0 -0
- share/uk_time_windows.pdf +0 -0
- share/weekly_size.pdf +0 -0
- pyevomotion-0.1.1.dist-info/RECORD +0 -31
- share/figure.pdf +0 -0
- {pyevomotion-0.1.1.dist-info → pyevomotion-0.1.2.dist-info}/WHEEL +0 -0
- {pyevomotion-0.1.1.dist-info → pyevomotion-0.1.2.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,436 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Analyze parameter variability across multiple PyEvoMotion runs.
|
|
4
|
+
|
|
5
|
+
This script loads regression results from multiple runs and creates
|
|
6
|
+
violin plots to visualize parameter distributions and assess
|
|
7
|
+
reproducibility of the nonlinear fitting process.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import json
|
|
11
|
+
import os
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import Dict, List
|
|
14
|
+
|
|
15
|
+
import matplotlib.pyplot as plt
|
|
16
|
+
import matplotlib as mpl
|
|
17
|
+
import numpy as np
|
|
18
|
+
import pandas as pd
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def set_matplotlib_params():
    """Install the matplotlib rcParams shared by the figures of this script."""
    style = {
        # Fonts
        "font.sans-serif": "Helvetica",
        "font.size": 12,
        # Axes: thick frame, no top/right spines
        "axes.linewidth": 2,
        "axes.labelsize": 14,
        "axes.spines.top": False,
        "axes.spines.right": False,
        # Ticks
        "xtick.major.width": 2,
        "ytick.major.width": 2,
        "xtick.major.size": 6,
        "ytick.major.size": 6,
        # Legend
        "legend.frameon": False,
    }
    mpl.rcParams.update(style)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def load_regression_results(base_dir: Path, country: str, num_runs: int = 5) -> List[Dict]:
    """
    Load regression results from multiple runs.

    For every run number from 1 to ``num_runs`` the file
    ``<base_dir>/<country>_run<N>/fig<country>_regression_results.json`` is
    read. Runs whose file is missing or does not contain valid JSON are
    reported on stdout and skipped, so the returned list may be shorter than
    ``num_runs``.

    :param base_dir: Base directory containing run subdirectories
    :type base_dir: Path
    :param country: Either "UK" or "USA"
    :type country: str
    :param num_runs: Number of runs to load (default 5)
    :type num_runs: int
    :return: List of dictionaries with the keys ``run`` (run number),
        ``country`` and ``data`` (the parsed JSON document)
    :rtype: List[Dict]
    """
    # Accept plain string paths as well as Path objects.
    base_dir = Path(base_dir)
    results = []

    for run_num in range(1, num_runs + 1):
        run_dir = base_dir / f"{country}_run{run_num}"
        results_file = run_dir / f"fig{country}_regression_results.json"

        if not results_file.exists():
            print(f"Warning: {results_file} not found")
            continue

        try:
            # JSON is UTF-8 by specification; do not rely on the platform's
            # default text encoding.
            with open(results_file, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except json.JSONDecodeError as exc:
            # A single corrupt file should not abort loading of the other runs.
            print(f"Warning: {results_file} is not valid JSON ({exc})")
            continue

        results.append({
            'run': run_num,
            'country': country,
            'data': data
        })

    return results
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def extract_parameters(results: List[Dict]) -> pd.DataFrame:
    """
    Extract parameters from regression results into a DataFrame.

    Each input entry yields one row. The mean model is looked up under the
    first matching "mean number of ... model" key and contributes ``mean_m``,
    ``mean_b`` and ``mean_r2``. The variance model is looked up under the
    first matching "scaled var number of ... model" key. When it carries a
    ``model_selection`` section, the parameters of the selected model
    (``linear`` or ``power_law``) are used; otherwise the old flat layout is
    read and the model is inferred from the parameter names.

    Every row always contains all of the columns ``run``, ``country``,
    ``mean_m``, ``mean_b``, ``mean_r2``, ``var_model_selected``, ``var_m``,
    ``var_d``, ``var_alpha`` and ``var_r2``; values that could not be
    extracted are missing (None/NaN). This keeps the columns available even
    when no run provides them.

    :param results: List of regression result dictionaries with the keys
        ``run``, ``country`` and ``data``
    :type results: List[Dict]
    :return: DataFrame with parameters from all runs
    :rtype: pd.DataFrame
    """
    columns = (
        'run', 'country',
        'mean_m', 'mean_b', 'mean_r2',
        'var_model_selected', 'var_m', 'var_d', 'var_alpha', 'var_r2',
    )
    mean_keys = (
        "mean number of mutations model",
        "mean number of mutations per 7D model",
        "mean number of substitutions model",
    )
    var_keys = (
        "scaled var number of mutations model",
        "scaled var number of mutations per 7D model",
        "scaled var number of substitutions model",
    )

    records = []

    for result in results:
        data = result['data']
        record = {
            'run': result['run'],
            'country': result['country']
        }

        # Extract mean model parameters
        mean_key = next((key for key in mean_keys if key in data), None)
        if mean_key is not None:
            mean_model = data[mean_key]
            record['mean_m'] = mean_model['parameters']['m']
            record['mean_b'] = mean_model['parameters']['b']
            record['mean_r2'] = mean_model['r2']

        # Extract variance model parameters
        var_key = next((key for key in var_keys if key in data), None)
        if var_key is not None:
            var_model = data[var_key]

            if "model_selection" in var_model:
                # New format: both fits are stored, plus which one was selected
                selected = var_model["model_selection"]["selected"]
                record['var_model_selected'] = selected

                if selected == "linear" and "linear_model" in var_model:
                    linear = var_model["linear_model"]
                    record['var_m'] = linear['parameters']['m']
                    record['var_r2'] = linear['r2']
                    record['var_d'] = None
                    record['var_alpha'] = None

                elif selected == "power_law" and "power_law_model" in var_model:
                    power_law = var_model["power_law_model"]
                    record['var_d'] = power_law['parameters']['d']
                    record['var_alpha'] = power_law['parameters']['alpha']
                    record['var_r2'] = power_law['r2']
                    record['var_m'] = None
            else:
                # Old format without model selection
                params = var_model['parameters']
                record['var_r2'] = var_model['r2']

                if 'm' in params:
                    record['var_m'] = params['m']
                    record['var_d'] = None
                    record['var_alpha'] = None
                    record['var_model_selected'] = 'linear'
                elif 'd' in params and 'alpha' in params:
                    record['var_d'] = params['d']
                    record['var_alpha'] = params['alpha']
                    record['var_m'] = None
                    record['var_model_selected'] = 'power_law'

        # Fill whatever could not be extracted so the schema is stable.
        # setdefault keeps the insertion order of the values set above.
        for column in columns:
            record.setdefault(column, None)

        records.append(record)

    if not records:
        return pd.DataFrame(columns=list(columns))

    return pd.DataFrame(records)
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def create_violin_plots(df: pd.DataFrame, export: bool = False, show: bool = True, output_filename: str = "share/test_runs_violin_plot.pdf"):
    """
    Create violin plots for parameter distributions.

    One panel per parameter is drawn on a fixed 2x3 grid: the three
    mean-model parameters followed by the variance-model parameters. The
    power-law parameters (d, alpha) are shown as soon as at least one run
    selected the power-law model; otherwise the linear slope is shown.
    Each panel holds one violin per country ("UK", "USA") that has values for
    the parameter, the individual runs as randomly jittered points, and a
    text box per country with mean, standard deviation and CV%.

    The variance model counts are also printed to stdout.

    :param df: DataFrame with extracted parameters
    :type df: pd.DataFrame
    :param export: Whether to save the figure (default False)
    :type export: bool
    :param show: Whether to display the figure (default True)
    :type show: bool
    :param output_filename: Path to save the figure
    :type output_filename: str
    """
    set_matplotlib_params()

    countries = ['UK', 'USA']

    # Define colors
    colors = {
        "UK": "#76d6ff",
        "USA": "#FF6346",
    }

    # Parameters to plot: (column, panel title, unit)
    mean_params = [
        ('mean_m', 'Mean: Slope (m)', 'mutations/week'),
        ('mean_b', 'Mean: Intercept (b)', 'mutations'),
        ('mean_r2', 'Mean: R²', '')
    ]

    # Check which variance model is predominantly used
    if 'var_model_selected' in df.columns:
        var_model_counts = df['var_model_selected'].value_counts()
    else:
        # No run provided a variance model at all
        var_model_counts = pd.Series(dtype='int64')
    print("\nVariance model selection:")
    print(var_model_counts)

    # Determine which variance parameters to plot
    if var_model_counts.get('power_law', 0) > 0:
        var_params = [
            ('var_d', 'Variance: Coefficient (d)', ''),
            ('var_alpha', 'Variance: Exponent (α)', ''),
            ('var_r2', 'Variance: R²', '')
        ]
    else:
        var_params = [
            ('var_m', 'Variance: Slope (m)', 'mutations²/week'),
            ('var_r2', 'Variance: R²', '')
        ]

    all_params = mean_params + var_params

    # Create subplots
    fig, axes = plt.subplots(2, 3, figsize=(18, 12))
    axes = axes.flatten()

    # zip stops at the shorter sequence, so parameters beyond the grid are dropped
    for ax, (param, title, unit) in zip(axes, all_params):
        # Collect, per country, the runs that actually have a value for this
        # parameter. Countries without values are left out entirely:
        # handing an empty dataset to violinplot would fail (e.g. when one
        # country selected the linear and the other the power-law model).
        present = []
        if param in df.columns:
            has_value = df[param].notna()
            for position, country in enumerate(countries):
                values = df.loc[has_value & (df['country'] == country), param]
                if len(values) > 0:
                    present.append((position, country, values.astype(float)))

        if not present:
            ax.text(0.5, 0.5, 'No data', ha='center', va='center', transform=ax.transAxes)
            ax.set_title(title)
            continue

        # Create violin plot
        parts = ax.violinplot(
            [values.to_numpy() for _, _, values in present],
            positions=[position for position, _, _ in present],
            showmeans=True,
            showextrema=True,
            widths=0.7
        )

        # Color the violins (bodies come back in the order the datasets were passed)
        for (_, country, _), body in zip(present, parts['bodies']):
            body.set_facecolor(colors[country])
            body.set_alpha(0.7)
            body.set_edgecolor('black')
            body.set_linewidth(1.5)

        # Style the other elements
        for partname in ['cmeans', 'cmaxes', 'cmins', 'cbars']:
            if partname in parts:
                parts[partname].set_edgecolor('black')
                parts[partname].set_linewidth(2)

        # Add scatter points for individual runs, jittered horizontally
        for position, _, values in present:
            x_pos = np.random.normal(position, 0.04, size=len(values))
            ax.scatter(x_pos, values.to_numpy(),
                       alpha=0.6, s=50, c='black', zorder=3, edgecolors='white', linewidth=1)

        # Styling
        y_label = title.split(": ")[1]
        if unit:
            y_label = f"{y_label} ({unit})"
        ax.set_xticks([0, 1])
        ax.set_xticklabels(['UK', 'USA'])
        ax.set_ylabel(y_label)
        ax.set_title(title, fontweight='bold')
        ax.grid(axis='y', alpha=0.3, linestyle='--')

        # Add statistics text. The boxes are placed in axes coordinates
        # (fractions of the panel) so they stay in the top-right corner
        # whatever the data range is; scaling the upper y-limit in data
        # coordinates pushed them out of the panel for ranges far from zero.
        for position, country, values in present:
            mean_val = values.mean()
            std_val = values.std()
            cv = (std_val / mean_val * 100) if mean_val != 0 else 0

            ax.text(0.98, 0.95 - position * 0.08,
                    f'{country}: μ={mean_val:.4f}, σ={std_val:.4f}, CV={cv:.2f}%',
                    transform=ax.transAxes, ha='right', va='top',
                    fontsize=9, bbox=dict(boxstyle='round', facecolor=colors[country], alpha=0.3))

    # Hide unused subplots
    for unused_ax in axes[len(all_params):]:
        unused_ax.set_visible(False)

    fig.suptitle('Parameter Variability Across Multiple Runs\n(Assessing Nonlinear Fitting Reproducibility)',
                 fontsize=16, fontweight='bold', y=0.995)
    plt.tight_layout()

    if export:
        fig.savefig(output_filename, dpi=400, bbox_inches='tight')
        print(f"\nViolin plot saved as {output_filename}")

    if show:
        plt.show()
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
def _print_parameter_stats(country_df: pd.DataFrame, params) -> None:
    """Print mean, standard deviation and CV% for each listed column that has values."""
    for param in params:
        if param not in country_df.columns:
            continue
        values = country_df[param].dropna()
        if len(values) == 0:
            continue
        mean = values.mean()
        std = values.std()
        # CV is undefined for a zero mean; report 0 instead of dividing by zero
        cv = (std / mean * 100) if mean != 0 else 0
        print(f" {param:12s}: μ={mean:10.6f}, σ={std:10.6f}, CV={cv:6.2f}%")


def print_summary_statistics(df: pd.DataFrame):
    """
    Print summary statistics for all parameters.

    Displays mean, standard deviation, and coefficient of variation (CV%)
    for each parameter grouped by country ("UK" and "USA"). The variance
    parameters that are reported depend on the most frequent value of
    ``var_model_selected`` for that country: ``var_d``, ``var_alpha`` and
    ``var_r2`` for ``power_law``, otherwise ``var_m`` and ``var_r2``. When a
    country has no rows, or none of its rows names a selected model, the
    model is reported as ``unknown``.

    :param df: DataFrame with extracted parameters
    :type df: pd.DataFrame
    """
    print("\n" + "="*80)
    print("PARAMETER VARIABILITY SUMMARY")
    print("="*80)

    for country in ['UK', 'USA']:
        print(f"\n{country} Dataset:")
        print("-" * 40)

        country_df = df[df['country'] == country]

        # Mean model parameters
        print("\nMean Model:")
        _print_parameter_stats(country_df, ['mean_m', 'mean_b', 'mean_r2'])

        # Variance model parameters
        print("\nVariance Model:")
        var_model = 'unknown'
        if 'var_model_selected' in country_df.columns:
            # mode() ignores missing values and is empty when nothing is left,
            # so it must not be indexed unconditionally
            modes = country_df['var_model_selected'].mode()
            if not modes.empty:
                var_model = modes.iloc[0]
        print(f" Selected model: {var_model}")

        if var_model == 'power_law':
            _print_parameter_stats(country_df, ['var_d', 'var_alpha', 'var_r2'])
        else:
            _print_parameter_stats(country_df, ['var_m', 'var_r2'])

    print("\n" + "="*80)
|
|
351
|
+
|
|
352
|
+
|
|
353
|
+
def main():
    """
    Main execution function for analyzing test run parameter variability.

    Loads regression results from batch directories, extracts parameters,
    computes statistics, and generates violin plots to visualize parameter
    distributions across multiple runs.

    The batch is taken from the first command line argument
    (``share/test-runs/<batch>``). Without an argument, ``share/test-runs`` is
    searched for ``batch*`` subdirectories: a single one is used
    automatically, none falls back to ``share/test-runs`` itself, and several
    make the function print usage instructions and return. Problems
    (missing directory, no results) are reported on stdout and end the
    function early.
    """

    import re
    import sys

    test_runs_dir = Path("share/test-runs")

    # Parse command line arguments
    if len(sys.argv) > 1:
        batch_name = sys.argv[1]
        base_dir = Path(f"share/test-runs/{batch_name}")
        output_suffix = f"_{batch_name}"
    else:
        # Try to auto-detect batch directories. A missing test-runs directory
        # is treated as "no batches" so that the check below can report it
        # instead of iterdir() raising.
        if test_runs_dir.is_dir():
            batch_dirs = sorted(
                d for d in test_runs_dir.iterdir()
                if d.is_dir() and d.name.startswith("batch")
            )
        else:
            batch_dirs = []

        if len(batch_dirs) == 0:
            # Fall back to old structure (no batch subdirectories)
            base_dir = test_runs_dir
            output_suffix = ""
        elif len(batch_dirs) == 1:
            # Use the only batch found
            base_dir = batch_dirs[0]
            output_suffix = f"_{batch_dirs[0].name}"
            print(f"Auto-detected batch: {batch_dirs[0].name}")
        else:
            # Multiple batches - the user has to choose one explicitly
            print(f"Found {len(batch_dirs)} batches: {[d.name for d in batch_dirs]}")
            print("Please specify which batch to analyze:")
            print(" python analyze_test_runs.py batch1")
            print("Or analyze all batches separately by running for each.")
            return

    if not base_dir.is_dir():
        print(f"Error: Directory {base_dir} does not exist!")
        return

    # Auto-detect number of runs. Runs are loaded as <country>_run1 ..
    # <country>_runN, so N must be the highest run number present; merely
    # counting the directories would drop the last runs whenever the
    # numbering has gaps.
    run_dir_pattern = re.compile(r"(?:UK|USA)_run(\d+)")
    run_numbers = []
    for entry in base_dir.iterdir():
        match = run_dir_pattern.fullmatch(entry.name)
        if match and entry.is_dir():
            run_numbers.append(int(match.group(1)))
    num_runs = max(run_numbers, default=0)

    countries = ["UK", "USA"]

    print(f"Loading regression results from {base_dir}...")
    print(f"Detected {num_runs} runs per country")

    # Load all results
    all_results = []
    for country in countries:
        results = load_regression_results(base_dir, country, num_runs)
        all_results.extend(results)
        print(f"Loaded {len(results)} runs for {country}")

    if not all_results:
        print("Error: No results found!")
        return

    # Extract parameters into DataFrame
    print("\nExtracting parameters...")
    df = extract_parameters(all_results)

    # Save to CSV for further analysis
    output_csv = f"share/test_runs_parameters{output_suffix}.csv"
    df.to_csv(output_csv, index=False)
    print(f"Parameters saved to {output_csv}")

    # Print summary statistics
    print_summary_statistics(df)

    # Create violin plots
    print("\nCreating violin plots...")
    output_plot = f"share/test_runs_violin_plot{output_suffix}.pdf"
    create_violin_plots(df, export=True, show=True, output_filename=output_plot)


if __name__ == "__main__":
    main()
|
|
436
|
+
|
share/anomalous_diffusion.pdf
CHANGED
|
Binary file
|
|
Binary file
|
share/figUK_plots.pdf
CHANGED
|
Binary file
|
|
@@ -1,18 +1,65 @@
|
|
|
1
1
|
{
|
|
2
|
-
"mean number of mutations
|
|
2
|
+
"mean number of mutations model": {
|
|
3
3
|
"parameters": {
|
|
4
4
|
"m": 0.19547481089767144,
|
|
5
5
|
"b": 35.634115529420946
|
|
6
6
|
},
|
|
7
|
+
"confidence_intervals": {
|
|
8
|
+
"m": [
|
|
9
|
+
0.18318447093035867,
|
|
10
|
+
0.2077651508649842
|
|
11
|
+
],
|
|
12
|
+
"b": [
|
|
13
|
+
35.35560980879931,
|
|
14
|
+
35.912621250042584
|
|
15
|
+
]
|
|
16
|
+
},
|
|
7
17
|
"expression": "mx + b",
|
|
18
|
+
"confidence_level": 0.95,
|
|
8
19
|
"r2": 0.9637010376538699
|
|
9
20
|
},
|
|
10
|
-
"scaled var number of mutations
|
|
11
|
-
"
|
|
12
|
-
"
|
|
13
|
-
|
|
21
|
+
"scaled var number of mutations model": {
|
|
22
|
+
"linear_model": {
|
|
23
|
+
"parameters": {
|
|
24
|
+
"m": 0.2701500514044973
|
|
25
|
+
},
|
|
26
|
+
"confidence_intervals": {
|
|
27
|
+
"m": [
|
|
28
|
+
0.21165644420002577,
|
|
29
|
+
0.3286436586089689
|
|
30
|
+
]
|
|
31
|
+
},
|
|
32
|
+
"expression": "mx",
|
|
33
|
+
"r2": 0.23974456333891003,
|
|
34
|
+
"confidence_level": 0.95
|
|
35
|
+
},
|
|
36
|
+
"power_law_model": {
|
|
37
|
+
"parameters": {
|
|
38
|
+
"d": 1.949675756351066,
|
|
39
|
+
"alpha": 0.40103856972319674
|
|
40
|
+
},
|
|
41
|
+
"confidence_intervals": {
|
|
42
|
+
"d": [
|
|
43
|
+
1.2336302228902016,
|
|
44
|
+
2.6657212898119305
|
|
45
|
+
],
|
|
46
|
+
"alpha": [
|
|
47
|
+
0.2846823845254939,
|
|
48
|
+
0.5173947549208996
|
|
49
|
+
]
|
|
50
|
+
},
|
|
51
|
+
"expression": "d*x^alpha",
|
|
52
|
+
"r2": 0.7403444327630024,
|
|
53
|
+
"confidence_level": 0.95
|
|
14
54
|
},
|
|
15
|
-
"
|
|
16
|
-
|
|
55
|
+
"model_selection": {
|
|
56
|
+
"selected": "power_law",
|
|
57
|
+
"linear_AIC": 173.79403093437475,
|
|
58
|
+
"power_law_AIC": 133.0411534485154,
|
|
59
|
+
"delta_AIC_linear": 40.75287748585936,
|
|
60
|
+
"delta_AIC_power_law": 0.0,
|
|
61
|
+
"akaike_weight_linear": 1.414572114240171e-09,
|
|
62
|
+
"akaike_weight_power_law": 0.999999998585428
|
|
63
|
+
}
|
|
17
64
|
}
|
|
18
65
|
}
|
share/figUK_run_args.json
CHANGED
share/figUK_stats.tsv
CHANGED
|
@@ -1,41 +1,41 @@
|
|
|
1
|
-
date mean number of mutations var number of mutations size
|
|
2
|
-
2020-10-29 34.515151515151516 0.8689976689976701 66
|
|
3
|
-
2020-11-05 35.029411764705884 0.9620915032679709 136
|
|
4
|
-
2020-11-12 35.63250883392226 1.9921309174748796 283
|
|
5
|
-
2020-11-19 36.07575757575758 2.5170742962621144 198
|
|
6
|
-
2020-11-26 36.37826086956522 3.6685209796848266 230
|
|
7
|
-
2020-12-03 36.20704845814978 4.297649214455573 227
|
|
8
|
-
2020-12-10 36.555865921787706 3.715357651440464 358
|
|
9
|
-
2020-12-17 37.040625 4.709943181818182 320
|
|
10
|
-
2020-12-24 37.450171821305844 6.5794051427894376 291
|
|
11
|
-
2020-12-31 37.649068322981364 6.614781060737982 322
|
|
12
|
-
2021-01-07 37.97329376854599 5.907022749752713 337
|
|
13
|
-
2021-01-14 38.126182965299684 7.1675717765443405 317
|
|
14
|
-
2021-01-21 38.33774834437086 6.476909198917517 302
|
|
15
|
-
2021-01-28 38.614285714285714 6.632053251408085 280
|
|
16
|
-
2021-02-04 38.891373801916934 8.481752273285828 313
|
|
17
|
-
2021-02-11 38.62369337979094 7.235520576984016 287
|
|
18
|
-
2021-02-18 39.12367491166078 7.97401197904921 283
|
|
19
|
-
2021-02-25 39.30167597765363 6.558596623006733 358
|
|
20
|
-
2021-03-04 39.56273764258555 6.239369575944036 263
|
|
21
|
-
2021-03-11 39.64939024390244 8.613699932870894 328
|
|
22
|
-
2021-03-18 39.625 7.909090909090907 320
|
|
23
|
-
2021-03-25 40.233676975945016 8.607275743571508 291
|
|
24
|
-
2021-04-01 40.11 8.345719063545154 300
|
|
25
|
-
2021-04-08 40.11552346570397 6.776461047454612 277
|
|
26
|
-
2021-04-15 40.674121405750796 7.5344884082903265 313
|
|
27
|
-
2021-04-22 40.55882352941177 8.050626808100292 306
|
|
28
|
-
2021-04-29 41.38768115942029 9.140065876152836 276
|
|
29
|
-
2021-05-06 41.098684210526315 7.442374500607957 304
|
|
30
|
-
2021-05-13 41.06024096385542 8.75038865137971 249
|
|
31
|
-
2021-05-20 41.205479452054796 9.091653721225812 292
|
|
32
|
-
2021-05-27 40.85546875 9.69275428921569 256
|
|
33
|
-
2021-06-03 41.12112676056338 8.886416805920257 355
|
|
34
|
-
2021-06-10 41.49 6.96472361809045 200
|
|
35
|
-
2021-06-17 41.723404255319146 7.741267493457731 188
|
|
36
|
-
2021-06-24 42.27860696517413 6.611990049751241 201
|
|
37
|
-
2021-07-01 42.541935483870965 8.717385839966484 155
|
|
38
|
-
2021-07-08 42.71875 7.475198412698421 64
|
|
39
|
-
2021-07-15 43.333333333333336 8.333333333333332 39
|
|
40
|
-
2021-07-22 42.32142857142857 7.822077922077916 56
|
|
41
|
-
2021-07-29 41.857142857142854 15.14285714285714 7
|
|
1
|
+
date mean number of mutations var number of mutations size dt_idx
|
|
2
|
+
2020-10-29 34.515151515151516 0.8689976689976701 66 0.0
|
|
3
|
+
2020-11-05 35.029411764705884 0.9620915032679709 136 1.0
|
|
4
|
+
2020-11-12 35.63250883392226 1.9921309174748796 283 2.0
|
|
5
|
+
2020-11-19 36.07575757575758 2.5170742962621144 198 3.0
|
|
6
|
+
2020-11-26 36.37826086956522 3.6685209796848266 230 4.0
|
|
7
|
+
2020-12-03 36.20704845814978 4.297649214455573 227 5.0
|
|
8
|
+
2020-12-10 36.555865921787706 3.715357651440464 358 6.0
|
|
9
|
+
2020-12-17 37.040625 4.709943181818182 320 7.0
|
|
10
|
+
2020-12-24 37.450171821305844 6.5794051427894376 291 8.0
|
|
11
|
+
2020-12-31 37.649068322981364 6.614781060737982 322 9.0
|
|
12
|
+
2021-01-07 37.97329376854599 5.907022749752713 337 10.0
|
|
13
|
+
2021-01-14 38.126182965299684 7.1675717765443405 317 11.0
|
|
14
|
+
2021-01-21 38.33774834437086 6.476909198917517 302 12.0
|
|
15
|
+
2021-01-28 38.614285714285714 6.632053251408085 280 13.0
|
|
16
|
+
2021-02-04 38.891373801916934 8.481752273285828 313 14.0
|
|
17
|
+
2021-02-11 38.62369337979094 7.235520576984016 287 15.0
|
|
18
|
+
2021-02-18 39.12367491166078 7.97401197904921 283 16.0
|
|
19
|
+
2021-02-25 39.30167597765363 6.558596623006733 358 17.0
|
|
20
|
+
2021-03-04 39.56273764258555 6.239369575944036 263 18.0
|
|
21
|
+
2021-03-11 39.64939024390244 8.613699932870894 328 19.0
|
|
22
|
+
2021-03-18 39.625 7.909090909090907 320 20.0
|
|
23
|
+
2021-03-25 40.233676975945016 8.607275743571508 291 21.0
|
|
24
|
+
2021-04-01 40.11 8.345719063545154 300 22.0
|
|
25
|
+
2021-04-08 40.11552346570397 6.776461047454612 277 23.0
|
|
26
|
+
2021-04-15 40.674121405750796 7.5344884082903265 313 24.0
|
|
27
|
+
2021-04-22 40.55882352941177 8.050626808100292 306 25.0
|
|
28
|
+
2021-04-29 41.38768115942029 9.140065876152836 276 26.0
|
|
29
|
+
2021-05-06 41.098684210526315 7.442374500607957 304 27.0
|
|
30
|
+
2021-05-13 41.06024096385542 8.75038865137971 249 28.0
|
|
31
|
+
2021-05-20 41.205479452054796 9.091653721225812 292 29.0
|
|
32
|
+
2021-05-27 40.85546875 9.69275428921569 256 30.0
|
|
33
|
+
2021-06-03 41.12112676056338 8.886416805920257 355 31.0
|
|
34
|
+
2021-06-10 41.49 6.96472361809045 200 32.0
|
|
35
|
+
2021-06-17 41.723404255319146 7.741267493457731 188 33.0
|
|
36
|
+
2021-06-24 42.27860696517413 6.611990049751241 201 34.0
|
|
37
|
+
2021-07-01 42.541935483870965 8.717385839966484 155 35.0
|
|
38
|
+
2021-07-08 42.71875 7.475198412698421 64 36.0
|
|
39
|
+
2021-07-15 43.333333333333336 8.333333333333332 39 37.0
|
|
40
|
+
2021-07-22 42.32142857142857 7.822077922077916 56 38.0
|
|
41
|
+
2021-07-29 41.857142857142854 15.14285714285714 7 39.0
|
share/figUSA_plots.pdf
CHANGED
|
Binary file
|
|
@@ -1,18 +1,65 @@
|
|
|
1
1
|
{
|
|
2
|
-
"mean number of mutations
|
|
2
|
+
"mean number of mutations model": {
|
|
3
3
|
"parameters": {
|
|
4
4
|
"m": 0.3258484867760181,
|
|
5
5
|
"b": 35.93995331972901
|
|
6
6
|
},
|
|
7
|
+
"confidence_intervals": {
|
|
8
|
+
"m": [
|
|
9
|
+
0.2883403045689692,
|
|
10
|
+
0.36335666898306695
|
|
11
|
+
],
|
|
12
|
+
"b": [
|
|
13
|
+
35.24158897313613,
|
|
14
|
+
36.63831766632188
|
|
15
|
+
]
|
|
16
|
+
},
|
|
7
17
|
"expression": "mx + b",
|
|
18
|
+
"confidence_level": 0.95,
|
|
8
19
|
"r2": 0.9071591655422693
|
|
9
20
|
},
|
|
10
|
-
"scaled var number of mutations
|
|
11
|
-
"
|
|
12
|
-
"
|
|
13
|
-
|
|
21
|
+
"scaled var number of mutations model": {
|
|
22
|
+
"linear_model": {
|
|
23
|
+
"parameters": {
|
|
24
|
+
"m": 0.2915484276133104
|
|
25
|
+
},
|
|
26
|
+
"confidence_intervals": {
|
|
27
|
+
"m": [
|
|
28
|
+
0.24003648883099504,
|
|
29
|
+
0.34306036639562576
|
|
30
|
+
]
|
|
31
|
+
},
|
|
32
|
+
"expression": "mx",
|
|
33
|
+
"r2": 0.7243923706082853,
|
|
34
|
+
"confidence_level": 0.95
|
|
35
|
+
},
|
|
36
|
+
"power_law_model": {
|
|
37
|
+
"parameters": {
|
|
38
|
+
"d": 0.6926303070016426,
|
|
39
|
+
"alpha": 0.723863222453596
|
|
40
|
+
},
|
|
41
|
+
"confidence_intervals": {
|
|
42
|
+
"d": [
|
|
43
|
+
0.2633037580510857,
|
|
44
|
+
1.1219568559521995
|
|
45
|
+
],
|
|
46
|
+
"alpha": [
|
|
47
|
+
0.5238853883558428,
|
|
48
|
+
0.9238410565513493
|
|
49
|
+
]
|
|
50
|
+
},
|
|
51
|
+
"expression": "d*x^alpha",
|
|
52
|
+
"r2": 0.7761009815632863,
|
|
53
|
+
"confidence_level": 0.95
|
|
14
54
|
},
|
|
15
|
-
"
|
|
16
|
-
|
|
55
|
+
"model_selection": {
|
|
56
|
+
"selected": "power_law",
|
|
57
|
+
"linear_AIC": 115.52445550217226,
|
|
58
|
+
"power_law_AIC": 110.93858154005544,
|
|
59
|
+
"delta_AIC_linear": 4.585873962116821,
|
|
60
|
+
"delta_AIC_power_law": 0.0,
|
|
61
|
+
"akaike_weight_linear": 0.0917096089306382,
|
|
62
|
+
"akaike_weight_power_law": 0.9082903910693618
|
|
63
|
+
}
|
|
17
64
|
}
|
|
18
65
|
}
|