PyEvoMotion 0.1.1__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,436 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Analyze parameter variability across multiple PyEvoMotion runs.
4
+
5
+ This script loads regression results from multiple runs and creates
6
+ violin plots to visualize parameter distributions and assess
7
+ reproducibility of the nonlinear fitting process.
8
+ """
9
+
10
+ import json
11
+ import os
12
+ from pathlib import Path
13
+ from typing import Dict, List
14
+
15
+ import matplotlib.pyplot as plt
16
+ import matplotlib as mpl
17
+ import numpy as np
18
+ import pandas as pd
19
+
20
+
21
def set_matplotlib_params():
    """Apply the shared plot styling to matplotlib's global ``rcParams``.

    Covers the sans-serif font, axis and tick line weights, label and base
    font sizes, hidden top/right spines and a frameless legend.
    """
    style_overrides = (
        ("font.sans-serif", "Helvetica"),
        ("axes.linewidth", 2),
        ("axes.labelsize", 14),
        ("axes.spines.top", False),
        ("axes.spines.right", False),
        ("font.size", 12),
        ("xtick.major.width", 2),
        ("ytick.major.width", 2),
        ("xtick.major.size", 6),
        ("ytick.major.size", 6),
        ("legend.frameon", False),
    )
    # Assign one option at a time, in the listed order.
    for option, value in style_overrides:
        mpl.rcParams[option] = value
38
+
39
+
40
def load_regression_results(base_dir: Path, country: str, num_runs: int = 5) -> List[Dict]:
    """
    Load regression results from multiple runs.

    For every run number ``N`` in ``1..num_runs`` the file
    ``<base_dir>/<country>_run<N>/fig<country>_regression_results.json`` is
    read. A missing file is reported with a warning on stdout and skipped,
    so the returned list may be shorter than ``num_runs``.

    :param base_dir: Base directory containing run subdirectories
    :type base_dir: Path
    :param country: Either "UK" or "USA"
    :type country: str
    :param num_runs: Number of runs to load (default 5)
    :type num_runs: int
    :return: List of dictionaries with the keys ``run``, ``country`` and
        ``data`` (the parsed JSON document)
    :rtype: List[Dict]
    """
    results = []

    for run_num in range(1, num_runs + 1):
        run_dir = base_dir / f"{country}_run{run_num}"
        results_file = run_dir / f"fig{country}_regression_results.json"

        if not results_file.exists():
            print(f"Warning: {results_file} not found")
            continue

        # JSON is UTF-8 by specification; decoding must not depend on the
        # locale's default encoding.
        with open(results_file, 'r', encoding='utf-8') as f:
            data = json.load(f)

        results.append({
            'run': run_num,
            'country': country,
            'data': data,
        })

    return results
71
+
72
+
73
# JSON keys under which the mean / variance models may be stored; the first
# key present in a result document wins.
_MEAN_MODEL_KEYS = (
    "mean number of mutations model",
    "mean number of mutations per 7D model",
    "mean number of substitutions model",
)
_VAR_MODEL_KEYS = (
    "scaled var number of mutations model",
    "scaled var number of mutations per 7D model",
    "scaled var number of substitutions model",
)

# Every column of the returned DataFrame. Fixing the schema means a run with
# a missing model yields empty cells instead of missing columns.
_PARAMETER_COLUMNS = (
    'run', 'country',
    'mean_m', 'mean_b', 'mean_r2',
    'var_model_selected', 'var_m', 'var_d', 'var_alpha', 'var_r2',
)


def _first_present(data, keys):
    """Return ``data[key]`` for the first of *keys* found in *data*, else ``None``."""
    for key in keys:
        if key in data:
            return data[key]
    return None


def _variance_fields(var_model):
    """Return the ``var_*`` values that one variance-model entry provides."""
    fields = {}

    if "model_selection" in var_model:
        # New format: both candidate fits are stored, plus the chosen one.
        selected = var_model["model_selection"]["selected"]
        fields['var_model_selected'] = selected

        if selected == "linear" and "linear_model" in var_model:
            linear = var_model["linear_model"]
            fields['var_m'] = linear['parameters']['m']
            fields['var_r2'] = linear['r2']
        elif selected == "power_law" and "power_law_model" in var_model:
            power_law = var_model["power_law_model"]
            fields['var_d'] = power_law['parameters']['d']
            fields['var_alpha'] = power_law['parameters']['alpha']
            fields['var_r2'] = power_law['r2']
        return fields

    # Old format without model selection: a single fit whose kind is inferred
    # from the parameter names it carries.
    params = var_model['parameters']
    fields['var_r2'] = var_model['r2']
    if 'm' in params:
        fields['var_m'] = params['m']
        fields['var_model_selected'] = 'linear'
    elif 'd' in params and 'alpha' in params:
        fields['var_d'] = params['d']
        fields['var_alpha'] = params['alpha']
        fields['var_model_selected'] = 'power_law'
    return fields


def extract_parameters(results: List[Dict]) -> pd.DataFrame:
    """
    Extract parameters from regression results into a DataFrame.

    Each result contributes one row. The frame always has the columns
    ``run``, ``country``, ``mean_m``, ``mean_b``, ``mean_r2``,
    ``var_model_selected``, ``var_m``, ``var_d``, ``var_alpha`` and
    ``var_r2``; values a run does not provide are left empty (``None``/NaN).

    :param results: List of regression result dictionaries, each with the
        keys ``run``, ``country`` and ``data``
    :type results: List[Dict]
    :return: DataFrame with parameters from all runs
    :rtype: pd.DataFrame
    """
    records = []

    for result in results:
        # Start from an all-empty row so every column is always defined.
        record = dict.fromkeys(_PARAMETER_COLUMNS)
        record['run'] = result['run']
        record['country'] = result['country']
        data = result['data']

        # Extract mean model parameters
        mean_model = _first_present(data, _MEAN_MODEL_KEYS)
        if mean_model is not None:
            record['mean_m'] = mean_model['parameters']['m']
            record['mean_b'] = mean_model['parameters']['b']
            record['mean_r2'] = mean_model['r2']

        # Extract variance model parameters
        var_model = _first_present(data, _VAR_MODEL_KEYS)
        if var_model is not None:
            record.update(_variance_fields(var_model))

        records.append(record)

    return pd.DataFrame(records, columns=list(_PARAMETER_COLUMNS))
158
+
159
+
160
def create_violin_plots(df: pd.DataFrame, export: bool = False, show: bool = True, output_filename: str = "share/test_runs_violin_plot.pdf"):
    """
    Create violin plots for parameter distributions.

    One panel per parameter is drawn on a 2x3 grid. Each panel shows a
    violin per country (UK at x=0, USA at x=1), the individual runs as
    jittered points, and a text box with mean, standard deviation and
    coefficient of variation. Panels without any usable values show
    "No data"; a country without values is simply left out of its panel.

    The variance panels show the power-law parameters if at least one run
    selected the power-law model, otherwise the linear slope.

    :param df: DataFrame with extracted parameters
    :type df: pd.DataFrame
    :param export: Whether to save the figure (default False)
    :type export: bool
    :param show: Whether to display the figure (default True)
    :type show: bool
    :param output_filename: Path to save the figure
    :type output_filename: str
    """
    set_matplotlib_params()

    # Define colors; the dict order also fixes each country's x position.
    colors = {
        "UK": "#76d6ff",
        "USA": "#FF6346",
    }
    countries = list(colors)

    # Parameters to plot
    mean_params = [
        ('mean_m', 'Mean: Slope (m)', 'mutations/week'),
        ('mean_b', 'Mean: Intercept (b)', 'mutations'),
        ('mean_r2', 'Mean: R²', '')
    ]

    # Check which variance model is predominantly used. A frame without the
    # column counts as "no model selected" instead of raising KeyError.
    if 'var_model_selected' in df.columns:
        var_model_counts = df['var_model_selected'].value_counts()
    else:
        var_model_counts = pd.Series(dtype='int64')
    print("\nVariance model selection:")
    print(var_model_counts)

    # Determine which variance parameters to plot
    if var_model_counts.get('power_law', 0) > 0:
        var_params = [
            ('var_d', 'Variance: Coefficient (d)', ''),
            ('var_alpha', 'Variance: Exponent (α)', ''),
            ('var_r2', 'Variance: R²', '')
        ]
    else:
        var_params = [
            ('var_m', 'Variance: Slope (m)', 'mutations²/week'),
            ('var_r2', 'Variance: R²', '')
        ]

    all_params = mean_params + var_params

    # Create subplots
    fig, axes = plt.subplots(2, 3, figsize=(18, 12))
    axes = axes.flatten()

    # zip() stops at the shorter sequence, so surplus parameters are ignored.
    for ax, (param, title, unit) in zip(axes, all_params):
        # Collect (x position, country, values) for countries that have at
        # least one non-missing value; violinplot cannot draw empty datasets.
        groups = []
        if param in df.columns:
            for pos, country in enumerate(countries):
                values = df.loc[df['country'] == country, param].dropna()
                if len(values) > 0:
                    groups.append((pos, country, values))

        if not groups:
            ax.text(0.5, 0.5, 'No data', ha='center', va='center', transform=ax.transAxes)
            ax.set_title(title)
            continue

        # Create violin plot
        parts = ax.violinplot(
            [values.to_numpy(dtype=float) for _, _, values in groups],
            positions=[pos for pos, _, _ in groups],
            showmeans=True,
            showextrema=True,
            widths=0.7
        )

        # Color the violins; bodies come back in the order of `groups`.
        for body, (_, country, _) in zip(parts['bodies'], groups):
            body.set_facecolor(colors[country])
            body.set_alpha(0.7)
            body.set_edgecolor('black')
            body.set_linewidth(1.5)

        # Style the other elements
        for partname in ['cmeans', 'cmaxes', 'cmins', 'cbars']:
            if partname in parts:
                parts[partname].set_edgecolor('black')
                parts[partname].set_linewidth(2)

        # Add scatter points for individual runs (horizontal jitter only)
        for pos, _, values in groups:
            x_pos = np.random.normal(pos, 0.04, size=len(values))
            ax.scatter(x_pos, values.values,
                       alpha=0.6, s=50, c='black', zorder=3, edgecolors='white', linewidth=1)

        # Styling
        ax.set_xticks([0, 1])
        ax.set_xticklabels(['UK', 'USA'])
        y_label = title.split(": ")[1]
        if unit:
            y_label = f"{y_label} ({unit})"
        ax.set_ylabel(y_label)
        ax.set_title(title, fontweight='bold')
        ax.grid(axis='y', alpha=0.3, linestyle='--')

        # Add statistics text. Positions are axes fractions so the boxes stay
        # in the top-right corner whatever the data range is.
        for pos, country, values in groups:
            mean_val = values.mean()
            std_val = values.std()
            cv = (std_val / mean_val * 100) if mean_val != 0 else 0

            ax.text(0.98, 0.95 - pos * 0.08,
                    f'{country}: μ={mean_val:.4f}, σ={std_val:.4f}, CV={cv:.2f}%',
                    transform=ax.transAxes, ha='right', va='top',
                    fontsize=9, bbox=dict(boxstyle='round', facecolor=colors[country], alpha=0.3))

    # Hide unused subplots
    for ax in axes[len(all_params):]:
        ax.set_visible(False)

    fig.suptitle('Parameter Variability Across Multiple Runs\n(Assessing Nonlinear Fitting Reproducibility)',
                 fontsize=16, fontweight='bold', y=0.995)
    plt.tight_layout()

    if export:
        fig.savefig(output_filename, dpi=400, bbox_inches='tight')
        print(f"\nViolin plot saved as {output_filename}")

    if show:
        plt.show()
293
+
294
+
295
def _print_param_stats(country_df, params):
    """Print mean, standard deviation and CV% for each of *params* that has data."""
    for param in params:
        if param not in country_df.columns:
            continue
        values = country_df[param].dropna()
        if len(values) == 0:
            continue
        mean = values.mean()
        std = values.std()
        # CV is undefined for a zero mean; report 0 in that case.
        cv = (std / mean * 100) if mean != 0 else 0
        print(f" {param:12s}: μ={mean:10.6f}, σ={std:10.6f}, CV={cv:6.2f}%")


def print_summary_statistics(df: pd.DataFrame):
    """
    Print summary statistics for all parameters.

    Displays mean, standard deviation, and coefficient of variation (CV%)
    for each parameter grouped by country. The variance parameters listed
    depend on the most frequent selected variance model of that country; a
    country without rows (or without any selected model) is reported as
    ``unknown`` instead of raising.

    :param df: DataFrame with extracted parameters
    :type df: pd.DataFrame
    """
    print("\n" + "="*80)
    print("PARAMETER VARIABILITY SUMMARY")
    print("="*80)

    for country in ['UK', 'USA']:
        print(f"\n{country} Dataset:")
        print("-" * 40)

        country_df = df[df['country'] == country]

        # Mean model parameters
        print("\nMean Model:")
        _print_param_stats(country_df, ['mean_m', 'mean_b', 'mean_r2'])

        # Variance model parameters
        print("\nVariance Model:")
        var_model = 'unknown'
        if 'var_model_selected' in country_df.columns:
            # mode() is empty when the country has no (non-missing) entries.
            modes = country_df['var_model_selected'].mode()
            if not modes.empty:
                var_model = modes.iloc[0]
        print(f" Selected model: {var_model}")

        if var_model == 'power_law':
            _print_param_stats(country_df, ['var_d', 'var_alpha', 'var_r2'])
        else:
            _print_param_stats(country_df, ['var_m', 'var_r2'])

    print("\n" + "="*80)
351
+
352
+
353
def main():
    """
    Main execution function for analyzing test run parameter variability.

    Loads regression results from batch directories, extracts parameters,
    computes statistics, and generates violin plots to visualize parameter
    distributions across multiple runs.

    The batch to analyze is taken from the first command line argument. With
    no argument, ``share/test-runs`` is scanned for ``batch*`` directories:
    none means the runs are expected directly in ``share/test-runs``, one is
    used automatically, and several make the function print usage help and
    return. A missing directory is reported on stdout and the function
    returns without raising.
    """
    import sys

    test_runs_dir = Path("share/test-runs")

    # Parse command line arguments
    if len(sys.argv) > 1:
        batch_name = sys.argv[1]
        base_dir = Path(f"share/test-runs/{batch_name}")
        output_suffix = f"_{batch_name}"
    else:
        # Try to auto-detect batch directories. iterdir() raises on a missing
        # directory, so only scan when it exists; the existence check below
        # then reports the problem.
        if test_runs_dir.is_dir():
            batch_dirs = sorted(
                d for d in test_runs_dir.iterdir()
                if d.is_dir() and d.name.startswith("batch")
            )
        else:
            batch_dirs = []

        if len(batch_dirs) == 0:
            # Fall back to old structure (no batch subdirectories)
            base_dir = test_runs_dir
            output_suffix = ""
        elif len(batch_dirs) == 1:
            # Use the only batch found
            base_dir = batch_dirs[0]
            output_suffix = f"_{batch_dirs[0].name}"
            print(f"Auto-detected batch: {batch_dirs[0].name}")
        else:
            # Multiple batches - the user has to pick one
            print(f"Found {len(batch_dirs)} batches: {[d.name for d in batch_dirs]}")
            print("Please specify which batch to analyze:")
            print(" python analyze_test_runs.py batch1")
            print("Or analyze all batches separately by running for each.")
            return

    if not base_dir.exists():
        print(f"Error: Directory {base_dir} does not exist!")
        return

    # Auto-detect number of runs from the run directories of either country
    uk_runs = list(base_dir.glob("UK_run*"))
    usa_runs = list(base_dir.glob("USA_run*"))
    num_runs = max(len(uk_runs), len(usa_runs))

    countries = ["UK", "USA"]

    print(f"Loading regression results from {base_dir}...")
    print(f"Detected {num_runs} runs per country")

    # Load all results
    all_results = []
    for country in countries:
        results = load_regression_results(base_dir, country, num_runs)
        all_results.extend(results)
        print(f"Loaded {len(results)} runs for {country}")

    if not all_results:
        print("Error: No results found!")
        return

    # Extract parameters into DataFrame
    print("\nExtracting parameters...")
    df = extract_parameters(all_results)

    # Save to CSV for further analysis
    output_csv = f"share/test_runs_parameters{output_suffix}.csv"
    df.to_csv(output_csv, index=False)
    print(f"Parameters saved to {output_csv}")

    # Print summary statistics
    print_summary_statistics(df)

    # Create violin plots
    print("\nCreating violin plots...")
    output_plot = f"share/test_runs_violin_plot{output_suffix}.pdf"
    create_violin_plots(df, export=True, show=True, output_filename=output_plot)


if __name__ == "__main__":
    main()
436
+
Binary file
Binary file
share/figUK_plots.pdf CHANGED
Binary file
@@ -1,18 +1,65 @@
1
1
  {
2
- "mean number of mutations per 7D model": {
2
+ "mean number of mutations model": {
3
3
  "parameters": {
4
4
  "m": 0.19547481089767144,
5
5
  "b": 35.634115529420946
6
6
  },
7
+ "confidence_intervals": {
8
+ "m": [
9
+ 0.18318447093035867,
10
+ 0.2077651508649842
11
+ ],
12
+ "b": [
13
+ 35.35560980879931,
14
+ 35.912621250042584
15
+ ]
16
+ },
7
17
  "expression": "mx + b",
18
+ "confidence_level": 0.95,
8
19
  "r2": 0.9637010376538699
9
20
  },
10
- "scaled var number of mutations per 7D model": {
11
- "parameters": {
12
- "d": 1.9496396213906204,
13
- "alpha": 0.4010445329484692
21
+ "scaled var number of mutations model": {
22
+ "linear_model": {
23
+ "parameters": {
24
+ "m": 0.2701500514044973
25
+ },
26
+ "confidence_intervals": {
27
+ "m": [
28
+ 0.21165644420002577,
29
+ 0.3286436586089689
30
+ ]
31
+ },
32
+ "expression": "mx",
33
+ "r2": 0.23974456333891003,
34
+ "confidence_level": 0.95
35
+ },
36
+ "power_law_model": {
37
+ "parameters": {
38
+ "d": 1.949675756351066,
39
+ "alpha": 0.40103856972319674
40
+ },
41
+ "confidence_intervals": {
42
+ "d": [
43
+ 1.2336302228902016,
44
+ 2.6657212898119305
45
+ ],
46
+ "alpha": [
47
+ 0.2846823845254939,
48
+ 0.5173947549208996
49
+ ]
50
+ },
51
+ "expression": "d*x^alpha",
52
+ "r2": 0.7403444327630024,
53
+ "confidence_level": 0.95
14
54
  },
15
- "expression": "d*x^alpha",
16
- "r2": 0.7403444327797193
55
+ "model_selection": {
56
+ "selected": "power_law",
57
+ "linear_AIC": 173.79403093437475,
58
+ "power_law_AIC": 133.0411534485154,
59
+ "delta_AIC_linear": 40.75287748585936,
60
+ "delta_AIC_power_law": 0.0,
61
+ "akaike_weight_linear": 1.414572114240171e-09,
62
+ "akaike_weight_power_law": 0.999999998585428
63
+ }
17
64
  }
18
65
  }
share/figUK_run_args.json CHANGED
@@ -5,6 +5,7 @@
5
5
  "delta_t": "7D",
6
6
  "show": false,
7
7
  "export_plots": true,
8
+ "confidence_level": 0.95,
8
9
  "length_filter": 0,
9
10
  "kind": "total",
10
11
  "filter": null,
share/figUK_stats.tsv CHANGED
@@ -1,41 +1,41 @@
1
- date mean number of mutations var number of mutations size
2
- 2020-10-29 34.515151515151516 0.8689976689976701 66
3
- 2020-11-05 35.029411764705884 0.9620915032679709 136
4
- 2020-11-12 35.63250883392226 1.9921309174748796 283
5
- 2020-11-19 36.07575757575758 2.5170742962621144 198
6
- 2020-11-26 36.37826086956522 3.6685209796848266 230
7
- 2020-12-03 36.20704845814978 4.297649214455573 227
8
- 2020-12-10 36.555865921787706 3.715357651440464 358
9
- 2020-12-17 37.040625 4.709943181818182 320
10
- 2020-12-24 37.450171821305844 6.5794051427894376 291
11
- 2020-12-31 37.649068322981364 6.614781060737982 322
12
- 2021-01-07 37.97329376854599 5.907022749752713 337
13
- 2021-01-14 38.126182965299684 7.1675717765443405 317
14
- 2021-01-21 38.33774834437086 6.476909198917517 302
15
- 2021-01-28 38.614285714285714 6.632053251408085 280
16
- 2021-02-04 38.891373801916934 8.481752273285828 313
17
- 2021-02-11 38.62369337979094 7.235520576984016 287
18
- 2021-02-18 39.12367491166078 7.97401197904921 283
19
- 2021-02-25 39.30167597765363 6.558596623006733 358
20
- 2021-03-04 39.56273764258555 6.239369575944036 263
21
- 2021-03-11 39.64939024390244 8.613699932870894 328
22
- 2021-03-18 39.625 7.909090909090907 320
23
- 2021-03-25 40.233676975945016 8.607275743571508 291
24
- 2021-04-01 40.11 8.345719063545154 300
25
- 2021-04-08 40.11552346570397 6.776461047454612 277
26
- 2021-04-15 40.674121405750796 7.5344884082903265 313
27
- 2021-04-22 40.55882352941177 8.050626808100292 306
28
- 2021-04-29 41.38768115942029 9.140065876152836 276
29
- 2021-05-06 41.098684210526315 7.442374500607957 304
30
- 2021-05-13 41.06024096385542 8.75038865137971 249
31
- 2021-05-20 41.205479452054796 9.091653721225812 292
32
- 2021-05-27 40.85546875 9.69275428921569 256
33
- 2021-06-03 41.12112676056338 8.886416805920257 355
34
- 2021-06-10 41.49 6.96472361809045 200
35
- 2021-06-17 41.723404255319146 7.741267493457731 188
36
- 2021-06-24 42.27860696517413 6.611990049751241 201
37
- 2021-07-01 42.541935483870965 8.717385839966484 155
38
- 2021-07-08 42.71875 7.475198412698421 64
39
- 2021-07-15 43.333333333333336 8.333333333333332 39
40
- 2021-07-22 42.32142857142857 7.822077922077916 56
41
- 2021-07-29 41.857142857142854 15.14285714285714 7
1
+ date mean number of mutations var number of mutations size dt_idx
2
+ 2020-10-29 34.515151515151516 0.8689976689976701 66 0.0
3
+ 2020-11-05 35.029411764705884 0.9620915032679709 136 1.0
4
+ 2020-11-12 35.63250883392226 1.9921309174748796 283 2.0
5
+ 2020-11-19 36.07575757575758 2.5170742962621144 198 3.0
6
+ 2020-11-26 36.37826086956522 3.6685209796848266 230 4.0
7
+ 2020-12-03 36.20704845814978 4.297649214455573 227 5.0
8
+ 2020-12-10 36.555865921787706 3.715357651440464 358 6.0
9
+ 2020-12-17 37.040625 4.709943181818182 320 7.0
10
+ 2020-12-24 37.450171821305844 6.5794051427894376 291 8.0
11
+ 2020-12-31 37.649068322981364 6.614781060737982 322 9.0
12
+ 2021-01-07 37.97329376854599 5.907022749752713 337 10.0
13
+ 2021-01-14 38.126182965299684 7.1675717765443405 317 11.0
14
+ 2021-01-21 38.33774834437086 6.476909198917517 302 12.0
15
+ 2021-01-28 38.614285714285714 6.632053251408085 280 13.0
16
+ 2021-02-04 38.891373801916934 8.481752273285828 313 14.0
17
+ 2021-02-11 38.62369337979094 7.235520576984016 287 15.0
18
+ 2021-02-18 39.12367491166078 7.97401197904921 283 16.0
19
+ 2021-02-25 39.30167597765363 6.558596623006733 358 17.0
20
+ 2021-03-04 39.56273764258555 6.239369575944036 263 18.0
21
+ 2021-03-11 39.64939024390244 8.613699932870894 328 19.0
22
+ 2021-03-18 39.625 7.909090909090907 320 20.0
23
+ 2021-03-25 40.233676975945016 8.607275743571508 291 21.0
24
+ 2021-04-01 40.11 8.345719063545154 300 22.0
25
+ 2021-04-08 40.11552346570397 6.776461047454612 277 23.0
26
+ 2021-04-15 40.674121405750796 7.5344884082903265 313 24.0
27
+ 2021-04-22 40.55882352941177 8.050626808100292 306 25.0
28
+ 2021-04-29 41.38768115942029 9.140065876152836 276 26.0
29
+ 2021-05-06 41.098684210526315 7.442374500607957 304 27.0
30
+ 2021-05-13 41.06024096385542 8.75038865137971 249 28.0
31
+ 2021-05-20 41.205479452054796 9.091653721225812 292 29.0
32
+ 2021-05-27 40.85546875 9.69275428921569 256 30.0
33
+ 2021-06-03 41.12112676056338 8.886416805920257 355 31.0
34
+ 2021-06-10 41.49 6.96472361809045 200 32.0
35
+ 2021-06-17 41.723404255319146 7.741267493457731 188 33.0
36
+ 2021-06-24 42.27860696517413 6.611990049751241 201 34.0
37
+ 2021-07-01 42.541935483870965 8.717385839966484 155 35.0
38
+ 2021-07-08 42.71875 7.475198412698421 64 36.0
39
+ 2021-07-15 43.333333333333336 8.333333333333332 39 37.0
40
+ 2021-07-22 42.32142857142857 7.822077922077916 56 38.0
41
+ 2021-07-29 41.857142857142854 15.14285714285714 7 39.0
share/figUSA_plots.pdf CHANGED
Binary file
@@ -1,18 +1,65 @@
1
1
  {
2
- "mean number of mutations per 7D model": {
2
+ "mean number of mutations model": {
3
3
  "parameters": {
4
4
  "m": 0.3258484867760181,
5
5
  "b": 35.93995331972901
6
6
  },
7
+ "confidence_intervals": {
8
+ "m": [
9
+ 0.2883403045689692,
10
+ 0.36335666898306695
11
+ ],
12
+ "b": [
13
+ 35.24158897313613,
14
+ 36.63831766632188
15
+ ]
16
+ },
7
17
  "expression": "mx + b",
18
+ "confidence_level": 0.95,
8
19
  "r2": 0.9071591655422693
9
20
  },
10
- "scaled var number of mutations per 7D model": {
11
- "parameters": {
12
- "d": 0.6926241001872148,
13
- "alpha": 0.7238661387949127
21
+ "scaled var number of mutations model": {
22
+ "linear_model": {
23
+ "parameters": {
24
+ "m": 0.2915484276133104
25
+ },
26
+ "confidence_intervals": {
27
+ "m": [
28
+ 0.24003648883099504,
29
+ 0.34306036639562576
30
+ ]
31
+ },
32
+ "expression": "mx",
33
+ "r2": 0.7243923706082853,
34
+ "confidence_level": 0.95
35
+ },
36
+ "power_law_model": {
37
+ "parameters": {
38
+ "d": 0.6926303070016426,
39
+ "alpha": 0.723863222453596
40
+ },
41
+ "confidence_intervals": {
42
+ "d": [
43
+ 0.2633037580510857,
44
+ 1.1219568559521995
45
+ ],
46
+ "alpha": [
47
+ 0.5238853883558428,
48
+ 0.9238410565513493
49
+ ]
50
+ },
51
+ "expression": "d*x^alpha",
52
+ "r2": 0.7761009815632863,
53
+ "confidence_level": 0.95
14
54
  },
15
- "expression": "d*x^alpha",
16
- "r2": 0.7761009815628838
55
+ "model_selection": {
56
+ "selected": "power_law",
57
+ "linear_AIC": 115.52445550217226,
58
+ "power_law_AIC": 110.93858154005544,
59
+ "delta_AIC_linear": 4.585873962116821,
60
+ "delta_AIC_power_law": 0.0,
61
+ "akaike_weight_linear": 0.0917096089306382,
62
+ "akaike_weight_power_law": 0.9082903910693618
63
+ }
17
64
  }
18
65
  }