PyPI - PyEvoMotion - Versions diffs - 0.1.1__py3-none-any.whl → 0.1.2__py3-none-any.whl - Mend

PyEvoMotion 0.1.1 (py3-none-any.whl) → 0.1.2 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27)

PyEvoMotion/cli.py +87 -3
PyEvoMotion/core/base.py +296 -20
PyEvoMotion/core/core.py +73 -24
{pyevomotion-0.1.1.dist-info → pyevomotion-0.1.2.dist-info}/METADATA +1 -1
pyevomotion-0.1.2.dist-info/RECORD +35 -0
share/analyze_model_selection_accuracy.py +316 -0
share/analyze_test_runs.py +436 -0
share/anomalous_diffusion.pdf +0 -0
share/confusion_matrix_heatmap.pdf +0 -0
share/figUK_plots.pdf +0 -0
share/figUK_regression_results.json +54 -7
share/figUK_run_args.json +1 -0
share/figUK_stats.tsv +41 -41
share/figUSA_plots.pdf +0 -0
share/figUSA_regression_results.json +54 -7
share/figUSA_run_args.json +1 -0
share/figUSA_stats.tsv +34 -34
share/generate_sequences_from_test5_data.py +107 -0
share/manuscript_figure.py +450 -80
share/run_parallel_analysis.py +196 -0
share/synth_figure.pdf +0 -0
share/uk_time_windows.pdf +0 -0
share/weekly_size.pdf +0 -0
pyevomotion-0.1.1.dist-info/RECORD +0 -31
share/figure.pdf +0 -0
{pyevomotion-0.1.1.dist-info → pyevomotion-0.1.2.dist-info}/WHEEL +0 -0
{pyevomotion-0.1.1.dist-info → pyevomotion-0.1.2.dist-info}/entry_points.txt +0 -0

share/manuscript_figure.py CHANGED Viewed

@@ -10,6 +10,7 @@ import numpy as np
 import pandas as pd
 import matplotlib as mpl
 import matplotlib.pyplot as plt
+from matplotlib.colors import LinearSegmentedColormap
 #´:°•.°+.*•´.*:˚.°*.˚•´.°:°•.°•.*•´.*:˚.°*.˚•´.°:°•.°+.*•´.*:#
 #                         CONSTANTS                          #
@@ -20,6 +21,9 @@ COLORS = {
     "USA": "#FF6346",
 }
+# Control confidence interval plotting
+PLOT_CONFIDENCE_INTERVALS = False
 #´:°•.°+.*•´.*:˚.°*.˚•´.°:°•.°•.*•´.*:˚.°*.˚•´.°:°•.°+.*•´.*:#
 #                         FUNCTIONS                          #
 #.•°:°.´+˚.*°.˚:*.´•*.+°.•°:´*.´•*.•°.•°:°.´:•˚°.*°.˚:*.´+°.•#
@@ -169,36 +173,12 @@ def load_models() -> dict[str, dict[str, callable]]:
     return {
         "USA": {
-            "mean": [
-                lambda x: (
-                    _contents["USA"]["mean number of mutations per 7D model"]["parameters"]["m"]*x
-                    + _contents["USA"]["mean number of mutations per 7D model"]["parameters"]["b"]
-                ),
-                _contents["USA"]["mean number of mutations per 7D model"]["r2"],
-            ],
-            "var": [
-                lambda x: (
-                    _contents["USA"]["scaled var number of mutations per 7D model"]["parameters"]["d"]
-                    *(x**_contents["USA"]["scaled var number of mutations per 7D model"]["parameters"]["alpha"])
-                ),
-                _contents["USA"]["scaled var number of mutations per 7D model"]["r2"],
-            ]
+            "mean": list(_get_mean_model(_contents["USA"], "USA")),
+            "var": list(_get_var_model(_contents["USA"], "USA"))
         },
         "UK": {
-            "mean": [
-                lambda x: (
-                    _contents["UK"]["mean number of mutations per 7D model"]["parameters"]["m"]*x
-                    + _contents["UK"]["mean number of mutations per 7D model"]["parameters"]["b"]
-                ),
-                _contents["UK"]["mean number of mutations per 7D model"]["r2"],
-            ],
-            "var": [
-                lambda x: (
-                    _contents["UK"]["scaled var number of mutations per 7D model"]["parameters"]["d"]
-                    *(x**_contents["UK"]["scaled var number of mutations per 7D model"]["parameters"]["alpha"])
-                ),
-                _contents["UK"]["scaled var number of mutations per 7D model"]["r2"],
-            ]
+            "mean": list(_get_mean_model(_contents["UK"], "UK")),
+            "var": list(_get_var_model(_contents["UK"], "UK"))
         },
     }
@@ -211,6 +191,62 @@ def safe_map(f: callable, x: list[int | float]) -> list[int | float]:
             _results.append(None)
     return _results
+def _calculate_confidence_bounds(x_values: np.ndarray, model_func: callable, confidence_intervals: dict, model_type: str) -> tuple[np.ndarray | None, np.ndarray | None]:
+    """Calculate an approximate confidence envelope for a fitted model.
+    The model is evaluated at every combination of the parameter interval
+    endpoints and the pointwise minimum and maximum are returned. Pairing
+    lower-with-lower and upper-with-upper endpoints is only a valid envelope
+    for x >= 0 (linear models) or x >= 1 (power law); taking the min/max over
+    all combinations stays correct for negative x and for 0 < x < 1 as well.
+    NaN results (e.g. a negative x raised to a fractional alpha) propagate to
+    both bounds.
+    :param x_values: X values to calculate bounds for
+    :type x_values: np.ndarray
+    :param model_func: The model function (unused; kept so existing callers keep working)
+    :type model_func: callable
+    :param confidence_intervals: Mapping of parameter name to a (lower, upper) pair
+    :type confidence_intervals: dict
+    :param model_type: Type of model ('linear_mean', 'linear_var', 'power_law')
+    :type model_type: str
+    :return: Tuple of (lower_bounds, upper_bounds), or (None, None) when the
+        intervals are empty, a required parameter is missing, or the model
+        type is not recognised
+    :rtype: tuple[np.ndarray | None, np.ndarray | None]
+    """
+    if not confidence_intervals:
+        # No confidence intervals available, return None bounds
+        return None, None
+    x = np.asarray(x_values, dtype=float)
+    if model_type == "linear_mean" and "m" in confidence_intervals and "b" in confidence_intervals:
+        # Linear mean model: m*x + b
+        m_lower, m_upper = confidence_intervals["m"]
+        b_lower, b_upper = confidence_intervals["b"]
+        candidates = [
+            m * x + b
+            for m in (m_lower, m_upper)
+            for b in (b_lower, b_upper)
+        ]
+    elif model_type == "linear_var" and "m" in confidence_intervals:
+        # Linear variance model: m*x
+        m_lower, m_upper = confidence_intervals["m"]
+        candidates = [m * x for m in (m_lower, m_upper)]
+    elif model_type == "power_law" and "d" in confidence_intervals and "alpha" in confidence_intervals:
+        # Power law model: d*x^alpha
+        d_lower, d_upper = confidence_intervals["d"]
+        alpha_lower, alpha_upper = confidence_intervals["alpha"]
+        candidates = [
+            d * (x ** alpha)
+            for d in (d_lower, d_upper)
+            for alpha in (alpha_lower, alpha_upper)
+        ]
+    else:
+        # Unknown model type, or a parameter the model needs has no interval
+        return None, None
+    # Pointwise envelope over all endpoint combinations
+    return np.minimum.reduce(candidates), np.maximum.reduce(candidates)
 def plot_main_figure(df: pd.DataFrame, models: dict[str, any], export: bool = False, show: bool = True) -> None:
     set_matplotlib_global_params()
     fig, ax = plt.subplots(2, 1, figsize=(6, 10))
@@ -229,14 +265,48 @@ def plot_main_figure(df: pd.DataFrame, models: dict[str, any], export: bool = Fa
             )
             _x = np.arange(-10, 60, 0.5)
+            _x_shifted = _x + (8 if _country == "USA" else 0)
+            # Plot the main model line
             ax[idx].plot(
-                _x + (8 if _country == "USA" else 0),
+                _x_shifted,
                 safe_map(models[_country][case][0], _x),
                 color=COLORS[_country],
                 label=rf"{_country} ($R^2 = {round(models[_country][case][1], 2):.2f})$",
                 linewidth=3,
                 zorder=1,
             )
+            # Plot confidence intervals if available and enabled
+            if PLOT_CONFIDENCE_INTERVALS and len(models[_country][case]) > 2 and models[_country][case][2]:
+                confidence_intervals = models[_country][case][2]
+                # Determine model type for confidence interval calculation
+                if case == "mean":
+                    model_type = "linear_mean"
+                else:  # case == "var"
+                    # Check if it's linear or power law based on the model function
+                    # We'll determine this by checking if the model has 'alpha' parameter
+                    if "alpha" in confidence_intervals:
+                        model_type = "power_law"
+                    else:
+                        model_type = "linear_var"
+                lower_bounds, upper_bounds = _calculate_confidence_bounds(
+                    _x, models[_country][case][0], confidence_intervals, model_type
+                )
+                if lower_bounds is not None and upper_bounds is not None:
+                    # Plot confidence interval as filled area
+                    # The x-axis shift is already applied to _x_shifted, so we use the original bounds
+                    ax[idx].fill_between(
+                        _x_shifted,
+                        lower_bounds,
+                        upper_bounds,
+                        color=COLORS[_country],
+                        alpha=0.2,
+                        zorder=0,
+                    )
             # Styling
             ax[idx].set_xlim(-0.5, 40.5)
@@ -264,11 +334,11 @@ def plot_main_figure(df: pd.DataFrame, models: dict[str, any], export: bool = Fa
     if export:
         fig.savefig(
-            "share/figure.pdf",
+            "share/figure.eps",
             dpi=400,
             bbox_inches="tight",
         )
-        print("Figure saved as share/figure.pdf")
+        print("Figure saved as share/figure.eps")
     if show: plt.show()
@@ -382,8 +452,8 @@ def run_synthetic_data_tests() -> None:
     result1 = subprocess.run(
         [
             "PyEvoMotion",
-            "S1.fasta",
-            "S1.tsv",
+            "tests/data/test4/S1.fasta",
+            "tests/data/test4/S1.tsv",
             "tests/data/test4/synthdata1_out",
             "-ep"
         ],
@@ -401,8 +471,8 @@ def run_synthetic_data_tests() -> None:
     result2 = subprocess.run(
         [
             "PyEvoMotion",
-            "S2.fasta",
-            "S2.tsv",
+            "tests/data/test4/S2.fasta",
+            "tests/data/test4/S2.tsv",
             "tests/data/test4/synthdata2_out",
             "-ep"
         ],
@@ -433,6 +503,89 @@ def load_synthetic_data_df() -> pd.DataFrame:
         suffixes=(" synt1", " synt2"),
     )
+def _get_mean_model(data: dict, kind: str) -> tuple[callable, float, dict]:
+    """Build the linear mean model from a regression-results dictionary.
+    The model entry is looked up under several key spellings so that results
+    written in either the old or the new format can be read.
+    :param data: The regression results data dictionary
+    :type data: dict
+    :param kind: The dataset kind identifier (only used in the error message)
+    :type kind: str
+    :return: Tuple of (callable computing m*x + b, r2 value, confidence intervals);
+        the confidence intervals default to an empty dict when the entry has none
+    :rtype: tuple[callable, float, dict]
+    :raises KeyError: If none of the known mean-model keys is present in ``data``
+    """
+    # Known spellings, in lookup priority order
+    candidate_keys = (
+        "mean number of mutations model",  # New format (current)
+        "mean number of mutations per 7D model",  # Old format
+        "mean number of substitutions model",  # Alternative format
+        "mean number of substitutions per 7D model",  # Alternative old format
+    )
+    mean_key = next((key for key in candidate_keys if key in data), None)
+    if mean_key is None:
+        raise KeyError(f"Could not find mean model in {kind} data. Available keys: {list(data.keys())}")
+    entry = data[mean_key]
+    params = entry["parameters"]
+    r2 = entry["r2"]
+    confidence_intervals = entry.get("confidence_intervals", {})
+    return (lambda x: params["m"] * x + params["b"]), r2, confidence_intervals
+def _get_var_model(data: dict, kind: str) -> tuple[callable, float, dict]:
+    """Build the scaled-variance model from a regression-results dictionary.
+    Entries are looked up under several key spellings. An entry that carries a
+    ``model_selection`` block is resolved to the sub-model it names as selected;
+    any other entry is read as direct parameters, and its form (linear or power
+    law) is inferred from the parameter names. An entry that cannot be resolved
+    is skipped and the next key spelling is tried.
+    :param data: The regression results data dictionary
+    :type data: dict
+    :param kind: The dataset kind identifier (only used in the error message)
+    :type kind: str
+    :return: Tuple of (callable computing m*x or d*x**alpha, r2 value, confidence intervals);
+        the confidence intervals default to an empty dict when the entry has none
+    :rtype: tuple[callable, float, dict]
+    :raises KeyError: If no known key yields a usable variance model
+    """
+    # Known spellings, in lookup priority order
+    candidate_keys = (
+        "scaled var number of mutations model",  # New format (current)
+        "scaled var number of mutations per 7D model",  # Old format
+        "scaled var number of substitutions model",  # Alternative format
+        "scaled var number of substitutions per 7D model",  # Alternative old format
+    )
+    for var_key in candidate_keys:
+        if var_key not in data:
+            continue
+        entry = data[var_key]
+        if "model_selection" in entry:
+            # New format: use the sub-model named by the selection block
+            selected = entry["model_selection"]["selected"]
+            if selected == "linear" and "linear_model" in entry:
+                fitted, is_linear = entry["linear_model"], True
+            elif selected == "power_law" and "power_law_model" in entry:
+                fitted, is_linear = entry["power_law_model"], False
+            else:
+                # Selected sub-model is missing or unknown: try the next key
+                continue
+            params = fitted["parameters"]
+            r2 = fitted["r2"]
+            confidence_intervals = fitted.get("confidence_intervals", {})
+        else:
+            # Old format, or new format without model selection: direct parameters
+            params = entry["parameters"]
+            r2 = entry["r2"]
+            confidence_intervals = entry.get("confidence_intervals", {})
+            if "m" in params:
+                is_linear = True
+            elif "d" in params and "alpha" in params:
+                is_linear = False
+            else:
+                # Parameter names match neither model form: try the next key
+                continue
+        if is_linear:
+            # Linear model: m*x
+            return (lambda x: params["m"] * x), r2, confidence_intervals
+        # Power law model: d*x^alpha
+        return (lambda x: params["d"] * (x ** params["alpha"])), r2, confidence_intervals
+    raise KeyError(f"Could not find variance model in {kind} data. Available keys: {list(data.keys())}")
 def load_synthetic_data_models() -> dict[str, dict[str, callable]]:
     if not check_synthetic_data_exists():
         run_synthetic_data_tests()
@@ -448,35 +601,12 @@ def load_synthetic_data_models() -> dict[str, dict[str, callable]]:
     return {
         "synt1": {
-            "mean": [
-                lambda x: (
-                    _contents["synt1"]["mean number of mutations per 7D model"]["parameters"]["m"]*x
-                    + _contents["synt1"]["mean number of mutations per 7D model"]["parameters"]["b"]
-                ),
-                _contents["synt1"]["mean number of mutations per 7D model"]["r2"],
-            ],
-            "var": [
-                lambda x: (
-                    _contents["synt1"]["scaled var number of mutations per 7D model"]["parameters"]["m"]*x
-                ),
-                _contents["synt1"]["scaled var number of mutations per 7D model"]["r2"],
-            ]
+            "mean": list(_get_mean_model(_contents["synt1"], "synt1")),
+            "var": list(_get_var_model(_contents["synt1"], "synt1"))
         },
         "synt2": {
-            "mean": [
-                lambda x: (
-                    _contents["synt2"]["mean number of mutations per 7D model"]["parameters"]["m"]*x
-                    + _contents["synt2"]["mean number of mutations per 7D model"]["parameters"]["b"]
-                ),
-                _contents["synt2"]["mean number of mutations per 7D model"]["r2"],
-            ],
-            "var": [
-                lambda x: (
-                    _contents["synt2"]["scaled var number of mutations per 7D model"]["parameters"]["d"]
-                    *(x**_contents["synt2"]["scaled var number of mutations per 7D model"]["parameters"]["alpha"])
-                ),
-                _contents["synt2"]["scaled var number of mutations per 7D model"]["r2"],
-            ]
+            "mean": list(_get_mean_model(_contents["synt2"], "synt2")),
+            "var": list(_get_var_model(_contents["synt2"], "synt2"))
         },
     }
@@ -513,6 +643,35 @@ def synthetic_data_plot(df: pd.DataFrame, models: dict[str, any], export: bool =
                 linewidth=3,
                 zorder=1,
             )
+            # Plot confidence intervals if available and enabled
+            if PLOT_CONFIDENCE_INTERVALS and len(models[_type.lower()][case]) > 2 and models[_type.lower()][case][2]:
+                confidence_intervals = models[_type.lower()][case][2]
+                # Determine model type for confidence interval calculation
+                if case == "mean":
+                    model_type = "linear_mean"
+                else:  # case == "var"
+                    # Check if it's linear or power law based on the model function
+                    if "alpha" in confidence_intervals:
+                        model_type = "power_law"
+                    else:
+                        model_type = "linear_var"
+                lower_bounds, upper_bounds = _calculate_confidence_bounds(
+                    _x, models[_type.lower()][case][0], confidence_intervals, model_type
+                )
+                if lower_bounds is not None and upper_bounds is not None:
+                    # Plot confidence interval as filled area
+                    ax[plot_idx].fill_between(
+                        _x,
+                        lower_bounds,
+                        upper_bounds,
+                        color="#76d6ff",
+                        alpha=0.2,
+                        zorder=0,
+                    )
             # Styling
             ax[plot_idx].set_xlim(-0.5, 40.5)
@@ -589,17 +748,19 @@ def load_additional_uk_models() -> dict[str, dict[str, callable]]:
         k: {
             "mean": [
                 {
-                    "m": _contents[k][f"mean number of mutations per {k} model"]["parameters"]["m"],
-                    "b": _contents[k][f"mean number of mutations per {k} model"]["parameters"]["b"]
+                    "m": _contents[k]["mean number of mutations model"]["parameters"]["m"],
+                    "b": _contents[k]["mean number of mutations model"]["parameters"]["b"]
                 },
-                _contents[k][f"mean number of mutations per {k} model"]["r2"]
+                _contents[k]["mean number of mutations model"]["r2"],
+                _contents[k]["mean number of mutations model"]["confidence_intervals"]
             ],
             "var": [
                 {
-                    "d": _contents[k][f"scaled var number of mutations per {k} model"]["parameters"]["d"],
-                    "alpha": _contents[k][f"scaled var number of mutations per {k} model"]["parameters"]["alpha"]
+                    "d": _contents[k]["scaled var number of mutations model"]["power_law_model"]["parameters"]["d"],
+                    "alpha": _contents[k]["scaled var number of mutations model"]["power_law_model"]["parameters"]["alpha"]
                 },
-                _contents[k][f"scaled var number of mutations per {k} model"]["r2"],
+                _contents[k]["scaled var number of mutations model"]["power_law_model"]["r2"],
+                _contents[k]["scaled var number of mutations model"]["power_law_model"]["confidence_intervals"]
             ]
         }
         for k in _files.keys()
@@ -624,18 +785,18 @@ def plot_uk_time_windows(stats: dict[str, pd.DataFrame], models: dict[str, dict[
     for idx, window in enumerate(windows):
         df = stats[window]
         model = models[window]
-        scaling = {
-            "5D": 5/7,
+        scaling = { # For the models to be comparable to the 7D model, we need to scale the x-axis by the square of the time window ratio
+            "5D": (5/7)**2,
             "7D": 1,
-            "10D": 10/7,
-            "14D": 14/7,
+            "10D": (10/7)**2,
+            "14D": (14/7)**2,
         }
         for idx2, case in enumerate(("mean", "var")):
             if case == "mean":
                 # Plot mean
                 ax[idx2, idx].scatter(
-                    df.index.to_numpy()*scaling[window],
+                    df["dt_idx"],
                     df["mean number of mutations"],
                     color=COLORS["UK"],
                     edgecolor="k",
@@ -645,17 +806,40 @@ def plot_uk_time_windows(stats: dict[str, pd.DataFrame], models: dict[str, dict[
                 _x = np.arange(-0.5, 51, 0.5)
                 ax[idx2, idx].plot(
                     _x,
-                    model["mean"][0]["m"]*(_x/scaling[window]) + model["mean"][0]["b"],
+                    model["mean"][0]["m"]*_x + model["mean"][0]["b"],
                     color=COLORS["UK"],
                     label=rf"Mean ($R^2 = {round(model['mean'][1], 2):.2f})$",
                     linewidth=3,
                     zorder=1,
                 )
+                # Plot confidence intervals if available and enabled
+                if PLOT_CONFIDENCE_INTERVALS and len(model["mean"]) > 2 and model["mean"][2]:
+                    confidence_intervals = model["mean"][2]
+                    # For mean, it's always linear model
+                    model_type = "linear_mean"
+                    # Calculate confidence bounds
+                    lower_bounds, upper_bounds = _calculate_confidence_bounds(
+                        _x, model["mean"][0], confidence_intervals, model_type
+                    )
+                    if lower_bounds is not None and upper_bounds is not None:
+                        # Plot confidence interval as filled area
+                        ax[idx2, idx].fill_between(
+                            _x,
+                            lower_bounds,
+                            upper_bounds,
+                            color=COLORS["UK"],
+                            alpha=0.2,
+                            zorder=0,
+                        )
             elif case == "var":
                 # Plot variance
                 ax[idx2, idx].scatter(
-                    df.index.to_numpy()*scaling[window],
+                    df["dt_idx"],
                     df["var number of mutations"] - df["var number of mutations"].min(),
                     color=COLORS["UK"],
                     edgecolor="k",
@@ -664,12 +848,36 @@ def plot_uk_time_windows(stats: dict[str, pd.DataFrame], models: dict[str, dict[
                 ax[idx2, idx].plot(
                     _x,
-                    model["var"][0]["d"]*(_x/scaling[window])**model["var"][0]["alpha"],
+                    model["var"][0]["d"]*(_x*scaling[window])**model["var"][0]["alpha"],
                     color=COLORS["UK"],
                     label=rf"Var ($R^2 = {round(model['var'][1], 2):.2f})$",
                     linewidth=3,
                     zorder=1,
                 )
+                # Plot confidence intervals if available and enabled
+                if PLOT_CONFIDENCE_INTERVALS and len(model["var"]) > 2 and model["var"][2]:
+                    confidence_intervals = model["var"][2]
+                    # For variance, it's always power law model
+                    model_type = "power_law"
+                    # Calculate confidence bounds for the scaled x values
+                    scaled_x = _x * scaling[window]
+                    lower_bounds, upper_bounds = _calculate_confidence_bounds(
+                        scaled_x, model["var"][0], confidence_intervals, model_type
+                    )
+                    if lower_bounds is not None and upper_bounds is not None:
+                        # Plot confidence interval as filled area
+                        ax[idx2, idx].fill_between(
+                            _x,
+                            lower_bounds,
+                            upper_bounds,
+                            color=COLORS["UK"],
+                            alpha=0.2,
+                            zorder=0,
+                        )
             # Styling
             ax[idx2, idx].set_xlim(-0.5, 40.5)
@@ -698,6 +906,165 @@ def plot_uk_time_windows(stats: dict[str, pd.DataFrame], models: dict[str, dict[
     if show:
         plt.show()
+def load_model_selection_results(directory: str) -> list[dict]:
+    """Load all regression results from a directory for model selection analysis.
+    This function recursively walks through the directory tree to find all
+    regression results JSON files, supporting both flat and nested directory structures.
+    Expected structure:
+        directory/
+        ├── {timestamp}/
+        │   ├── {dataset_01}/
+        │   │   └── *_out_regression_results.json
+        │   ├── {dataset_02}/
+        │   │   └── *_out_regression_results.json
+        │   └── ...
+        └── (or any nested structure)
+    The model selection block is looked up under every key spelling that
+    ``_get_var_model`` accepts ("substitutions" first, as before), so results
+    written with the "mutations" spelling are no longer reported as "unknown".
+    Files that cannot be read or parsed are reported on stdout and skipped.
+    Directories and files are visited in sorted order so the result order is
+    reproducible across filesystems.
+    :param directory: Root directory to search for regression results
+    :type directory: str
+    :return: List of dictionaries containing model selection information, one
+        per readable file; ``selected_model`` is "unknown" and the other fields
+        are None when a file has no model selection block
+    :rtype: list[dict]
+    """
+    # Key spellings of the scaled-variance model entry, in lookup priority order
+    var_model_keys = (
+        "scaled var number of substitutions model",
+        "scaled var number of mutations model",
+        "scaled var number of substitutions per 7D model",
+        "scaled var number of mutations per 7D model",
+    )
+    # Fields copied verbatim from the model selection block (None when absent)
+    selection_fields = (
+        "linear_AIC",
+        "power_law_AIC",
+        "delta_AIC_linear",
+        "delta_AIC_power_law",
+        "akaike_weight_linear",
+        "akaike_weight_power_law",
+    )
+    results = []
+    # Walk through directory tree to find all regression results files
+    # This works with any directory structure (flat or nested)
+    for root, dirs, files in os.walk(directory):
+        # Sorting in place makes os.walk descend in a deterministic order
+        dirs.sort()
+        for filename in sorted(files):
+            if not filename.endswith("out_regression_results.json"):
+                continue
+            file_path = os.path.join(root, filename)
+            try:
+                with open(file_path, "r", encoding="utf-8") as f:
+                    data = json.load(f)
+            except (OSError, ValueError) as e:
+                # ValueError covers json.JSONDecodeError and UnicodeDecodeError
+                print(f"Error loading {file_path}: {e}")
+                continue
+            if not isinstance(data, dict):
+                print(f"Error loading {file_path}: top-level JSON value is not an object")
+                continue
+            # Extract the model selection info from the first entry that has one
+            model_selection = {}
+            for key in var_model_keys:
+                entry = data.get(key)
+                if isinstance(entry, dict) and isinstance(entry.get("model_selection"), dict):
+                    model_selection = entry["model_selection"]
+                    break
+            record = {
+                "file": file_path,
+                "selected_model": model_selection.get("selected", "unknown"),
+            }
+            record.update({name: model_selection.get(name) for name in selection_fields})
+            results.append(record)
+    return results
+def create_confusion_matrix_plot(export: bool = False, show: bool = True) -> None:
+    """Create a confusion matrix plot for model selection accuracy analysis.
+    Analyzes regression results from test5 synthetic datasets to assess
+    model selection accuracy. Works with organized directory structure:
+        tests/data/test5/
+        ├── linear/output/{timestamp}/
+        │   ├── linear_01/
+        │   │   └── linear_01_out_regression_results.json
+        │   ├── linear_02/
+        │   └── ...
+        └── powerlaw/output/{timestamp}/
+            ├── powerlaw_01/
+            └── ...
+    In the plotted matrix the rows (y-axis) are the predicted model and the
+    columns (x-axis) are the actual model, matching the axis labels. Any result
+    whose selected model is not the expected one (including "unknown") is
+    counted as a misclassification into the other class.
+    :param export: Whether to export the figure to share/confusion_matrix_heatmap.pdf
+    :type export: bool
+    :param show: Whether to display the figure
+    :type show: bool
+    """
+    set_matplotlib_global_params()
+    # Define paths - searches recursively through all subdirectories
+    base_path = "tests/data/test5"
+    linear_dir = os.path.join(base_path, "linear", "output")
+    powerlaw_dir = os.path.join(base_path, "powerlaw", "output")
+    print("Loading model selection results...")
+    # Load results from both directories
+    linear_results = load_model_selection_results(linear_dir)
+    powerlaw_results = load_model_selection_results(powerlaw_dir)
+    print(f"Loaded {len(linear_results)} linear results")
+    print(f"Loaded {len(powerlaw_results)} powerlaw results")
+    # Analyze results
+    linear_success = sum(1 for r in linear_results if r['selected_model'] == 'linear')
+    linear_failure = len(linear_results) - linear_success
+    powerlaw_success = sum(1 for r in powerlaw_results if r['selected_model'] == 'power_law')
+    powerlaw_failure = len(powerlaw_results) - powerlaw_success
+    # Create confusion matrix indexed as [predicted, actual]: imshow draws row i
+    # at y = i and column j at x = j, and the axes are labelled y = Predicted,
+    # x = Actual. A linear dataset that was NOT selected as linear therefore
+    # belongs in the (Predicted Power Law, Actual Linear) cell, and vice versa.
+    confusion_matrix = np.array([
+        [linear_success, powerlaw_failure],   # Predicted Linear: [Actual Linear, Actual Power Law]
+        [linear_failure, powerlaw_success]    # Predicted Power Law: [Actual Linear, Actual Power Law]
+    ])
+    # Create the plot
+    fig, ax = plt.subplots(figsize=(8, 6))
+    # Create custom colormap from white to light blue
+    colors = ['white', '#76d6ff']
+    n_bins = 30
+    cmap = LinearSegmentedColormap.from_list('custom', colors, N=n_bins)
+    # Create heatmap
+    im = ax.imshow(confusion_matrix, interpolation='nearest', cmap=cmap)
+    # Add colorbar
+    cbar = ax.figure.colorbar(im, ax=ax)
+    cbar.set_label('Count', rotation=270, labelpad=20)
+    # Set ticks and labels
+    ax.set_xticks([0, 1])
+    ax.set_yticks([0, 1])
+    ax.set_xticklabels(['Linear', 'Power Law'])  # Actual model (x-axis)
+    ax.set_yticklabels(['Linear', 'Power Law'])  # Predicted model (y-axis)
+    # Add text annotations
+    thresh = confusion_matrix.max() / 2.
+    for i in range(confusion_matrix.shape[0]):
+        for j in range(confusion_matrix.shape[1]):
+            ax.text(j, i, format(confusion_matrix[i, j], 'd'),
+                   ha="center", va="center",
+                   color="white" if confusion_matrix[i, j] > thresh else "black",
+                   fontsize=16, fontweight='bold')
+    # Labels and title
+    ax.set_xlabel('Actual Model', fontsize=16)
+    ax.set_ylabel('Predicted Model', fontsize=16)
+    ax.set_title('Model Selection Confusion Matrix', fontsize=18, fontweight='bold')
+    # Calculate and display accuracy
+    total_tests = len(linear_results) + len(powerlaw_results)
+    total_successes = linear_success + powerlaw_success
+    overall_accuracy = total_successes / total_tests if total_tests > 0 else 0
+    # Add accuracy text
+    ax.text(0.5, -0.25, f'Overall Accuracy: {overall_accuracy:.3f} ({total_successes}/{total_tests})',
+            transform=ax.transAxes, ha='center', fontsize=14, fontweight='bold')
+    plt.tight_layout()
+    if export:
+        fig.savefig(
+            "share/confusion_matrix_heatmap.pdf",
+            dpi=400,
+            bbox_inches="tight",
+        )
+        print("Confusion matrix saved as share/confusion_matrix_heatmap.pdf")
+    if show:
+        plt.show()
 #´:°•.°+.*•´.*:˚.°*.˚•´.°:°•.°•.*•´.*:˚.°*.˚•´.°:°•.°+.*•´.*:#
 #                           MAIN                             #
 #.•°:°.´+˚.*°.˚:*.´•*.+°.•°:´*.´•*.•°.•°:°.´:•˚°.*°.˚:*.´+°.•#
@@ -733,22 +1100,25 @@ def main(export: bool = False) -> None:
     # Main plot
     plot_main_figure(df, models, export=export)
-    # Size plot
+    # # Size plot
     size_plot(df, export=export)
-    # Anomalous diffusion plot
+    # # Anomalous diffusion plot
     anomalous_diffusion_plot(export=export)
-    # Synthetic data plot
+    # # Synthetic data plot
     synth_df = load_synthetic_data_df()
     synth_models = load_synthetic_data_models()
     synthetic_data_plot(synth_df, synth_models, export=export)
-    # UK time windows plot
+    # # UK time windows plot
     additional_uk_stats = load_additional_uk_stats()
     additional_uk_models = load_additional_uk_models()
     plot_uk_time_windows(additional_uk_stats, additional_uk_models, export=export)
+    # # Confusion matrix plot
+    create_confusion_matrix_plot(export=export)
 if __name__ == "__main__":

PyEvoMotion 0.1.1__py3-none-any.whl → 0.1.2__py3-none-any.whl

PyEvoMotion 0.1.1 (py3-none-any.whl) → 0.1.2 (py3-none-any.whl)