PyPI - PyEvoMotion - Versions diffs - 0.1.1__py3-none-any.whl → 0.1.2__py3-none-any.whl - Mend

PyEvoMotion 0.1.1py3-none-any.whl → 0.1.2py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

PyEvoMotion/cli.py +87 -3
PyEvoMotion/core/base.py +296 -20
PyEvoMotion/core/core.py +73 -24
{pyevomotion-0.1.1.dist-info → pyevomotion-0.1.2.dist-info}/METADATA +1 -1
pyevomotion-0.1.2.dist-info/RECORD +35 -0
share/analyze_model_selection_accuracy.py +316 -0
share/analyze_test_runs.py +436 -0
share/anomalous_diffusion.pdf +0 -0
share/confusion_matrix_heatmap.pdf +0 -0
share/figUK_plots.pdf +0 -0
share/figUK_regression_results.json +54 -7
share/figUK_run_args.json +1 -0
share/figUK_stats.tsv +41 -41
share/figUSA_plots.pdf +0 -0
share/figUSA_regression_results.json +54 -7
share/figUSA_run_args.json +1 -0
share/figUSA_stats.tsv +34 -34
share/generate_sequences_from_test5_data.py +107 -0
share/manuscript_figure.py +450 -80
share/run_parallel_analysis.py +196 -0
share/synth_figure.pdf +0 -0
share/uk_time_windows.pdf +0 -0
share/weekly_size.pdf +0 -0
pyevomotion-0.1.1.dist-info/RECORD +0 -31
share/figure.pdf +0 -0
{pyevomotion-0.1.1.dist-info → pyevomotion-0.1.2.dist-info}/WHEEL +0 -0
{pyevomotion-0.1.1.dist-info → pyevomotion-0.1.2.dist-info}/entry_points.txt +0 -0

PyEvoMotion/core/core.py CHANGED Viewed

@@ -433,7 +433,8 @@ class PyEvoMotion(PyEvoMotionParser, PyEvoMotionBase):
         length: int,
         show: bool = False,
         mutation_kind: str = "all",
-        export_plots_filename: str | None = None
+        export_plots_filename: str | None = None,
+        confidence_level: float = 0.95
     ) -> tuple[pd.DataFrame, dict[str,dict[str,any]]]:
         """
         Perform the global analysis of the data.
@@ -446,8 +447,10 @@ class PyEvoMotion(PyEvoMotionParser, PyEvoMotionBase):
         :type show: bool
         :param mutation_kind: The kind of mutation to compute the statistics for. Has to be one of ``all``, ``total``, ``substitutions`` or ``indels``. Default is ``all``.
         :type mutation_kind: str
-        :param export_plots: Filename to export the plots. Default is None and does not export the plots.
-        :type export_plots: str | None
+        :param export_plots_filename: Filename to export the plots. Default is None and does not export the plots.
+        :type export_plots_filename: str | None
+        :param confidence_level: Confidence level for parameter confidence intervals (default 0.95 for 95% CI).
+        :type confidence_level: float
         :return: The statistics and the regression models.
         :rtype: ``tuple[pd.DataFrame, dict[str, dict[str, any]]]``
         """
@@ -476,39 +479,49 @@ class PyEvoMotion(PyEvoMotionParser, PyEvoMotionBase):
                             stats.index, # Regression is given by the index, so in time, it is the same as multiplying by dt days
                             stats[col],
                             weights
-                        )
+                        ),
+                        confidence_level=confidence_level
                     )
                 }
             elif col.startswith("var"):
-                _single_regression = self.adjust_model(
+                _adjust_result = self.adjust_model(
                     stats.index,
                     stats[col] - stats[col].min(),
                     name=f"scaled {col} model",
-                    weights=weights.to_numpy().flatten()
+                    weights=weights.to_numpy().flatten(),
+                    confidence_level=confidence_level
                 )
+                # Extract the selected model for backward compatibility while preserving all model info
+                model_name = f"scaled {col} model"
+                full_result = _adjust_result[model_name]
+                selected_model = full_result["selected_model"]
+                # Store both the selected model (for backward compatibility) and full results
+                _single_regression = {
+                    model_name: selected_model,
+                    f"{model_name}_full_results": full_result
+                }
             # Save the regression model
             regs.update(_single_regression)
         # Add scaling correction to the regression models
         for k, v in regs.items():
-            if v["expression"] == "mx + b":
-                m = v["parameters"]["m"]
-                b = v["parameters"]["b"]
-                regs[k]["parameters"]["m"] = m/self.dt_ratio
-                m = regs[k]["parameters"]["m"]
-                regs[k]["model"] = lambda x: m*x + b
-            elif v["expression"] == "mx":
-                m = v["parameters"]["m"]
-                regs[k]["parameters"]["m"] = m/self.dt_ratio
-                m = regs[k]["parameters"]["m"]
-                regs[k]["model"] = lambda x: m*x
-            elif v["expression"] == "d*x^alpha":
-                d = v["parameters"]["d"]
-                alpha = v["parameters"]["alpha"]
-                regs[k]["parameters"]["d"] = d/(self.dt_ratio**alpha)
-                d = regs[k]["parameters"]["d"]
-                regs[k]["model"] = lambda x: d*(x**alpha)
+            # Skip full results entries - we'll handle them separately
+            if k.endswith("_full_results"):
+                continue
+            # Use the helper method for scaling correction
+            self._apply_scaling_correction_to_model(v)
+        # Apply scaling correction to all models in full results
+        for k, v in regs.items():
+            if k.endswith("_full_results"):
+                # Apply scaling to selected model
+                self._apply_scaling_correction_to_model(v["selected_model"])
+                # Apply scaling to linear model
+                self._apply_scaling_correction_to_model(v["linear_model"])
+                # Apply scaling to power law model
+                self._apply_scaling_correction_to_model(v["power_law_model"])
         # Sets of mutation types used in the analysis
         _sets = sorted({
@@ -561,4 +574,40 @@ class PyEvoMotion(PyEvoMotionParser, PyEvoMotionBase):
         return stats, regs
+    def _apply_scaling_correction_to_model(self, model: dict[str, any]) -> None:
+        """Rescale one fitted-model dictionary in place by ``self.dt_ratio``.
+        The ``"expression"`` entry selects the correction. For ``"mx + b"`` and
+        ``"mx"`` the slope ``m`` is divided by ``self.dt_ratio``; for
+        ``"d*x^alpha"`` the prefactor ``d`` is divided by ``self.dt_ratio**alpha``.
+        The ``"model"`` callable is rebuilt from the corrected parameters and, if
+        a ``"confidence_intervals"`` entry exists, the bounds of the corrected
+        parameter are divided by the same factor. Any other expression leaves the
+        dictionary untouched.
+        NOTE(review): the correction is not idempotent -- every call divides
+        again -- so a dictionary reachable under several keys must only be
+        passed once; confirm against the callers.
+        :param model: The model dictionary to correct; it is mutated in place.
+        :type model: dict[str, any]
+        """
+        def _rescale_interval(name, divisor):
+            # Keep the stored interval consistent with the corrected parameter
+            if "confidence_intervals" in model:
+                lower, upper = model["confidence_intervals"][name]
+                model["confidence_intervals"][name] = (lower/divisor, upper/divisor)
+        expression = model["expression"]
+        if expression == "mx + b":
+            params = model["parameters"]
+            slope, intercept = params["m"], params["b"]
+            slope = params["m"] = slope/self.dt_ratio
+            model["model"] = lambda x: slope*x + intercept
+            _rescale_interval("m", self.dt_ratio)
+        elif expression == "mx":
+            params = model["parameters"]
+            slope = params["m"] = params["m"]/self.dt_ratio
+            model["model"] = lambda x: slope*x
+            _rescale_interval("m", self.dt_ratio)
+        elif expression == "d*x^alpha":
+            params = model["parameters"]
+            prefactor, alpha = params["d"], params["alpha"]
+            prefactor = params["d"] = prefactor/(self.dt_ratio**alpha)
+            model["model"] = lambda x: prefactor*(x**alpha)
+            _rescale_interval("d", self.dt_ratio**alpha)

{pyevomotion-0.1.1.dist-info → pyevomotion-0.1.2.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: PyEvoMotion
-Version: 0.1.1
+Version: 0.1.2
 Summary: Evolutionary motion analysis tool
 Keywords: evolution,anomalous diffusion,bioinformatics
 Author: Lucas Goiriz

pyevomotion-0.1.2.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,35 @@
+PyEvoMotion/__init__.py,sha256=NqFDD-EZBzouzTwXozZqhPC9sLr7GQaElRKtP0tkHoE,568
+PyEvoMotion/cli.py,sha256=GnTBJDlKjXEtvqhT9bZxEHl-tq4e0QZYqWticCXklo0,18885
+PyEvoMotion/core/__init__.py,sha256=1I-NkFFh6ljLgB_mqQVFLNvCrVKEHLVxa_5dsv3ihWQ,450
+PyEvoMotion/core/base.py,sha256=L_uabRqGgAQy3mXs4QfzE05RuCz-my8ZJcTglsMAg7E,27931
+PyEvoMotion/core/core.py,sha256=RHkIoIYIfteA_zrKrLF9-XemPcenl_BSbHcCwz6Sg-M,22737
+PyEvoMotion/core/parser.py,sha256=w23KzX0jl3NLS0WYjAY1s_2VFEqfn6EoTrQXmGRRXfg,17323
+PyEvoMotion/utils.py,sha256=Ye3eL1RXZOZzzs2KZy0R45u06DOtLYo-zqE45tN2t7g,2859
+share/analyze_model_selection_accuracy.py,sha256=OnGKbmI515bIRdpYMNHGQ9SlZGmVQZi_tFnAX4g2Iyw,12846
+share/analyze_test_runs.py,sha256=AXdz-TdyK7DO1iT_FWrYsONYMs-2HYst_7fofcZ8wxQ,15534
+share/anomalous_diffusion.pdf,sha256=fWUvoxB2J9JRCRRjYEXtPNfJpR3ajbGfmCBiA5_-nzs,19384
+share/confusion_matrix_heatmap.pdf,sha256=GHfupvVgHF4msQjVohc_5KWXmVsPZDueml5zb4sL4Zo,23108
+share/figUK.tsv,sha256=DnPkVfbMGfsOq9x8IAkbPzIWsmQBB1hOXChSNkNlHGo,6711825
+share/figUK_plots.pdf,sha256=6PQw3ujMN0bfj8s8DMstadyoIYna3a2Upig-W1yHWOc,22664
+share/figUK_regression_results.json,sha256=Q-WpaqxNv7O7ZNKtnt9qmqEAifpmmRnarpT0zLCKAR4,1923
+share/figUK_run_args.json,sha256=k5NFbR0YNFBQ7M6TNpLT6G2vLml27iu8DDwGmwVWKqM,344
+share/figUK_stats.tsv,sha256=_gRc1-GYlGp4P7iUWtuiTeDy6cvYVbTX6SrXYUaNF-g,2252
+share/figUSA.tsv,sha256=hSPmKjCFk0a5B0XN75JhQOuhZgfcz9ZCxNrfqMrYcF0,6577552
+share/figUSA_plots.pdf,sha256=OsAseOotQGmFljxvgyqr4NGa4PXDPWo9UFAT47cjc-k,22066
+share/figUSA_regression_results.json,sha256=xgXn3paIYmZ2mEAunx-VKqAaMKotIz2CT8zDiHWlJt4,1918
+share/figUSA_run_args.json,sha256=N5ZBL9W0OcJEyMqcBq2dxSPr9vQUJsnQfflQMdTzmRw,347
+share/figUSA_stats.tsv,sha256=-lC1Gk_t4nB5vQXTjvhAfFS34ILjtkVEILarcDGWHZA,1913
+share/figdataUK.tsv,sha256=HMF07FNT7d3Tb2OMHuFYkRzc6vb5EQ6vj2nJBpXlXJ8,939837
+share/figdataUSA.tsv,sha256=z5yaIwcyfLo7Wr5ioE-x6_qXg9IhT_CmAJxcLTfP4jA,827811
+share/generate_sequences_from_synthdata.py,sha256=_2IdMgcOB7SxAq30iypA-VypSmZyZmMhA8otKQnkfAw,3443
+share/generate_sequences_from_test5_data.py,sha256=H1J4FQgndTSrRbXqEzaHMFI2JGX9oWDhwNnU7uwu534,4127
+share/mafft_install.sh,sha256=pCw70UsKkkNXUsZMwQlQ2b4zSXFrBA7jAj9iOfGLzUw,1007
+share/manuscript_figure.py,sha256=JVwFc236-KtZnmkve0PrPcmMPIeg94jUOaZGq7Z6_QM,41109
+share/run_parallel_analysis.py,sha256=D67RG0ze8xikOkOVeWm716ZNXUwaHR3O7flBioubgRg,6750
+share/synth_figure.pdf,sha256=cqjXjnd0Q14p6NQHLr58vprTot8edKvaWDoq6ZNTkis,26350
+share/uk_time_windows.pdf,sha256=JioGh4bHv8VROvGnySuJ0r7VAtr9ykWeH2M9eAinpSk,35022
+share/weekly_size.pdf,sha256=_YKsCKln8wpgNNwuu9_mFRQfkv4r0g3gLVaVK76MIj8,14602
+pyevomotion-0.1.2.dist-info/METADATA,sha256=Da84PseCS2zeTtQHMJArt584_k_jZLbNh7cCSISh7PI,7833
+pyevomotion-0.1.2.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
+pyevomotion-0.1.2.dist-info/entry_points.txt,sha256=UMzoojYwQi-713hRggkQXUIfGNygUARhTdGs77Usp7s,53
+pyevomotion-0.1.2.dist-info/RECORD,,

share/analyze_model_selection_accuracy.py ADDED Viewed

@@ -0,0 +1,316 @@
+#!/usr/bin/env python3
+"""
+Script to analyze model selection accuracy from test5 regression results.
+This script analyzes the out_regression_results.json files from both linear and powerlaw
+test datasets to compute accuracy metrics and create visualizations.
+Success criteria:
+- Linear datasets: success when "selected" field is "linear"
+- Powerlaw datasets: success when "selected" field is "power_law"
+"""
+import json
+import os
+import glob
+import pandas as pd
+import matplotlib.pyplot as plt
+import numpy as np
+from pathlib import Path
+from typing import Dict, List
+def load_regression_results(directory: str) -> List[Dict]:
+    """Collect the model-selection summary of every regression result under ``directory``.
+    The directory is searched recursively for files whose name ends in
+    ``out_regression_results.json``. From each file the ``model_selection`` entry
+    of ``"scaled var number of substitutions model"`` is read; missing fields fall
+    back to ``"unknown"`` for the selected model and ``None`` for the statistics.
+    Files that cannot be read or parsed are reported on stdout and skipped.
+    :param directory: Root directory to search.
+    :return: One dictionary per readable file, ordered by file path.
+    """
+    statistic_fields = (
+        'linear_AIC',
+        'power_law_AIC',
+        'delta_AIC_linear',
+        'delta_AIC_power_law',
+        'akaike_weight_linear',
+        'akaike_weight_power_law',
+    )
+    results = []
+    pattern = os.path.join(directory, "**", "*out_regression_results.json")
+    # glob returns matches in arbitrary, platform-dependent order; sorting makes
+    # the returned list (and anything saved from it) reproducible between runs.
+    for file_path in sorted(glob.glob(pattern, recursive=True)):
+        try:
+            # JSON text is read as UTF-8 instead of the locale's default encoding
+            with open(file_path, 'r', encoding='utf-8') as f:
+                data = json.load(f)
+            # Extract the model selection info
+            model_selection = data.get("scaled var number of substitutions model", {}).get("model_selection", {})
+            record = {
+                'file': file_path,
+                'selected_model': model_selection.get("selected", "unknown"),
+            }
+            record.update({name: model_selection.get(name, None) for name in statistic_fields})
+            results.append(record)
+        except Exception as e:
+            # Deliberate best effort: one broken file must not abort the whole scan
+            print(f"Error loading {file_path}: {e}")
+    return results
+def analyze_model_selection_accuracy():
+    """Score the model selection on the test5 datasets and plot the outcome.
+    Regression results are read from ``tests/data/test5/linear/output`` and
+    ``tests/data/test5/powerlaw/output``, resolved relative to the parent of this
+    script's directory. A linear dataset counts as a success when ``linear`` was
+    selected and a powerlaw dataset when ``power_law`` was selected; any other
+    value, including a missing selection, counts as a failure. The summary
+    tables and the confusion matrix are printed, the charts are drawn and the
+    detailed results are saved.
+    :return: The summary table, the per-model metrics table and the overall accuracy.
+    """
+    # Define paths
+    base_path = Path(__file__).parent.parent / "tests" / "data" / "test5"
+    linear_dir = base_path / "linear" / "output"
+    powerlaw_dir = base_path / "powerlaw" / "output"
+    print("Loading regression results...")
+    # Load results from both directories
+    linear_results = load_regression_results(str(linear_dir))
+    powerlaw_results = load_regression_results(str(powerlaw_dir))
+    print(f"Loaded {len(linear_results)} linear results")
+    print(f"Loaded {len(powerlaw_results)} powerlaw results")
+    # Analyze linear dataset results
+    linear_success = sum(1 for r in linear_results if r['selected_model'] == 'linear')
+    linear_failure = len(linear_results) - linear_success
+    # Analyze powerlaw dataset results
+    powerlaw_success = sum(1 for r in powerlaw_results if r['selected_model'] == 'power_law')
+    powerlaw_failure = len(powerlaw_results) - powerlaw_success
+    # Create summary table
+    summary_data = {
+        'Dataset Type': ['Linear', 'Powerlaw'],
+        'Total Tests': [len(linear_results), len(powerlaw_results)],
+        'Successes': [linear_success, powerlaw_success],
+        'Failures': [linear_failure, powerlaw_failure],
+        'Success Rate': [linear_success/len(linear_results) if linear_results else 0,
+                        powerlaw_success/len(powerlaw_results) if powerlaw_results else 0]
+    }
+    df = pd.DataFrame(summary_data)
+    print("\nModel Selection Accuracy Summary:")
+    print("=" * 50)
+    print(df.to_string(index=False, float_format='%.3f'))
+    # Calculate overall accuracy metrics
+    total_tests = len(linear_results) + len(powerlaw_results)
+    total_successes = linear_success + powerlaw_success
+    overall_accuracy = total_successes / total_tests if total_tests > 0 else 0
+    # Calculate precision and recall for each model type
+    # For linear: TP = linear_success, FP = powerlaw_failure, FN = linear_failure, TN = powerlaw_success
+    linear_tp = linear_success
+    linear_fp = powerlaw_failure  # Powerlaw datasets incorrectly classified as linear
+    linear_fn = linear_failure    # Linear datasets incorrectly classified as powerlaw
+    linear_tn = powerlaw_success  # Powerlaw datasets correctly classified as powerlaw
+    # For powerlaw: TP = powerlaw_success, FP = linear_failure, FN = powerlaw_failure, TN = linear_success
+    powerlaw_tp = powerlaw_success
+    powerlaw_fp = linear_failure  # Linear datasets incorrectly classified as powerlaw
+    powerlaw_fn = powerlaw_failure  # Powerlaw datasets incorrectly classified as linear
+    powerlaw_tn = linear_success  # Linear datasets correctly classified as linear
+    # Calculate metrics; every ratio falls back to 0 when its denominator is empty
+    linear_precision = linear_tp / (linear_tp + linear_fp) if (linear_tp + linear_fp) > 0 else 0
+    linear_recall = linear_tp / (linear_tp + linear_fn) if (linear_tp + linear_fn) > 0 else 0
+    linear_specificity = linear_tn / (linear_tn + linear_fp) if (linear_tn + linear_fp) > 0 else 0
+    powerlaw_precision = powerlaw_tp / (powerlaw_tp + powerlaw_fp) if (powerlaw_tp + powerlaw_fp) > 0 else 0
+    powerlaw_recall = powerlaw_tp / (powerlaw_tp + powerlaw_fn) if (powerlaw_tp + powerlaw_fn) > 0 else 0
+    powerlaw_specificity = powerlaw_tn / (powerlaw_tn + powerlaw_fp) if (powerlaw_tn + powerlaw_fp) > 0 else 0
+    # F1 scores
+    linear_f1 = 2 * (linear_precision * linear_recall) / (linear_precision + linear_recall) if (linear_precision + linear_recall) > 0 else 0
+    powerlaw_f1 = 2 * (powerlaw_precision * powerlaw_recall) / (powerlaw_precision + powerlaw_recall) if (powerlaw_precision + powerlaw_recall) > 0 else 0
+    print(f"\nOverall Accuracy: {overall_accuracy:.3f} ({total_successes}/{total_tests})")
+    print("\nDetailed Metrics:")
+    print("=" * 50)
+    metrics_data = {
+        'Model Type': ['Linear', 'Powerlaw'],
+        'Precision': [linear_precision, powerlaw_precision],
+        'Recall (Sensitivity)': [linear_recall, powerlaw_recall],
+        'Specificity': [linear_specificity, powerlaw_specificity],
+        'F1-Score': [linear_f1, powerlaw_f1]
+    }
+    metrics_df = pd.DataFrame(metrics_data)
+    print(metrics_df.to_string(index=False, float_format='%.3f'))
+    # Create confusion matrix data. Rows are the actual dataset type and columns
+    # the selected model, which is what the printed table and the heatmap axes
+    # claim: linear datasets selected as powerlaw (linear_fn) belong to the
+    # "Actual Linear" row, powerlaw datasets selected as linear (linear_fp) to
+    # the "Actual Powerlaw" row.
+    confusion_matrix = np.array([
+        [linear_tp, linear_fn],    # Actual Linear: predicted Linear, predicted Powerlaw
+        [linear_fp, linear_tn]     # Actual Powerlaw: predicted Linear, predicted Powerlaw
+    ])
+    print("\nConfusion Matrix:")
+    print("=" * 30)
+    print("                Predicted")
+    print("              Linear  Powerlaw")
+    print(f"Actual Linear   {linear_tp:3d}     {linear_fn:3d}")
+    print(f"       Powerlaw {linear_fp:3d}     {linear_tn:3d}")
+    # Create visualizations
+    create_bar_chart(summary_data, overall_accuracy)
+    create_confusion_matrix_heatmap(confusion_matrix)
+    create_metrics_comparison(metrics_data)
+    # Save detailed results
+    save_detailed_results(linear_results, powerlaw_results, summary_data, metrics_data, overall_accuracy)
+    return df, metrics_df, overall_accuracy
+def create_bar_chart(summary_data: Dict, overall_accuracy: float):
+    """Draw the success/failure counts and the success rates side by side.
+    The left panel shows grouped bars with the number of successes and failures
+    per dataset type. The right panel shows the success rate per dataset type
+    with a dashed line at the overall accuracy. The figure is written to
+    ``model_selection_accuracy_chart.pdf`` in the current working directory.
+    :param summary_data: Mapping with the ``Dataset Type``, ``Successes``, ``Failures`` and ``Success Rate`` lists.
+    :param overall_accuracy: Overall accuracy, drawn as a horizontal reference line.
+    """
+    fig, (counts_ax, rates_ax) = plt.subplots(1, 2, figsize=(15, 6))
+    # Left panel: grouped success/failure counts
+    positions = np.arange(len(summary_data['Dataset Type']))
+    bar_width = 0.35
+    success_bars = counts_ax.bar(positions - bar_width/2, summary_data['Successes'], bar_width,
+                                 label='Successes', color='green', alpha=0.7)
+    failure_bars = counts_ax.bar(positions + bar_width/2, summary_data['Failures'], bar_width,
+                                 label='Failures', color='red', alpha=0.7)
+    counts_ax.set_xlabel('Dataset Type')
+    counts_ax.set_ylabel('Number of Tests')
+    counts_ax.set_title('Model Selection Results by Dataset Type')
+    counts_ax.set_xticks(positions)
+    counts_ax.set_xticklabels(summary_data['Dataset Type'])
+    counts_ax.legend()
+    counts_ax.grid(True, alpha=0.3)
+    # Write each count just above its bar, successes first and then failures
+    for bars in (success_bars, failure_bars):
+        for bar in bars:
+            count = bar.get_height()
+            counts_ax.text(bar.get_x() + bar.get_width()/2., count + 0.1,
+                           f'{int(count)}', ha='center', va='bottom')
+    # Right panel: success rate per dataset type
+    rate_bars = rates_ax.bar(summary_data['Dataset Type'], summary_data['Success Rate'],
+                             color=['blue', 'orange'], alpha=0.7)
+    # Dashed reference line at the overall accuracy
+    rates_ax.axhline(y=overall_accuracy, color='red', linestyle='--', linewidth=2,
+                     label=f'Overall Accuracy: {overall_accuracy:.3f}')
+    rates_ax.set_xlabel('Dataset Type')
+    rates_ax.set_ylabel('Success Rate')
+    rates_ax.set_title('Model Selection Success Rates')
+    rates_ax.set_ylim(0, 1)
+    rates_ax.legend()
+    rates_ax.grid(True, alpha=0.3)
+    # Write each rate just above its bar
+    for bar in rate_bars:
+        rate = bar.get_height()
+        rates_ax.text(bar.get_x() + bar.get_width()/2., rate + 0.01,
+                      f'{rate:.3f}', ha='center', va='bottom')
+    plt.tight_layout()
+    plt.savefig('model_selection_accuracy_chart.pdf', dpi=300, bbox_inches='tight')
+def create_confusion_matrix_heatmap(confusion_matrix: np.ndarray):
+    """Render the confusion matrix as an annotated heatmap.
+    Every cell is annotated with its integer count, in white on cells above half
+    of the largest count and in black otherwise. The figure is written to
+    ``share/confusion_matrix_heatmap.pdf`` relative to the current working directory.
+    :param confusion_matrix: Two-dimensional array of integer counts; rows are drawn on the ``True Label`` axis and columns on the ``Predicted Label`` axis.
+    """
+    class_names = ['Linear', 'Powerlaw']
+    fig, ax = plt.subplots(figsize=(8, 6))
+    image = ax.imshow(confusion_matrix, interpolation='nearest', cmap='Blues')
+    ax.figure.colorbar(image, ax=ax)
+    # Both axes carry the same two class labels
+    ax.set_xticks([0, 1])
+    ax.set_yticks([0, 1])
+    ax.set_xticklabels(class_names)
+    ax.set_yticklabels(class_names)
+    # Switch the annotation colour on dark cells so the numbers stay readable
+    half_max = confusion_matrix.max() / 2.
+    for row in range(confusion_matrix.shape[0]):
+        for col in range(confusion_matrix.shape[1]):
+            count = confusion_matrix[row, col]
+            text_color = "white" if count > half_max else "black"
+            ax.text(col, row, format(count, 'd'),
+                    ha="center", va="center", color=text_color)
+    ax.set_xlabel('Predicted Label')
+    ax.set_ylabel('True Label')
+    ax.set_title('Confusion Matrix: Model Selection Results')
+    plt.tight_layout()
+    plt.savefig('share/confusion_matrix_heatmap.pdf', dpi=300, bbox_inches='tight')
+def create_metrics_comparison(metrics_data: Dict):
+    """Draw precision, recall, specificity and F1-score as grouped bars per model type.
+    Each model type gets one group of four bars, each bar labelled with its value
+    to three decimals. The figure is written to
+    ``share/metrics_comparison_chart.pdf`` relative to the current working directory.
+    :param metrics_data: Mapping with a ``Model Type`` list and one list of scores per metric name.
+    """
+    # Metric name -> bar colour, in drawing order
+    metric_colors = {
+        'Precision': 'blue',
+        'Recall (Sensitivity)': 'green',
+        'Specificity': 'orange',
+        'F1-Score': 'red',
+    }
+    fig, ax = plt.subplots(figsize=(12, 8))
+    positions = np.arange(len(metrics_data['Model Type']))
+    bar_width = 0.2
+    for offset, (metric, color) in enumerate(metric_colors.items()):
+        ax.bar(positions + offset * bar_width, metrics_data[metric], bar_width,
+               label=metric, color=color, alpha=0.7)
+    ax.set_xlabel('Model Type')
+    ax.set_ylabel('Score')
+    ax.set_title('Model Selection Performance Metrics Comparison')
+    # Centre the tick under each group of four bars
+    ax.set_xticks(positions + bar_width * 1.5)
+    ax.set_xticklabels(metrics_data['Model Type'])
+    ax.legend()
+    ax.set_ylim(0, 1)
+    ax.grid(True, alpha=0.3)
+    # Write each score just above its bar
+    for offset, metric in enumerate(metric_colors):
+        for group, score in enumerate(metrics_data[metric]):
+            ax.text(group + offset * bar_width, score + 0.01, f'{score:.3f}',
+                    ha='center', va='bottom', fontsize=9)
+    plt.tight_layout()
+    plt.savefig('share/metrics_comparison_chart.pdf', dpi=300, bbox_inches='tight')
+def save_detailed_results(linear_results: List[Dict], powerlaw_results: List[Dict],
+                         summary_data: Dict, metrics_data: Dict, overall_accuracy: float):
+    """Write the full analysis to ``model_selection_analysis_results.json``.
+    The file is created in the current working directory and holds the overall
+    accuracy, the summary and metrics tables, the per-file results of both
+    dataset types and the timestamp of the analysis. Values that JSON cannot
+    represent are stored as their string form.
+    :param linear_results: Per-file results of the linear datasets.
+    :param powerlaw_results: Per-file results of the powerlaw datasets.
+    :param summary_data: Summary table data.
+    :param metrics_data: Per-model metrics data.
+    :param overall_accuracy: Overall accuracy.
+    """
+    payload = dict(
+        overall_accuracy=overall_accuracy,
+        summary=summary_data,
+        metrics=metrics_data,
+        linear_results=linear_results,
+        powerlaw_results=powerlaw_results,
+        analysis_timestamp=pd.Timestamp.now().isoformat(),
+    )
+    with open('model_selection_analysis_results.json', 'w') as f:
+        json.dump(payload, f, indent=2, default=str)
+    print("Detailed results saved as 'model_selection_analysis_results.json'")
+if __name__ == "__main__":
+    # Banner: what is analysed and how a success is judged, then a blank line
+    banner = (
+        "Model Selection Accuracy Analysis",
+        "=" * 40,
+        "Analyzing regression results from test5 datasets...",
+        "Success criteria:",
+        "- Linear datasets: success when 'selected' = 'linear'",
+        "- Powerlaw datasets: success when 'selected' = 'power_law'",
+        "",
+    )
+    for line in banner:
+        print(line)
+    try:
+        summary_df, metrics_df, accuracy = analyze_model_selection_accuracy()
+        print(f"\nAnalysis complete! Overall accuracy: {accuracy:.3f}")
+    except Exception as e:
+        # Top-level boundary of the script: report the failure with its traceback
+        print(f"Error during analysis: {e}")
+        import traceback
+        traceback.print_exc()

PyEvoMotion 0.1.1__py3-none-any.whl → 0.1.2__py3-none-any.whl

PyEvoMotion 0.1.1py3-none-any.whl → 0.1.2py3-none-any.whl