PyPI - PyEvoMotion - Versions diffs - 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl - Mend

PyEvoMotion 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

PyEvoMotion/cli.py +1 -8
PyEvoMotion/core/base.py +83 -20
PyEvoMotion/core/core.py +82 -38
PyEvoMotion/core/parser.py +4 -1
{pyevomotion-0.1.0.dist-info → pyevomotion-0.1.1.dist-info}/METADATA +72 -4
pyevomotion-0.1.1.dist-info/RECORD +31 -0
share/anomalous_diffusion.pdf +0 -0
share/figUK.tsv +9949 -0
share/figUK_plots.pdf +0 -0
share/figUK_regression_results.json +18 -0
share/figUK_run_args.json +13 -0
share/figUK_stats.tsv +41 -0
share/figUSA.tsv +9470 -0
share/figUSA_plots.pdf +0 -0
share/figUSA_regression_results.json +18 -0
share/figUSA_run_args.json +13 -0
share/figUSA_stats.tsv +34 -0
share/figdataUK.tsv +10001 -0
share/figdataUSA.tsv +10001 -0
share/figure.pdf +0 -0
share/generate_sequences_from_synthdata.py +85 -0
share/manuscript_figure.py +457 -12
share/synth_figure.pdf +0 -0
share/uk_time_windows.pdf +0 -0
share/weekly_size.pdf +0 -0
pyevomotion-0.1.0.dist-info/RECORD +0 -13
{pyevomotion-0.1.0.dist-info → pyevomotion-0.1.1.dist-info}/WHEEL +0 -0
{pyevomotion-0.1.0.dist-info → pyevomotion-0.1.1.dist-info}/entry_points.txt +0 -0

PyEvoMotion/cli.py CHANGED Viewed

@@ -255,13 +255,6 @@ def _parse_arguments() -> argparse.Namespace:
         default=0,
         help="Length filter for the sequences (removes sequences with length less than the specified value). Default is 0."
     )
-    parser.add_argument(
-        "-n",
-        "--n_threshold",
-        type=int,
-        default=2,
-        help="Minimum number of sequences required in a time interval to compute statistics. Default is 2."
-    )
     parser.add_argument(
         "-xj",
         "--export_json",
@@ -407,7 +400,6 @@ def _main():
     # Runs the analysis
     stats, reg = instance.analysis(
         length=args.length_filter,
-        n_threshold=args.n_threshold,
         show=args.show,
         mutation_kind=args.kind,
         export_plots_filename=(
@@ -432,6 +424,7 @@ def _main():
     # Exports the regression models to a JSON file
     with open(f"{args.out}_regression_results.json", "w") as file:
         json.dump(_reg, file, indent=4)
+    print(f"Regression results saved to {args.out}_regression_results.json")
     # Exits the program with code 0 (success)
     exit(0)

PyEvoMotion/core/base.py CHANGED Viewed

@@ -102,7 +102,7 @@ class PyEvoMotionBase():
             print(f"Method {method} not found in {instance}")
     @staticmethod
-    def _remove_nan(x: pd.Series, y: pd.Series) -> tuple[np.ndarray, np.ndarray]:
+    def _remove_nan(x: pd.Series, y: pd.Series, z: pd.Series) -> tuple[np.ndarray, np.ndarray]:
         """
         Remove NaN values from two pandas Series and return them as numpy arrays.
@@ -110,22 +110,40 @@ class PyEvoMotionBase():
         :type x: pd.Series
         :param y: the second pandas Series.
         :type y: pd.Series
+        :param z: the third pandas Series.
+        :type z: pd.Series
         :return: a tuple with the two pandas Series without NaN values.
         :rtype: tuple[np.ndarray,np.ndarray]
         """
-        data = pd.DataFrame({"x": x, "y": y}).dropna()
+        data = pd.DataFrame({"x": x, "y": y, "z": z}).dropna()
         x = data["x"].to_numpy().reshape(-1, 1)
         y = data["y"].to_numpy().reshape(-1, 1)
+        z = data["z"].to_numpy().reshape(-1, 1)
+        return x, y, z
-        return x, y
+    @staticmethod
+    def _weighting_function(n: int, n_0: int = 30) -> np.ndarray:
+        """
+        Weighting function for the data points.
+        :param n: The number of data points.
+        :type n: int
+        :param n_0: The number of data points at which the weighting function approximates the constant 1. Default is 30.
+        :type n_0: int
+        :return: The weighting function.
+        :rtype: np.ndarray
+        """
+        return np.tanh(2*n/n_0)
     @classmethod
     def linear_regression(cls,
         x: np.ndarray,
         y: np.ndarray,
-        fit_intercept=True
+        weights: np.ndarray | None = None,
+        fit_intercept: bool = True
     ) -> dict[str, any]:
         """
         Perform a linear regression on a set of data.
@@ -136,6 +154,8 @@ class PyEvoMotionBase():
         :type y: np.ndarray
         :param fit_intercept: Whether to fit the intercept. Default is ``True``.
         :type fit_intercept: bool
+        :param weights: Optional weights for the data points. If provided, points with higher weights will have more influence on the fit. These weights are scaled by the weighting function tanh(2*n/n_0), where n is the number of data points and n_0 is the number of data points at which the weighting function approximates the constant 1. Default is ``None``.
+        :type weights: np.ndarray | None
         :return: A dictionary containing:
             * ``model``: A ``lambda`` function that computes predictions based on the fitted model.
@@ -145,7 +165,9 @@ class PyEvoMotionBase():
         :rtype: ``dict[str, any]``
         """
-        reg = LinearRegression(fit_intercept=fit_intercept).fit(x,y)
+        _weights = cls._weighting_function(weights).flatten() if weights is not None else None
+        reg = LinearRegression(fit_intercept=fit_intercept).fit(x, y, sample_weight=_weights)
         if fit_intercept:
             model = {
@@ -166,7 +188,7 @@ class PyEvoMotionBase():
                 "expression": "mx"
             }
-        model["r2"] = r2_score(y, reg.predict(x))
+        model["r2"] = r2_score(y, reg.predict(x), sample_weight=_weights)
         return model
@@ -192,7 +214,7 @@ class PyEvoMotionBase():
         return a*np.power(x, b)
     @classmethod
-    def power_law_fit(cls, x: np.ndarray, y: np.ndarray) -> dict[str, any]:
+    def power_law_fit(cls, x: np.ndarray, y: np.ndarray, weights: np.ndarray | None = None) -> dict[str, any]:
         """
         Perform a power law fit on a set of data.
@@ -200,6 +222,8 @@ class PyEvoMotionBase():
         :type x: np.ndarray
         :param y: A numpy array of the target.
         :type y: np.ndarray
+        :param weights: Optional weights for the data points. If provided, points with higher weights will have more influence on the fit. These weights are scaled by the weighting function tanh(2*n/n_0), where n is the number of data points and n_0 is the number of data points at which the weighting function approximates the constant 1. Default is ``None``.
+        :type weights: np.ndarray | None
         :return: A dictionary containing:
             * ``model``: A ``lambda`` function that computes predictions based on the fitted model.
@@ -209,10 +233,13 @@ class PyEvoMotionBase():
         :rtype: ``dict[str, any]``
         """
+        _weights = cls._weighting_function(weights).flatten() if weights is not None else None
         try:
             _popt, _, _, _msg, _ier = curve_fit(
                 cls._power_law,
                 x.T.tolist()[0], y.T.tolist()[0],
+                sigma=1/np.sqrt(_weights) if _weights is not None else None,
                 full_output=True
             )
         except RuntimeError as e:
@@ -230,16 +257,18 @@ class PyEvoMotionBase():
                 "alpha": _popt[1]
             },
             "expression": "d*x^alpha",
-            "r2": r2_score(y, cls._power_law(x, *_popt))
+            "r2": r2_score(y, cls._power_law(x, *_popt), sample_weight=_weights)
         }
         return model
-    @staticmethod
+    @classmethod
     def F_test(
+        cls,
         model1: dict[str,any],
         model2: dict[str,any],
-        data: np.ndarray
+        data: np.ndarray,
+        weights: np.ndarray | None = None
     ) -> tuple[float, float]:
         """
         Perform an F-test between two models.
@@ -257,6 +286,11 @@ class PyEvoMotionBase():
         """
         data = data.flatten()
+        if weights is not None:
+            _weights = cls._weighting_function(weights.flatten())
+        else:
+            _weights = np.ones(len(data))
         # Note that p1 < p2 always. Won't do an assertion because I'm making sure elsewhere that the linear model does not have an intercept, i.e. it only has the slope
         p1 = len(model1["parameters"])
@@ -278,8 +312,8 @@ class PyEvoMotionBase():
         )
         # Sum the residuals without the infinite values
-        RSS1 = RS1.sum(where=~mask)
-        RSS2 = RS2.sum(where=~mask)
+        RSS1 = np.sum(_weights*RS1, where=~mask)
+        RSS2 = np.sum(_weights*RS2, where=~mask)
         F = ((RSS1 - RSS2)/(p2 - p1))/(RSS2/(n - p2))
@@ -289,7 +323,8 @@ class PyEvoMotionBase():
     def adjust_model(cls,
         x: pd.Series,
         y: pd.Series,
-        name: str = None
+        name: str = None,
+        weights: pd.Series | None = None
     ) -> dict[str, any]:
         """Adjust a model to the data.
@@ -299,12 +334,14 @@ class PyEvoMotionBase():
         :type y: pd.Series
         :param name: The name of the data. Default is ``None``.
         :type name: str
+        :param weights: Optional weights for the data points. If provided, points with higher weights will have more influence on the fit. These weights are scaled by the weighting function tanh(2*n/n_0), where n is the number of data points and n_0 is the number of data points at which the weighting function approximates the constant 1. Default is ``None``.
+        :type weights: np.ndarray | None
         :return: A dictionary with the model.
         :rtype: ``dict[str, any]``
         :raises ValueError: If the dataset is empty or full of NaN values. This may occur if the grouped data contains only one entry per group, indicating that the variance cannot be computed.
         """
-        x,y = cls._remove_nan(x, y)
+        x,y,w = cls._remove_nan(x, y, weights)
         # Raises an error if the dataset is (almost) empty at this point
         if (x.size <= 1) or (y.size <= 1):
@@ -313,10 +350,10 @@ class PyEvoMotionBase():
                 f"Dataset length after filtering is: x: {x.size} elements; y: {y.size} elements. In particular:\n\nx: {x}\ny: {y}\n\nPerhaps NaN appeared for certain entries. Check if the grouped data contains only one entry per group, as this may cause NaN values when computing the variance. Also, consider widening the time window."
             )
-        model1 = cls.linear_regression(x, y, fit_intercept=False) # Not fitting the intercept because data is passed scaled to the minimum
-        model2 = cls.power_law_fit(x, y)
+        model1 = cls.linear_regression(x, y, weights=w, fit_intercept=False) # Not fitting the intercept because data is passed scaled to the minimum
+        model2 = cls.power_law_fit(x, y, weights=w)
-        _, p = cls.F_test(model1, model2, y)
+        _, p = cls.F_test(model1, model2, y, weights=w)
         if p < 0.05:
             model = model2
@@ -337,6 +374,7 @@ class PyEvoMotionBase():
         model_label: str,
         data_xlabel_units: str,
         ax: any,
+        dt_ratio: float,
         **kwargs: dict[str, any]
     ) -> None:
         """
@@ -376,13 +414,13 @@ class PyEvoMotionBase():
                 point_kwargs[_k] = kwargs[k]
         ax.scatter(
-            data_x,
+            data_x.to_numpy()*dt_ratio,
             data_y,
             **point_kwargs
         )
         ax.plot(
-            data_x,
-            model(data_x),
+            data_x.to_numpy()*dt_ratio,
+            model(data_x.to_numpy()*dt_ratio),
             label=model_label,
             **line_kwargs
         )
@@ -404,3 +442,28 @@ class PyEvoMotionBase():
             raise ValueError(
                 f"The dataset is (almost) empty at this point of the analysis.\n{msg}"
             )
+    @staticmethod
+    def _get_time_ratio(dt: str, reference: str = "7D") -> float:
+        """Get the ratio of a time interval with respect to a reference interval.
+        :param dt: Time interval string (e.g. "5D", "7D", "10D", "14D", "12H")
+        :type dt: str
+        :param reference: Reference time interval string. Default is "7D".
+        :type reference: str
+        :return: The ratio of dt to reference
+        :rtype: float
+        """
+        return pd.Timedelta(dt) / pd.Timedelta(reference)
+    @classmethod
+    def _verify_dt(cls, dt: str) -> None:
+        """Verify that the time window string is greater than 1 day.
+        :param dt: Time window string (e.g. "5D", "7D", "10D", "14D")
+        :type dt: str
+        :raises ValueError: If the time window is not greater than 1 day
+        """
+        if cls._get_time_ratio(dt, "1D") <= 1:
+            raise ValueError(f"Time window must be greater than 1 day. Got {dt}")

PyEvoMotion/core/core.py CHANGED Viewed

@@ -62,7 +62,9 @@ class PyEvoMotion(PyEvoMotionParser, PyEvoMotionBase):
         :type date_range: tuple[str] | None
         """
+        self._verify_dt(dt)
         self.dt = dt
+        self.dt_ratio = self._get_time_ratio(dt)
         # Parse the input fasta and metadata files
         super().__init__(
@@ -89,7 +91,8 @@ class PyEvoMotion(PyEvoMotionParser, PyEvoMotionBase):
     def plot_results(cls,
         stats: pd.DataFrame,
         regs: dict[str, dict[str, any]],
-        data_xlabel_units: str
+        data_xlabel_units: str,
+        dt_ratio: float
     ) -> None:
         """
         Plot the results of the analysis.
@@ -110,7 +113,7 @@ class PyEvoMotion(PyEvoMotionParser, PyEvoMotionBase):
             for k,v in regs.items()
             if k.startswith("mean")
         )
-        _mean_data = stats[stats.columns[1]]
+        _mean_data = stats[stats.columns[2]]
         cls.plot_single_data_and_model(
             stats.index,
             _mean_data,
@@ -118,7 +121,8 @@ class PyEvoMotion(PyEvoMotionParser, PyEvoMotionBase):
             _model["model"],
             r"$r^2$: " + f"{_model['r2']:.2f}",
             data_xlabel_units,
-            ax[0]
+            ax[0],
+            dt_ratio=dt_ratio
         )
         # Variance
@@ -127,7 +131,7 @@ class PyEvoMotion(PyEvoMotionParser, PyEvoMotionBase):
             for k,v in regs.items()
             if k.startswith("scaled var")
         )
-        _variance_data = stats[stats.columns[2]]
+        _variance_data = stats[stats.columns[3]]
         cls.plot_single_data_and_model(
             stats.index,
             _variance_data,
@@ -135,7 +139,8 @@ class PyEvoMotion(PyEvoMotionParser, PyEvoMotionBase):
             _model["model"],
             r"$r^2$: " + f"{_model['r2']:.2f}",
             data_xlabel_units,
-            ax[1]
+            ax[1],
+            dt_ratio=dt_ratio
         )
         # Dispersion index
@@ -147,6 +152,7 @@ class PyEvoMotion(PyEvoMotionParser, PyEvoMotionBase):
             "Poissonian regime",
             data_xlabel_units,
             ax[2],
+            dt_ratio=dt_ratio,
             line_linestyle="--",
             line_color="black"
         )
@@ -159,6 +165,7 @@ class PyEvoMotion(PyEvoMotionParser, PyEvoMotionBase):
         stats: pd.DataFrame,
         regs: dict[str, dict[str, any]],
         data_xlabel_units: str,
+        dt_ratio: float,
         output_ptr: str | None = None
     ) -> None:
         """
@@ -183,7 +190,7 @@ class PyEvoMotion(PyEvoMotionParser, PyEvoMotionBase):
             for k,v in regs.items()
             if k.startswith("mean")
         )
-        _mean_data = stats[stats.columns[1]]
+        _mean_data = stats[stats.columns[2]]
         cls.plot_single_data_and_model(
             stats.index,
             _mean_data,
@@ -191,7 +198,8 @@ class PyEvoMotion(PyEvoMotionParser, PyEvoMotionBase):
             _model["model"],
             r"$r^2$: " + f"{_model['r2']:.2f}",
             data_xlabel_units,
-            plt.gca()
+            plt.gca(),
+            dt_ratio=dt_ratio
         )
         plt.title(_mean_data.name)
@@ -205,7 +213,7 @@ class PyEvoMotion(PyEvoMotionParser, PyEvoMotionBase):
             for k,v in regs.items()
             if k.startswith("scaled var")
         )
-        _variance_data = stats[stats.columns[2]]
+        _variance_data = stats[stats.columns[3]]
         cls.plot_single_data_and_model(
             stats.index,
             _variance_data,
@@ -213,7 +221,8 @@ class PyEvoMotion(PyEvoMotionParser, PyEvoMotionBase):
             lambda x: _model["model"](x) + _variance_data.min(), # Adjust the model to the original variance
             r"$r^2$: " + f"{_model['r2']:.2f}",
             data_xlabel_units,
-            plt.gca()
+            plt.gca(),
+            dt_ratio=dt_ratio
         )
         plt.title(_variance_data.name)
@@ -232,6 +241,7 @@ class PyEvoMotion(PyEvoMotionParser, PyEvoMotionBase):
             "Poissonian regime",
             data_xlabel_units,
             plt.gca(),
+            dt_ratio=dt_ratio,
             line_linestyle="--",
             line_color="black"
         )
@@ -360,7 +370,6 @@ class PyEvoMotion(PyEvoMotionParser, PyEvoMotionBase):
     def compute_stats(self,
         DT: str,
         origin: str,
-        n_threshold: int | None = None,
         mutation_kind: str = "all"
     ) -> pd.DataFrame:
         """
@@ -372,31 +381,37 @@ class PyEvoMotion(PyEvoMotionParser, PyEvoMotionBase):
         :type DT: str
         :param origin: The string datetime that will be the origin of the grouping.
         :type origin: str
-        :param n_threshold: Minimum number of sequences required in a time interval to compute statistics.
-        :type n_threshold: int | None
         :param mutation_kind: The kind of mutation to compute the statistics for. Has to be one of ``all``, ``total``, ``substitutions``, ``insertions``, ``deletions`` or ``indels``. Default is ``all``.
         :return: The statistics of the data.
         :rtype: ``pd.DataFrame``
         """
-        grouped = self.date_grouper(self.data, DT, origin)
+        # Create a local copy of the data
+        _data = self.data.copy()
-        # Only keep weeks where the number of observations is greater than the threshold
-        if n_threshold:
+        # If the very first row's date is the same as the origin, and there happens to be only one entry for that date, duplicate that row; this way the stats for the first week can be computed (with variance = 0 of course)
+        if _data.iloc[0]["date"] == origin and len(_data[_data["date"] == origin]) == 1:
+            _data = pd.concat([_data, pd.DataFrame([_data.iloc[0]])], ignore_index=True)
+            _data.sort_values(by="date", inplace=True)
+            _data.reset_index(drop=True, inplace=True)
-            _filtered = grouped.filter(lambda x: len(x) >= n_threshold)
+        # Group the data by the datetime interval
+        grouped = self.date_grouper(_data, DT, origin)
-            if len(_filtered) == 0:
-                raise ValueError(
-                    f"No groups with at least {n_threshold} observations. Consider lowering the threshold."
-                )
+        # Only keep weeks where the number of observations is greater than 1
+        _filtered = grouped.filter(lambda x: len(x) >= 2)
-            grouped = self.date_grouper(
-                _filtered,
-                DT,
-                origin
+        if len(_filtered) == 0:
+            raise ValueError(
+                f"No groups with at least 2 observations. Consider widening the time interval."
             )
+        grouped = self.date_grouper(
+            _filtered,
+            DT,
+            origin
+        )
         levels = [
             f"number of {x}"
             for x in self._mutation_type_switch(mutation_kind)
@@ -416,7 +431,6 @@ class PyEvoMotion(PyEvoMotionParser, PyEvoMotionBase):
     def analysis(self,
         length: int,
-        n_threshold: int | None = None,
         show: bool = False,
         mutation_kind: str = "all",
         export_plots_filename: str | None = None
@@ -428,7 +442,6 @@ class PyEvoMotion(PyEvoMotionParser, PyEvoMotionBase):
         :param length: The length to filter by.
         :type length: int
-        :param n_threshold: Minimum number of sequences required in a time interval to compute statistics.
         :param show: Whether to show the plots or not. Default is False.
         :type show: bool
         :param mutation_kind: The kind of mutation to compute the statistics for. Has to be one of ``all``, ``total``, ``substitutions`` or ``indels``. Default is ``all``.
@@ -447,20 +460,22 @@ class PyEvoMotion(PyEvoMotionParser, PyEvoMotionBase):
         stats = self.compute_stats(
             self.dt,
             self.origin,
-            n_threshold,
             mutation_kind
         )
+        # Get weights for weighted fitting
+        weights = stats["size"]
         regs = {}
         # For each column in the statistics (except the date and the size), compute the corresponding regression model
         for col in stats.columns[1:-1]:
             if col.startswith("mean"):
                 _single_regression = {
-                    f"{col} per {self.dt} model": self.linear_regression(
+                    f"{col} model": self.linear_regression(
                         *self._remove_nan(
                             stats.index, # Regression is given by the index, so in time, it is the same as multiplying by dt days
-                            stats[col]
+                            stats[col],
+                            weights
                         )
                     )
                 }
@@ -468,33 +483,59 @@ class PyEvoMotion(PyEvoMotionParser, PyEvoMotionBase):
                 _single_regression = self.adjust_model(
                     stats.index,
                     stats[col] - stats[col].min(),
-                    name=f"scaled {col} per {self.dt} model"
+                    name=f"scaled {col} model",
+                    weights=weights.to_numpy().flatten()
                 )
             # Save the regression model
             regs.update(_single_regression)
+        # Add scaling correction to the regression models
+        for k, v in regs.items():
+            if v["expression"] == "mx + b":
+                m = v["parameters"]["m"]
+                b = v["parameters"]["b"]
+                regs[k]["parameters"]["m"] = m/self.dt_ratio
+                m = regs[k]["parameters"]["m"]
+                regs[k]["model"] = lambda x: m*x + b
+            elif v["expression"] == "mx":
+                m = v["parameters"]["m"]
+                regs[k]["parameters"]["m"] = m/self.dt_ratio
+                m = regs[k]["parameters"]["m"]
+                regs[k]["model"] = lambda x: m*x
+            elif v["expression"] == "d*x^alpha":
+                d = v["parameters"]["d"]
+                alpha = v["parameters"]["alpha"]
+                regs[k]["parameters"]["d"] = d/(self.dt_ratio**alpha)
+                d = regs[k]["parameters"]["d"]
+                regs[k]["model"] = lambda x: d*(x**alpha)
         # Sets of mutation types used in the analysis
         _sets = sorted({
             " ".join(x.split()[1:])
             for x in stats.columns[1:-1]
         })
+        stats["dt_idx"] = (stats["date"] - stats["date"].min()) / pd.Timedelta("7D")
         # Plot the results
         if show:
             # For each set of mutation types
             for _type in _sets:
                 self.plot_results(
-                    stats[["date", f"mean {_type}", f"var {_type}"]],
+                    stats[["date", "dt_idx", f"mean {_type}", f"var {_type}"]],
                     {
                         k: v
                         for k, v in regs.items()
                         if k in (
-                            f"mean {_type} per {self.dt} model",
-                            f"scaled var {_type} per {self.dt} model"
+                            f"mean {_type} model",
+                            f"scaled var {_type} model"
                         )
                     },
-                    f"in steps of {self.dt} since {self.origin}"
+                    "wk",
+                    self.dt_ratio
                 )
         # Export the plots
         if export_plots_filename:
             # Open pdf file pointer
@@ -502,19 +543,22 @@ class PyEvoMotion(PyEvoMotionParser, PyEvoMotionBase):
             # For each set of mutation types save the plots
             for _type in _sets:
                 self.export_plot_results(
-                    stats[["date", f"mean {_type}", f"var {_type}"]],
+                    stats[["date", "dt_idx", f"mean {_type}", f"var {_type}"]],
                     {
                         k: v
                         for k, v in regs.items()
                         if k in (
-                            f"mean {_type} per {self.dt} model",
-                            f"scaled var {_type} per {self.dt} model"
+                            f"mean {_type} model",
+                            f"scaled var {_type} model"
                         )
                     },
-                    f"in steps of {self.dt} since {self.origin}",
+                    "wk",
+                    self.dt_ratio,
                     pdf
                 )
             # Close pdf file pointer
             pdf.close()
         return stats, regs

PyEvoMotion/core/parser.py CHANGED Viewed

@@ -153,11 +153,14 @@ class PyEvoMotionParser():
                 mod
                 for mod in x
                 if start - 1 < int(mod.split("_")[1]) < end
-            ]
+            ] if x else ["NO_MUTATION"]
         )
         self.data = self.data[
             self.data["mutation instructions"].apply(len) > 0
         ]
+        self.data["mutation instructions"] = self.data["mutation instructions"].apply(
+            lambda x: [] if x == ["NO_MUTATION"] else x
+        )
     def filter_columns(self, filters: dict[str, list[str] | str]) -> None:
         """

PyEvoMotion 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl

PyEvoMotion 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl