PyPI - DeConveil - Versions diffs - 0.1.3__py3-none-any.whl → 0.2.0__py3-none-any.whl - Mend

DeConveil 0.1.3__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

deconveil/__init__.py +1 -0
deconveil/__version__.py +1 -1
deconveil/dds.py +170 -63
deconveil/default_inference.py +3 -3
deconveil/ds.py +82 -170
deconveil/grid_search.py +1 -0
deconveil/inference.py +4 -4
deconveil/nb_regression_fit.py +313 -0
deconveil/simulate_gene_dosage.py +589 -0
deconveil/utils_fit.py +173 -129
{deconveil-0.1.3.dist-info → deconveil-0.2.0.dist-info}/METADATA +4 -1
deconveil-0.2.0.dist-info/RECORD +18 -0
{deconveil-0.1.3.dist-info → deconveil-0.2.0.dist-info}/WHEEL +1 -1
deconveil-0.1.3.dist-info/RECORD +0 -16
{deconveil-0.1.3.dist-info → deconveil-0.2.0.dist-info}/licenses/LICENSE +0 -0
{deconveil-0.1.3.dist-info → deconveil-0.2.0.dist-info}/top_level.txt +0 -0

deconveil/ds.py CHANGED Viewed

@@ -132,16 +132,16 @@ class deconveil_stats:
     def __init__(
         self,
         dds: deconveil_fit,
-        contrast: Optional[List[str]] = None,
+        contrast: list[str] | np.ndarray,
         alpha: float = 0.05,
         cooks_filter: bool = True,
         independent_filter: bool = True,
-        prior_LFC_var: Optional[np.ndarray] = None,
+        prior_LFC_var: np.ndarray | None = None,
         lfc_null: float = 0.0,
-        alt_hypothesis: Optional[
-            Literal["greaterAbs", "lessAbs", "greater", "less"]
-        ] = None,
-        inference: Optional[Inference] = None,
+        alt_hypothesis: (
+            Literal["greaterAbs", "lessAbs", "greater", "less"] | None
+        ) = None,
+        inference: Inference | None = None,
         quiet: bool = False,
     ) -> None:
         assert (
@@ -164,24 +164,67 @@ class deconveil_stats:
         self.lfc_null = lfc_null
         self.alt_hypothesis = alt_hypothesis
-        # Check the validity of the contrast (if provided) or build it.
-        self._build_contrast(contrast)
         # Initialize the design matrix and LFCs. If the chosen reference level are the
         # same as in dds, keep them unchanged. Otherwise, change reference level.
         self.design_matrix = self.dds.obsm["design_matrix"].copy()
         self.LFC = self.dds.varm["LFC"].copy()
-        # Build a contrast vector corresponding to the variable and levels of interest
-        self._build_contrast_vector()
+        # Check the validity of the contrast (if provided) or build it.
+        self.contrast: list[str] | np.ndarray
+        if contrast is None:
+            raise ValueError(
+                """Default contrasts are no longer supported.
+                The "contrast" argument must be provided."""
+            )
+        elif isinstance(contrast, np.ndarray):
+            if contrast.shape[0] != self.dds.obsm["design_matrix"].shape[1]:
+                raise ValueError(
+                    "The contrast vector must have the same length as the design matrix."
+                )
+            self.contrast = contrast
+            self.contrast_vector = contrast
+        else:
+            self.contrast = contrast
+            self._build_contrast_vector()
         # Set a flag to indicate that LFCs are unshrunk
         self.shrunk_LFCs = False
         self.quiet = quiet
+        if inference:
+            if n_cpus:
+                if hasattr(inference, "n_cpus"):
+                    inference.n_cpus = n_cpus
+                else:
+                    warnings.warn(
+                        "The provided inference object does not have an n_cpus "
+                        "attribute, cannot override `n_cpus`.",
+                        UserWarning,
+                        stacklevel=2,
+                    )
         # Initialize the inference object.
         self.inference = inference or DefInference()
+        # If the `refit_cooks` attribute of the dds object is True, check that outliers
+        # were actually refitted.
+        #if self.dds.refit_cooks and "replaced" not in self.dds.var:
+            #raise AttributeError(
+                #"dds has 'refit_cooks' set to True but Cooks outliers have not been "
+                #"refitted. Please run 'dds.refit()' first or set 'dds.refit_cooks' "
+                #"to False."
+            #)
+        if self.dds.refit_cooks and "replaced" not in getattr(self.dds, "varm", {}):
+            raise AttributeError(
+                "dds has 'refit_cooks' set to True but Cooks outliers have not been "
+                "refitted. Please run 'dds.refit()' first or set 'dds.refit_cooks' "
+                "to False."
+            )
+    @property
+    def variables(self):
+        """Get the names of the variables used in the model definition."""
+        return self.dds.variables
     def summary(
         self,
@@ -249,9 +292,12 @@ class deconveil_stats:
         self.results_df["padj"] = self.padj
         if not self.quiet:
-            if self.contrast[1] == self.contrast[2] == "":
-                # The factor is continuous
-                print(f"Log2 fold change & Wald test p-value: " f"{self.contrast[0]}")
+            if isinstance(self.contrast, np.ndarray):
+                # The contrast vector was directly provided
+                print(
+                    "Log2 fold change & Wald test p-value, contrast vector: "
+                    f"{self.contrast}"
+                )
             else:
                 # The factor is categorical
                 print(
@@ -259,6 +305,7 @@ class deconveil_stats:
                     f"{self.contrast[0]} {self.contrast[1]} vs {self.contrast[2]}"
                 )
             print(self.results_df)
     def run_wald_test(self) -> None:
         """Perform a Wald test.
@@ -288,7 +335,7 @@ class deconveil_stats:
             ridge_factor = np.diag(1 / self.prior_LFC_var**2)
         else:
             ridge_factor = np.diag(np.repeat(1e-6, num_vars))
         design_matrix = self.design_matrix.values
         LFCs = self.LFC.values
@@ -320,70 +367,38 @@ class deconveil_stats:
             self.p_values.loc[self.dds.new_all_zeroes_genes] = 1.0
-    def lfc_shrink(self, coeff: Optional[str] = None, adapt: bool = True) -> None:
+    def lfc_shrink(self, coeff: str, adapt: bool = True) -> None:
         """LFC shrinkage with an apeGLM prior :cite:p:`DeseqStats-zhu2019heavy`.
         Shrinks LFCs using a heavy-tailed Cauchy prior, leaving p-values unchanged.
         Parameters
         ----------
-        coeff : str or None
-            The LFC coefficient to shrink. If set to ``None``, the method will try to
-            shrink the coefficient corresponding to the ``contrast`` attribute.
-            If the desired coefficient is not available, it may be set from the
-            :class:`pydeseq2.dds.DeseqDataSet` argument ``ref_level``.
+        coeff : str
+            The LFC coefficient to shrink. Must be one of the columns of the LFC matrix.
             (default: ``None``).
         adapt: bool
             Whether to use the MLE estimates of LFC to adapt the prior. If False, the
             prior scale is set to 1. (``default=True``)
         """
-        if self.contrast[1] == self.contrast[2] == "":
-            # The factor being tested is continuous
-            contrast_level = self.contrast[0]
-        else:
-            # The factor being tested is categorical
-            contrast_level = (
-                f"{self.contrast[0]}_{self.contrast[1]}_vs_{self.contrast[2]}"
-            )
-        if coeff is not None:
-            if coeff not in self.LFC.columns:
-                split_coeff = coeff.split("_")
-                if len(split_coeff) == 4:
-                    raise KeyError(
-                        f"The coeff argument '{coeff}' should be one the LFC columns. "
-                        f"The available LFC coeffs are {self.LFC.columns[1:]}. "
-                        f"If the desired coefficient is not available, please set "
-                        f"`ref_level = [{split_coeff[0]}, {split_coeff[3]}]` "
-                        f"in DeseqDataSet and rerun."
-                    )
-                else:
-                    raise KeyError(
-                        f"The coeff argument '{coeff}' should be one the LFC columns. "
-                        f"The available LFC coeffs are {self.LFC.columns[1:]}. "
-                        f"If the desired coefficient is not available, please set the "
-                        f"appropriate`ref_level` in DeseqDataSet and rerun."
-                    )
-        elif contrast_level not in self.LFC.columns:
+        if coeff not in self.LFC.columns:
             raise KeyError(
-                f"lfc_shrink's coeff argument was set to None, but the coefficient "
-                f"corresponding to the contrast {self.contrast} is not available."
-                f"The available LFC coeffs are {self.LFC.columns[1:]}. "
-                f"If the desired coefficient is not available, please set "
-                f"`ref_level = [{self.contrast[0]}, {self.contrast[2]}]` "
-                f"in DeseqDataSet and rerun."
-            )
-        else:
-            coeff = contrast_level
+                f"coeff '{coeff}' must be one of the LFC columns.\n"
+                f"Available: {list(self.LFC.columns)}"
+        )
         coeff_idx = self.LFC.columns.get_loc(coeff)
+        design_matrix = self.design_matrix.values
         size = 1.0 / self.dds.varm["dispersions"]
-        offset = np.log(self.dds.obsm["size_factors"])
+        offset = np.log(self.dds.obsm["size_factors"])
         counts=self.dds.data["counts"]
         cnv=self.dds.data["cnv"].to_numpy()
-        cnv = cnv + 0.1
+        cnv = (cnv / 2) + 0.1
+        #cnv = cnv + 0.1
         cnv = np.log(cnv)
         # Set priors
@@ -444,24 +459,10 @@ class deconveil_stats:
         if hasattr(self, "results_df"):
             self.results_df["log2FoldChange"] = self.LFC.iloc[:, coeff_idx] / np.log(2)
             self.results_df["lfcSE"] = self.SE / np.log(2)
-            # Get the corresponding factor, tested and reference levels of the shrunk
-            # coefficient
-            split_coeff = coeff.split("_")
-            # Categorical coeffs are of the form "factor_A_vs_B", and continuous coeffs
-            # of the form "factor".
-            if len(split_coeff) == 1:
-                # The factor is continuous
-                print(f"Shrunk log2 fold change & Wald test p-value: " f"{coeff}")
-            else:
-                # The factor is categorical
-                # Categorical coeffs are of the form "factor_A_vs_B", hence "factor"
-                # is split_coeff[0], "A" is split_coeff[1] and "B" split_coeff[3]
-                print(
-                    f"Shrunk log2 fold change & Wald test p-value: "
-                    f"{split_coeff[0]} {split_coeff[1]} vs {split_coeff[3]}"
-                )
+            if not self.quiet:
+                print(f"Shrunk log2 fold change & Wald test p-value: {coeff}")
+                print(self.results_df)
-            print(self.results_df)
     def _independent_filtering(self) -> None:
@@ -534,7 +535,6 @@ class deconveil_stats:
         # As in DESeq2, only take samples with 3 or more replicates when looking for
         # max cooks.
-        #use_for_max = n_or_more_replicates(self.design_matrix, 3)
         use_for_max = n_or_more_replicates(self.dds.obsm["design_matrix"], 3).values
         # If for a gene there are 3 samples or more that have more counts than the
@@ -614,70 +614,7 @@ class deconveil_stats:
             return min_var
         else:
             return root_scalar(objective, bracket=[min_var, max_var]).root
-    def _build_contrast(self, contrast: Optional[List[str]] = None) -> None:
-        """Check the validity of the contrast (if provided).
-        If not, build a default
-        contrast, corresponding to the last column of the design matrix.
-        A contrast should be a list of three strings, in the following format:
-        ``['variable_of_interest', 'tested_level', 'reference_level']``.
-        Names must correspond to the metadata data passed to the DeseqDataSet.
-        E.g., ``['condition', 'B', 'A']`` will measure the LFC of 'condition B'
-        compared to 'condition A'.
-        For continuous variables, the last two strings will be left empty, e.g.
-        ``['measurement', '', ''].
-        If None, the last variable from the design matrix
-        is chosen as the variable of interest, and the reference level is picked
-        alphabetically.
-        Parameters
-        ----------
-        contrast : list or None
-            A list of three strings, in the following format:
-            ``['variable_of_interest', 'tested_level', 'reference_level']``.
-            (default: ``None``).
-        """
-        if contrast is not None:  # Test contrast if provided
-            if len(contrast) != 3:
-                raise ValueError("The contrast should contain three strings.")
-            if contrast[0] not in self.dds.design_factors:
-                raise KeyError(
-                    f"The contrast variable ('{contrast[0]}') should be one "
-                    f"of the design factors."
-                )
-            if not (contrast[1] == contrast[2] == ""):
-                # The contrast factor is categorical, so we should check that the tested
-                # and reference levels are valid.
-                if contrast[1] not in self.dds.obs[contrast[0]].values:
-                    raise KeyError(
-                        f"The tested level ('{contrast[1]}') should correspond to "
-                        f"one of the levels of '{contrast[0]}'"
-                    )
-                if contrast[2] not in self.dds.obs[contrast[0]].values:
-                    raise KeyError(
-                        f"The reference level ('{contrast[2]}') should correspond to "
-                        f"one of the levels of '{contrast[0]}'"
-                    )
-            self.contrast = contrast
-        else:  # Build contrast if None
-            factor = self.dds.design_factors[-1]
-            # Check whether this factor is categorical or continuous.
-            if (
-                self.dds.continuous_factors is not None
-                and factor in self.dds.continuous_factors
-            ):
-                # The factor is continuous
-                self.contrast = [factor, "", ""]
-            else:
-                # The factor is categorical
-                factor_col = next(
-                    col
-                    for col in self.dds.obsm["design_matrix"].columns
-                    if col.startswith(factor)
-                )
-                split_col = factor_col.split("_")
-                self.contrast = [split_col[0], split_col[1], split_col[-1]]
     def _build_contrast_vector(self) -> None:
         """
@@ -688,34 +625,9 @@ class deconveil_stats:
         factor = self.contrast[0]
         alternative = self.contrast[1]
         ref = self.contrast[2]
-        if ref == alternative == "":
-            # "factor" is a continuous variable
-            contrast_level = factor
-        else:
-            contrast_level = f"{factor}_{alternative}_vs_{ref}"
-        self.contrast_vector = np.zeros(self.LFC.shape[-1])
-        if contrast_level in self.design_matrix.columns:
-            self.contrast_idx = self.LFC.columns.get_loc(contrast_level)
-            self.contrast_vector[self.contrast_idx] = 1
-        elif f"{factor}_{ref}_vs_{alternative}" in self.design_matrix.columns:
-            # Reference and alternative are inverted
-            self.contrast_idx = self.LFC.columns.get_loc(
-                f"{factor}_{ref}_vs_{alternative}"
-            )
-            self.contrast_vector[self.contrast_idx] = -1
-        else:
-            # Need to change reference
-            # Get any column corresponding to the desired factor and extract old ref
-            old_ref = next(
-                col for col in self.LFC.columns if col.startswith(factor)
-            ).split("_vs_")[-1]
-            new_alternative_idx = self.LFC.columns.get_loc(
-                f"{factor}_{alternative}_vs_{old_ref}"
-            )
-            new_ref_idx = self.LFC.columns.get_loc(f"{factor}_{ref}_vs_{old_ref}")
-            self.contrast_vector[new_alternative_idx] = 1
-            self.contrast_vector[new_ref_idx] = -1
+        self.contrast_vector = self.dds.contrast(
+            column=factor, baseline=ref, group_to_compare=alternative
+        )
     def plot_MA(self, log: bool = True, save_path: Optional[str] = None, **kwargs):

deconveil/grid_search.py CHANGED Viewed

@@ -67,6 +67,7 @@ def grid_fit_beta(
             raise ValueError("Beta is not properly initialized or has an unexpected shape.")
+        #mu = np.maximum(size_factors[:, None] * np.exp(design_matrix @ beta.T), min_mu)
         mu = np.maximum(cnv * size_factors[:, None] * np.exp(design_matrix @ beta.T), min_mu)
         return vec_nb_nll(counts, mu, disp) + 0.5 * (1e-6 * beta**2).sum(1)

deconveil/inference.py CHANGED Viewed

@@ -261,9 +261,9 @@ class Inference(ABC):
         ridge_factor: np.ndarray,
         contrast: np.ndarray,
         lfc_null: np.ndarray,
-        alt_hypothesis: Optional[
-            Literal["greaterAbs", "lessAbs", "greater", "less"]
-        ] = None,
+        alt_hypothesis: (
+            Literal["greaterAbs", "lessAbs", "greater", "less"] | None
+        ) = None,
     ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
         """Run Wald test for differential expression.
@@ -320,7 +320,7 @@ class Inference(ABC):
         prior_scale: float,
         optimizer: str,
         shrink_index: int,
-    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+    ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
         """Fit a negative binomial MAP LFC using an apeGLM prior.
         Only the LFC is shrinked, and not the intercept.

DeConveil 0.1.3__py3-none-any.whl → 0.2.0__py3-none-any.whl

DeConveil 0.1.3__py3-none-any.whl → 0.2.0__py3-none-any.whl