DeConveil 0.1.3__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
deconveil/ds.py CHANGED
@@ -132,16 +132,16 @@ class deconveil_stats:
132
132
  def __init__(
133
133
  self,
134
134
  dds: deconveil_fit,
135
- contrast: Optional[List[str]] = None,
135
+ contrast: list[str] | np.ndarray,
136
136
  alpha: float = 0.05,
137
137
  cooks_filter: bool = True,
138
138
  independent_filter: bool = True,
139
- prior_LFC_var: Optional[np.ndarray] = None,
139
+ prior_LFC_var: np.ndarray | None = None,
140
140
  lfc_null: float = 0.0,
141
- alt_hypothesis: Optional[
142
- Literal["greaterAbs", "lessAbs", "greater", "less"]
143
- ] = None,
144
- inference: Optional[Inference] = None,
141
+ alt_hypothesis: (
142
+ Literal["greaterAbs", "lessAbs", "greater", "less"] | None
143
+ ) = None,
144
+ inference: Inference | None = None,
145
145
  quiet: bool = False,
146
146
  ) -> None:
147
147
  assert (
@@ -164,24 +164,67 @@ class deconveil_stats:
164
164
  self.lfc_null = lfc_null
165
165
  self.alt_hypothesis = alt_hypothesis
166
166
 
167
- # Check the validity of the contrast (if provided) or build it.
168
- self._build_contrast(contrast)
169
-
170
167
  # Initialize the design matrix and LFCs. If the chosen reference level are the
171
168
  # same as in dds, keep them unchanged. Otherwise, change reference level.
172
169
  self.design_matrix = self.dds.obsm["design_matrix"].copy()
173
170
  self.LFC = self.dds.varm["LFC"].copy()
174
171
 
175
- # Build a contrast vector corresponding to the variable and levels of interest
176
- self._build_contrast_vector()
177
-
172
+ # Check the validity of the contrast (if provided) or build it.
173
+ self.contrast: list[str] | np.ndarray
174
+ if contrast is None:
175
+ raise ValueError(
176
+ """Default contrasts are no longer supported.
177
+ The "contrast" argument must be provided."""
178
+ )
179
+ elif isinstance(contrast, np.ndarray):
180
+ if contrast.shape[0] != self.dds.obsm["design_matrix"].shape[1]:
181
+ raise ValueError(
182
+ "The contrast vector must have the same length as the design matrix."
183
+ )
184
+ self.contrast = contrast
185
+ self.contrast_vector = contrast
186
+ else:
187
+ self.contrast = contrast
188
+ self._build_contrast_vector()
189
+
178
190
  # Set a flag to indicate that LFCs are unshrunk
179
191
  self.shrunk_LFCs = False
180
192
  self.quiet = quiet
181
193
 
194
+ if inference:
195
+ if n_cpus:
196
+ if hasattr(inference, "n_cpus"):
197
+ inference.n_cpus = n_cpus
198
+ else:
199
+ warnings.warn(
200
+ "The provided inference object does not have an n_cpus "
201
+ "attribute, cannot override `n_cpus`.",
202
+ UserWarning,
203
+ stacklevel=2,
204
+ )
205
+
182
206
  # Initialize the inference object.
183
207
  self.inference = inference or DefInference()
184
208
 
209
+ # If the `refit_cooks` attribute of the dds object is True, check that outliers
210
+ # were actually refitted.
211
+ #if self.dds.refit_cooks and "replaced" not in self.dds.var:
212
+ #raise AttributeError(
213
+ #"dds has 'refit_cooks' set to True but Cooks outliers have not been "
214
+ #"refitted. Please run 'dds.refit()' first or set 'dds.refit_cooks' "
215
+ #"to False."
216
+ #)
217
+ if self.dds.refit_cooks and "replaced" not in getattr(self.dds, "varm", {}):
218
+ raise AttributeError(
219
+ "dds has 'refit_cooks' set to True but Cooks outliers have not been "
220
+ "refitted. Please run 'dds.refit()' first or set 'dds.refit_cooks' "
221
+ "to False."
222
+ )
223
+
224
+ @property
225
+ def variables(self):
226
+ """Get the names of the variables used in the model definition."""
227
+ return self.dds.variables
185
228
 
186
229
  def summary(
187
230
  self,
@@ -249,9 +292,12 @@ class deconveil_stats:
249
292
  self.results_df["padj"] = self.padj
250
293
 
251
294
  if not self.quiet:
252
- if self.contrast[1] == self.contrast[2] == "":
253
- # The factor is continuous
254
- print(f"Log2 fold change & Wald test p-value: " f"{self.contrast[0]}")
295
+ if isinstance(self.contrast, np.ndarray):
296
+ # The contrast vector was directly provided
297
+ print(
298
+ "Log2 fold change & Wald test p-value, contrast vector: "
299
+ f"{self.contrast}"
300
+ )
255
301
  else:
256
302
  # The factor is categorical
257
303
  print(
@@ -259,6 +305,7 @@ class deconveil_stats:
259
305
  f"{self.contrast[0]} {self.contrast[1]} vs {self.contrast[2]}"
260
306
  )
261
307
  print(self.results_df)
308
+
262
309
 
263
310
  def run_wald_test(self) -> None:
264
311
  """Perform a Wald test.
@@ -288,7 +335,7 @@ class deconveil_stats:
288
335
  ridge_factor = np.diag(1 / self.prior_LFC_var**2)
289
336
  else:
290
337
  ridge_factor = np.diag(np.repeat(1e-6, num_vars))
291
-
338
+
292
339
  design_matrix = self.design_matrix.values
293
340
  LFCs = self.LFC.values
294
341
 
@@ -320,70 +367,38 @@ class deconveil_stats:
320
367
  self.p_values.loc[self.dds.new_all_zeroes_genes] = 1.0
321
368
 
322
369
 
323
- def lfc_shrink(self, coeff: Optional[str] = None, adapt: bool = True) -> None:
370
+ def lfc_shrink(self, coeff: str, adapt: bool = True) -> None:
324
371
  """LFC shrinkage with an apeGLM prior :cite:p:`DeseqStats-zhu2019heavy`.
325
372
 
326
373
  Shrinks LFCs using a heavy-tailed Cauchy prior, leaving p-values unchanged.
327
374
 
328
375
  Parameters
329
376
  ----------
330
- coeff : str or None
331
- The LFC coefficient to shrink. If set to ``None``, the method will try to
332
- shrink the coefficient corresponding to the ``contrast`` attribute.
333
- If the desired coefficient is not available, it may be set from the
334
- :class:`pydeseq2.dds.DeseqDataSet` argument ``ref_level``.
377
+ coeff : str
378
+ The LFC coefficient to shrink. Must be one of the columns of the LFC matrix.
335
379
  (default: ``None``).
380
+
336
381
  adapt: bool
337
382
  Whether to use the MLE estimates of LFC to adapt the prior. If False, the
338
383
  prior scale is set to 1. (``default=True``)
339
384
  """
340
- if self.contrast[1] == self.contrast[2] == "":
341
- # The factor being tested is continuous
342
- contrast_level = self.contrast[0]
343
- else:
344
- # The factor being tested is categorical
345
- contrast_level = (
346
- f"{self.contrast[0]}_{self.contrast[1]}_vs_{self.contrast[2]}"
347
- )
348
385
 
349
- if coeff is not None:
350
- if coeff not in self.LFC.columns:
351
- split_coeff = coeff.split("_")
352
- if len(split_coeff) == 4:
353
- raise KeyError(
354
- f"The coeff argument '{coeff}' should be one the LFC columns. "
355
- f"The available LFC coeffs are {self.LFC.columns[1:]}. "
356
- f"If the desired coefficient is not available, please set "
357
- f"`ref_level = [{split_coeff[0]}, {split_coeff[3]}]` "
358
- f"in DeseqDataSet and rerun."
359
- )
360
- else:
361
- raise KeyError(
362
- f"The coeff argument '{coeff}' should be one the LFC columns. "
363
- f"The available LFC coeffs are {self.LFC.columns[1:]}. "
364
- f"If the desired coefficient is not available, please set the "
365
- f"appropriate`ref_level` in DeseqDataSet and rerun."
366
- )
367
- elif contrast_level not in self.LFC.columns:
386
+ if coeff not in self.LFC.columns:
368
387
  raise KeyError(
369
- f"lfc_shrink's coeff argument was set to None, but the coefficient "
370
- f"corresponding to the contrast {self.contrast} is not available."
371
- f"The available LFC coeffs are {self.LFC.columns[1:]}. "
372
- f"If the desired coefficient is not available, please set "
373
- f"`ref_level = [{self.contrast[0]}, {self.contrast[2]}]` "
374
- f"in DeseqDataSet and rerun."
375
- )
376
- else:
377
- coeff = contrast_level
388
+ f"coeff '{coeff}' must be one of the LFC columns.\n"
389
+ f"Available: {list(self.LFC.columns)}"
390
+ )
378
391
 
379
392
  coeff_idx = self.LFC.columns.get_loc(coeff)
380
393
 
394
+ design_matrix = self.design_matrix.values
381
395
  size = 1.0 / self.dds.varm["dispersions"]
382
- offset = np.log(self.dds.obsm["size_factors"])
396
+ offset = np.log(self.dds.obsm["size_factors"])
383
397
 
384
398
  counts=self.dds.data["counts"]
385
399
  cnv=self.dds.data["cnv"].to_numpy()
386
- cnv = cnv + 0.1
400
+ cnv = (cnv / 2) + 0.1
401
+ #cnv = cnv + 0.1
387
402
  cnv = np.log(cnv)
388
403
 
389
404
  # Set priors
@@ -444,24 +459,10 @@ class deconveil_stats:
444
459
  if hasattr(self, "results_df"):
445
460
  self.results_df["log2FoldChange"] = self.LFC.iloc[:, coeff_idx] / np.log(2)
446
461
  self.results_df["lfcSE"] = self.SE / np.log(2)
447
- # Get the corresponding factor, tested and reference levels of the shrunk
448
- # coefficient
449
- split_coeff = coeff.split("_")
450
- # Categorical coeffs are of the form "factor_A_vs_B", and continuous coeffs
451
- # of the form "factor".
452
- if len(split_coeff) == 1:
453
- # The factor is continuous
454
- print(f"Shrunk log2 fold change & Wald test p-value: " f"{coeff}")
455
- else:
456
- # The factor is categorical
457
- # Categorical coeffs are of the form "factor_A_vs_B", hence "factor"
458
- # is split_coeff[0], "A" is split_coeff[1] and "B" split_coeff[3]
459
- print(
460
- f"Shrunk log2 fold change & Wald test p-value: "
461
- f"{split_coeff[0]} {split_coeff[1]} vs {split_coeff[3]}"
462
- )
462
+ if not self.quiet:
463
+ print(f"Shrunk log2 fold change & Wald test p-value: {coeff}")
464
+ print(self.results_df)
463
465
 
464
- print(self.results_df)
465
466
 
466
467
 
467
468
  def _independent_filtering(self) -> None:
@@ -534,7 +535,6 @@ class deconveil_stats:
534
535
 
535
536
  # As in DESeq2, only take samples with 3 or more replicates when looking for
536
537
  # max cooks.
537
- #use_for_max = n_or_more_replicates(self.design_matrix, 3)
538
538
  use_for_max = n_or_more_replicates(self.dds.obsm["design_matrix"], 3).values
539
539
 
540
540
  # If for a gene there are 3 samples or more that have more counts than the
@@ -614,70 +614,7 @@ class deconveil_stats:
614
614
  return min_var
615
615
  else:
616
616
  return root_scalar(objective, bracket=[min_var, max_var]).root
617
-
618
- def _build_contrast(self, contrast: Optional[List[str]] = None) -> None:
619
- """Check the validity of the contrast (if provided).
620
-
621
- If not, build a default
622
- contrast, corresponding to the last column of the design matrix.
623
- A contrast should be a list of three strings, in the following format:
624
- ``['variable_of_interest', 'tested_level', 'reference_level']``.
625
- Names must correspond to the metadata data passed to the DeseqDataSet.
626
- E.g., ``['condition', 'B', 'A']`` will measure the LFC of 'condition B'
627
- compared to 'condition A'.
628
- For continuous variables, the last two strings will be left empty, e.g.
629
- ``['measurement', '', ''].
630
- If None, the last variable from the design matrix
631
- is chosen as the variable of interest, and the reference level is picked
632
- alphabetically.
633
-
634
- Parameters
635
- ----------
636
- contrast : list or None
637
- A list of three strings, in the following format:
638
- ``['variable_of_interest', 'tested_level', 'reference_level']``.
639
- (default: ``None``).
640
- """
641
- if contrast is not None: # Test contrast if provided
642
- if len(contrast) != 3:
643
- raise ValueError("The contrast should contain three strings.")
644
- if contrast[0] not in self.dds.design_factors:
645
- raise KeyError(
646
- f"The contrast variable ('{contrast[0]}') should be one "
647
- f"of the design factors."
648
- )
649
- if not (contrast[1] == contrast[2] == ""):
650
- # The contrast factor is categorical, so we should check that the tested
651
- # and reference levels are valid.
652
- if contrast[1] not in self.dds.obs[contrast[0]].values:
653
- raise KeyError(
654
- f"The tested level ('{contrast[1]}') should correspond to "
655
- f"one of the levels of '{contrast[0]}'"
656
- )
657
- if contrast[2] not in self.dds.obs[contrast[0]].values:
658
- raise KeyError(
659
- f"The reference level ('{contrast[2]}') should correspond to "
660
- f"one of the levels of '{contrast[0]}'"
661
- )
662
- self.contrast = contrast
663
- else: # Build contrast if None
664
- factor = self.dds.design_factors[-1]
665
- # Check whether this factor is categorical or continuous.
666
- if (
667
- self.dds.continuous_factors is not None
668
- and factor in self.dds.continuous_factors
669
- ):
670
- # The factor is continuous
671
- self.contrast = [factor, "", ""]
672
- else:
673
- # The factor is categorical
674
- factor_col = next(
675
- col
676
- for col in self.dds.obsm["design_matrix"].columns
677
- if col.startswith(factor)
678
- )
679
- split_col = factor_col.split("_")
680
- self.contrast = [split_col[0], split_col[1], split_col[-1]]
617
+
681
618
 
682
619
  def _build_contrast_vector(self) -> None:
683
620
  """
@@ -688,34 +625,9 @@ class deconveil_stats:
688
625
  factor = self.contrast[0]
689
626
  alternative = self.contrast[1]
690
627
  ref = self.contrast[2]
691
- if ref == alternative == "":
692
- # "factor" is a continuous variable
693
- contrast_level = factor
694
- else:
695
- contrast_level = f"{factor}_{alternative}_vs_{ref}"
696
-
697
- self.contrast_vector = np.zeros(self.LFC.shape[-1])
698
- if contrast_level in self.design_matrix.columns:
699
- self.contrast_idx = self.LFC.columns.get_loc(contrast_level)
700
- self.contrast_vector[self.contrast_idx] = 1
701
- elif f"{factor}_{ref}_vs_{alternative}" in self.design_matrix.columns:
702
- # Reference and alternative are inverted
703
- self.contrast_idx = self.LFC.columns.get_loc(
704
- f"{factor}_{ref}_vs_{alternative}"
705
- )
706
- self.contrast_vector[self.contrast_idx] = -1
707
- else:
708
- # Need to change reference
709
- # Get any column corresponding to the desired factor and extract old ref
710
- old_ref = next(
711
- col for col in self.LFC.columns if col.startswith(factor)
712
- ).split("_vs_")[-1]
713
- new_alternative_idx = self.LFC.columns.get_loc(
714
- f"{factor}_{alternative}_vs_{old_ref}"
715
- )
716
- new_ref_idx = self.LFC.columns.get_loc(f"{factor}_{ref}_vs_{old_ref}")
717
- self.contrast_vector[new_alternative_idx] = 1
718
- self.contrast_vector[new_ref_idx] = -1
628
+ self.contrast_vector = self.dds.contrast(
629
+ column=factor, baseline=ref, group_to_compare=alternative
630
+ )
719
631
 
720
632
 
721
633
  def plot_MA(self, log: bool = True, save_path: Optional[str] = None, **kwargs):
deconveil/grid_search.py CHANGED
@@ -67,6 +67,7 @@ def grid_fit_beta(
67
67
  raise ValueError("Beta is not properly initialized or has an unexpected shape.")
68
68
 
69
69
 
70
+ #mu = np.maximum(size_factors[:, None] * np.exp(design_matrix @ beta.T), min_mu)
70
71
  mu = np.maximum(cnv * size_factors[:, None] * np.exp(design_matrix @ beta.T), min_mu)
71
72
  return vec_nb_nll(counts, mu, disp) + 0.5 * (1e-6 * beta**2).sum(1)
72
73
 
deconveil/inference.py CHANGED
@@ -261,9 +261,9 @@ class Inference(ABC):
261
261
  ridge_factor: np.ndarray,
262
262
  contrast: np.ndarray,
263
263
  lfc_null: np.ndarray,
264
- alt_hypothesis: Optional[
265
- Literal["greaterAbs", "lessAbs", "greater", "less"]
266
- ] = None,
264
+ alt_hypothesis: (
265
+ Literal["greaterAbs", "lessAbs", "greater", "less"] | None
266
+ ) = None,
267
267
  ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
268
268
  """Run Wald test for differential expression.
269
269
 
@@ -320,7 +320,7 @@ class Inference(ABC):
320
320
  prior_scale: float,
321
321
  optimizer: str,
322
322
  shrink_index: int,
323
- ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
323
+ ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
324
324
  """Fit a negative binomial MAP LFC using an apeGLM prior.
325
325
 
326
326
  Only the LFC is shrinked, and not the intercept.