DeConveil 0.1.3__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
deconveil/utils_fit.py CHANGED
@@ -20,6 +20,11 @@ from pydeseq2.utils import get_num_processes
20
20
  from pydeseq2.grid_search import grid_fit_alpha
21
21
  from pydeseq2.grid_search import grid_fit_shrink_beta
22
22
 
23
+ import rpy2.robjects as ro
24
+ from rpy2.robjects import pandas2ri, conversion, Formula
25
+ import rpy2.robjects.packages as rpackages
26
+ from rpy2.robjects.packages import importr
27
+
23
28
 
24
29
  def irls_glm(
25
30
  counts: np.ndarray,
@@ -43,6 +48,8 @@ def irls_glm(
43
48
  # if full rank, estimate initial betas for IRLS below
44
49
  if np.linalg.matrix_rank(X) == num_vars:
45
50
  Q, R = np.linalg.qr(X)
51
+ eps = 1e-8
52
+ cnv = np.where(cnv == 0, eps, cnv)
46
53
  y = np.log((counts / cnv) / size_factors + 0.1)
47
54
  beta_init = solve(R, Q.T @ y)
48
55
  beta = beta_init
@@ -222,7 +229,14 @@ def fit_moments_dispersions2(
222
229
  """
223
230
  # Exclude genes with all zeroes
224
231
  #normed_counts = normed_counts[:, ~(normed_counts == 0).all(axis=0)]
225
- # mean inverse size factor
232
+ is_all_zero = (normed_counts == 0).all(axis=0)
233
+ # if DataFrame -> Series; if ndarray -> ndarray
234
+ mask = ~np.asarray(is_all_zero)
235
+ if hasattr(normed_counts, "loc"):
236
+ normed_counts = normed_counts.loc[:, mask]
237
+ else:
238
+ normed_counts = normed_counts[:, mask]
239
+ #mean inverse size factor
226
240
  s_mean_inv = (1 /size_factors).mean()
227
241
  mu = normed_counts.mean(0)
228
242
  sigma = normed_counts.var(0, ddof=1)
@@ -407,7 +421,7 @@ def nbinomGLM(
407
421
  d_nll = (
408
422
  counts - (counts + size) / (1 + size * np.exp(-xbeta - offset - cnv))
409
423
  ) @ design_matrix
410
-
424
+
411
425
  return (d_neg_prior - d_nll) / cnst
412
426
 
413
427
  def ddf(beta: np.ndarray, cnst: float = scale_cnst) -> np.ndarray:
@@ -525,149 +539,179 @@ def nbinomFn(
525
539
  return prior - nll
526
540
 
527
541
 
528
- def build_design_matrix(
529
- metadata: pd.DataFrame,
530
- design_factors: Union[str, List[str]] = "condition",
531
- ref_level: Optional[List[str]] = None,
532
- continuous_factors: Optional[List[str]] = None,
533
- expanded: bool = False,
534
- intercept: bool = True,
535
- ) -> pd.DataFrame:
536
- """Build design_matrix matrix for DEA.
542
def _attach_stage_column(table, stage_values, source_col, target_col):
    """Copy ``stage_values[source_col]`` into ``table[target_col]`` in place.

    Rows are matched on the stringified index of ``table``; rows of ``table``
    that are absent from ``stage_values`` receive NaN. Nothing is written when
    ``source_col`` is not a column of ``stage_values``.
    """
    if source_col in stage_values.columns:
        table[target_col] = (
            stage_values[source_col].reindex(table.index.astype(str)).values
        )


def run_stageR(
    res_pydeseq: pd.DataFrame,
    res_deconveil: pd.DataFrame,
    screen_col: str = "pvalue",
    confirm_col: str = "pvalue",
    alpha: float = 0.05,
    method: str = "holm",
):
    """
    Two-stage gene-level multiple testing using the R package stageR.

    Stage I (screening):
    - One omnibus p-value per gene, computed here as the Simes combination
      of the CN-naive and CN-aware p-values: ``min(2 * p_min, p_max)``.
    - Passed to stageR unadjusted (``pScreenAdjusted = FALSE``), so the
      across-gene adjustment is left to stageR.

    Stage II (confirmation):
    - Within-gene correction of the naive + aware p-values, delegated to
      ``stageWiseAdjustment`` with the requested ``method``.

    Requires a working R installation with stageR, reached through rpy2.
    The R objects ``p_screen``, ``p_conf``, ``genes``, ``conf_names``,
    ``stageR_method``, ``stageR_alpha``, ``stageRObj``, ``res_screen`` and
    ``res_confirm`` are written to the R global environment.

    Parameters
    ----------
    res_pydeseq : pd.DataFrame
        CN-naive DE results, indexed by gene, with raw p-values.
    res_deconveil : pd.DataFrame
        CN-aware DE results, indexed by gene, with raw p-values.
    screen_col : str
        Column holding the raw p-values used for screening.
    confirm_col : str
        Column holding the raw p-values used for confirmation.
    alpha : float
        Significance level handed to ``stageWiseAdjustment``.
    method : str
        Within-gene correction method handed to ``stageWiseAdjustment``
        (e.g. ``"holm"``).

    Returns
    -------
    res_screen : pd.DataFrame
        Output of stageR's ``getAdjustedPValues`` (all genes, original order).
    res_confirm : pd.DataFrame
        Output of stageR's ``getResults``.
    res_pydeseq_upd : pd.DataFrame
        Copy of ``res_pydeseq`` with ``padj_stageR`` / ``DE_confirmed`` added
        from the ``p_naive`` columns of the two stageR tables, when present.
    res_deconveil_upd : pd.DataFrame
        Copy of ``res_deconveil`` with ``padj_stageR`` / ``DE_confirmed`` added
        from the ``p_aware`` columns of the two stageR tables, when present.
    """

    # --------------------------------------------------
    # 1. Extract raw p-values, restricted to genes present in both tables
    # --------------------------------------------------
    p_naive = res_pydeseq[screen_col].astype(float)
    p_aware = res_deconveil[screen_col].astype(float)
    p_naive, p_aware = p_naive.align(p_aware, join="inner")

    # --------------------------------------------------
    # 2. Omnibus screening p-values (Simes for two tests)
    # --------------------------------------------------
    p_low = np.minimum(p_naive, p_aware)
    p_high = np.maximum(p_naive, p_aware)
    p_screen = np.minimum(2.0 * p_low, p_high)

    # --------------------------------------------------
    # 3. Confirmation p-values matrix
    # --------------------------------------------------
    p_conf = pd.concat(
        [
            pd.DataFrame({"p_naive": res_pydeseq[confirm_col].astype(float)}),
            pd.DataFrame({"p_aware": res_deconveil[confirm_col].astype(float)}),
        ],
        axis=1,
    )
    # The screening vector only covers genes shared by both tables; the
    # confirmation rows must describe exactly the same genes in the same order.
    if not p_conf.index.equals(p_screen.index):
        p_conf = p_conf.reindex(p_screen.index)

    # stageR requires string rownames
    p_screen.index = p_screen.index.astype(str)
    p_conf.index = p_conf.index.astype(str)

    # --------------------------------------------------
    # 4. Convert to R
    # --------------------------------------------------
    with conversion.localconverter(ro.default_converter + pandas2ri.converter):
        r_p_screen = conversion.py2rpy(p_screen)
        r_p_conf = conversion.py2rpy(p_conf)

    ro.globalenv["p_screen"] = r_p_screen
    ro.globalenv["p_conf"] = r_p_conf
    # Not read by the R snippet below; still exported as before.
    ro.globalenv["genes"] = ro.StrVector(list(p_conf.index))
    ro.globalenv["conf_names"] = ro.StrVector(list(p_conf.columns))
    # Hand the settings over as R values instead of pasting them into R source.
    ro.globalenv["stageR_method"] = ro.StrVector([str(method)])
    ro.globalenv["stageR_alpha"] = ro.FloatVector([float(alpha)])

    # --------------------------------------------------
    # 5. Run stageR
    # --------------------------------------------------
    r_code = """
    library(stageR)

    p_conf <- as.matrix(p_conf)

    stageRObj <- stageR(
        pScreen = p_screen,
        pConfirmation = p_conf,
        pScreenAdjusted = FALSE
    )

    stageRObj <- stageWiseAdjustment(
        stageRObj,
        method = stageR_method,
        alpha = stageR_alpha
    )

    res_screen <- getAdjustedPValues(
        stageRObj,
        onlySignificantGenes = FALSE,
        order = FALSE
    )

    res_confirm <- getResults(stageRObj)
    """

    ro.r(r_code)

    # --------------------------------------------------
    # 6. Convert back to Python
    # --------------------------------------------------
    with conversion.localconverter(ro.default_converter + pandas2ri.converter):
        res_screen = conversion.rpy2py(ro.r("res_screen"))
        res_confirm = conversion.rpy2py(ro.r("res_confirm"))

    # R matrices can come back as bare arrays; restore their row/column labels.
    if isinstance(res_screen, np.ndarray):
        rows = list(ro.r("rownames(res_screen)"))
        cols = list(ro.r("colnames(res_screen)"))
        res_screen = pd.DataFrame(res_screen, index=rows, columns=cols)

    if isinstance(res_confirm, np.ndarray):
        rows = list(ro.r("rownames(res_confirm)"))
        cols = list(ro.r("colnames(res_confirm)"))
        res_confirm = pd.DataFrame(res_confirm, index=rows, columns=cols)

    # --------------------------------------------------
    # 7. Attach results to copies of the original tables
    # --------------------------------------------------
    res_screen.index = res_screen.index.astype(str)
    res_confirm.index = res_confirm.index.astype(str)

    res_pydeseq_upd = res_pydeseq.copy()
    res_deconveil_upd = res_deconveil.copy()

    # Each table is matched on its OWN index, so the two inputs may differ in
    # gene order or gene set; genes missing from the stageR output get NaN.
    _attach_stage_column(res_pydeseq_upd, res_screen, "p_naive", "padj_stageR")
    _attach_stage_column(res_deconveil_upd, res_screen, "p_aware", "padj_stageR")
    _attach_stage_column(res_pydeseq_upd, res_confirm, "p_naive", "DE_confirmed")
    _attach_stage_column(res_deconveil_upd, res_confirm, "p_aware", "DE_confirmed")

    return res_screen, res_confirm, res_pydeseq_upd, res_deconveil_upd
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: DeConveil
3
- Version: 0.1.3
3
+ Version: 0.2.0
4
4
  Summary: An extension of PyDESeq2/DESeq2 designed to account for genome aneuploidy
5
5
  Home-page: https://github.com/caravagnalab/DeConveil
6
6
  Author: Katsiaryna Davydzenka
@@ -18,6 +18,9 @@ Requires-Dist: formulaic-contrasts>=0.2.0
18
18
  Requires-Dist: matplotlib>=3.6.2
19
19
  Requires-Dist: seaborn>=0.12.2
20
20
  Requires-Dist: pydeseq2>=0.4.12
21
+ Requires-Dist: rpy2>=3.5.0
22
+ Provides-Extra: stan
23
+ Requires-Dist: cmdstanpy>=1.2.0; extra == "stan"
21
24
  Provides-Extra: dev
22
25
  Requires-Dist: pytest>=6.2.4; extra == "dev"
23
26
  Requires-Dist: pre-commit>=2.13.0; extra == "dev"
@@ -0,0 +1,18 @@
1
+ deconveil/__init__.py,sha256=Txa18Rg91nrz8AV5RAZvuWi2Js9tCR_MByTdCOgCGeI,221
2
+ deconveil/__version__.py,sha256=Zn1KFblwuFHiDRdRAiRnDBRkbPttWh44jKa5zG2ov0E,22
3
+ deconveil/dds.py,sha256=36-FhisZjYHt9Qm-yjArOdVniQo5WlDcfUs5QYd9QZM,53688
4
+ deconveil/default_inference.py,sha256=xkZFkeBcKjzn2qiLo46qXTbG2wFdaij1-odU8poUEok,9465
5
+ deconveil/ds.py,sha256=nZzXve0l-xoxaS7MzHtzwU8WIYYplsup_RhWCu-iYOE,24427
6
+ deconveil/grid_search.py,sha256=csUBiwluUE4IG18G2qfTbi1sZWReEWiltOXnoBwX27c,5270
7
+ deconveil/inference.py,sha256=E7XUR_bkrwoqoiKzoFz9ENYcFUlg5xUxynetsub4ZPM,10428
8
+ deconveil/nb_regression_fit.py,sha256=tm_Xfom30Zz0DhDxkauES6mogeEjtKP8VLor-neT-P0,11464
9
+ deconveil/simulate_gene_dosage.py,sha256=Do2XF8zoGlBynZieIYRYd3HY5R2b2uiI6M075Qkq5ZA,18646
10
+ deconveil/utils_clustering.py,sha256=twspPvXQ6pvw_NaY1ebyvswuH3ZvVBGn7DeOpZ1XatI,5939
11
+ deconveil/utils_fit.py,sha256=PpLQgG0MwCO2-sNDjp_nAQyOLS06MBUHNLijRNJhtmU,22272
12
+ deconveil/utils_plot.py,sha256=1JQthYXaEUKUWa0fy8owkyJ1CTkQxlrSRAqPkXMk7Us,9857
13
+ deconveil/utils_processing.py,sha256=9j35FAfQ7oNjdH1FWHP90DBTyL5RwlgdVbbW9de10VI,6560
14
+ deconveil-0.2.0.dist-info/licenses/LICENSE,sha256=BJ0f3JRteiF7tjiARi8syxiu4yKmckc0nWlHCKXttKQ,1078
15
+ deconveil-0.2.0.dist-info/METADATA,sha256=p5jiKvL6TKxr5ux72mOhOPjIeaVGxU-QeBPI_Sd78Ck,1194
16
+ deconveil-0.2.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
17
+ deconveil-0.2.0.dist-info/top_level.txt,sha256=yAWZbw0eg8XpbMsswoq-VzBGfQHrfWOqNHnu2qQ2xO4,10
18
+ deconveil-0.2.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.9.0)
2
+ Generator: setuptools (80.10.2)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,16 +0,0 @@
1
- deconveil/__init__.py,sha256=_6FL_AYiycv9nP3mKJiQ4zl4aU83YSWnV2YoIZr9Mv0,188
2
- deconveil/__version__.py,sha256=XEqb2aiIn8fzGE68Mph4ck1FtQqsR_am0wRWvrYPffQ,22
3
- deconveil/dds.py,sha256=0MNwtDzCjqjoJR-rrCmVu3JOaDd3gXuToOzTBXJMxak,49039
4
- deconveil/default_inference.py,sha256=J40O0-qZChLnLrLGmhwxjaTVsV7REWAUQOTf8qSwWk0,9466
5
- deconveil/ds.py,sha256=Vb9p152U1KXltrXFpMoBxY6YRW25dP4CO26_osbz6Aw,29476
6
- deconveil/grid_search.py,sha256=iOHR8ur10MyrrfEZHr409lGulGxODufsjG6j7lQ7tWs,5181
7
- deconveil/inference.py,sha256=B3zf3q_mbCTX3gHJwuXnTuy9uyXOxEjuWyaSR6VtVEo,10429
8
- deconveil/utils_clustering.py,sha256=twspPvXQ6pvw_NaY1ebyvswuH3ZvVBGn7DeOpZ1XatI,5939
9
- deconveil/utils_fit.py,sha256=SdGcBQjN3cyzbSFessufYOOOJAQCOjNcy3etbwmodsM,21583
10
- deconveil/utils_plot.py,sha256=1JQthYXaEUKUWa0fy8owkyJ1CTkQxlrSRAqPkXMk7Us,9857
11
- deconveil/utils_processing.py,sha256=9j35FAfQ7oNjdH1FWHP90DBTyL5RwlgdVbbW9de10VI,6560
12
- deconveil-0.1.3.dist-info/licenses/LICENSE,sha256=BJ0f3JRteiF7tjiARi8syxiu4yKmckc0nWlHCKXttKQ,1078
13
- deconveil-0.1.3.dist-info/METADATA,sha256=yL6AwQ5ziGhrI5lE4FDCHOadT81W6yEIDWGsHni6Q5w,1097
14
- deconveil-0.1.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
15
- deconveil-0.1.3.dist-info/top_level.txt,sha256=yAWZbw0eg8XpbMsswoq-VzBGfQHrfWOqNHnu2qQ2xO4,10
16
- deconveil-0.1.3.dist-info/RECORD,,