DeConveil 0.1.1__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1 @@
1
+ __version__ = "0.1.2"
deconveil/dds.py CHANGED
@@ -17,10 +17,10 @@ from deconveil.utils_fit import fit_rough_dispersions
17
17
  from deconveil.utils_fit import fit_moments_dispersions2
18
18
  from deconveil.utils_fit import grid_fit_beta
19
19
  from deconveil.utils_fit import irls_glm
20
+ from deconveil.utils_fit import build_design_matrix
20
21
 
21
22
  from pydeseq2.preprocessing import deseq2_norm_fit
22
23
  from pydeseq2.preprocessing import deseq2_norm_transform
23
- from pydeseq2.utils import build_design_matrix
24
24
  from pydeseq2.utils import dispersion_trend
25
25
  from pydeseq2.utils import mean_absolute_deviation
26
26
  from pydeseq2.utils import n_or_more_replicates
deconveil/utils_fit.py CHANGED
@@ -523,3 +523,151 @@ def nbinomFn(
523
523
  ).sum(0)
524
524
 
525
525
  return prior - nll
526
+
527
+
528
def build_design_matrix(
    metadata: pd.DataFrame,
    design_factors: Union[str, List[str]] = "condition",
    ref_level: Optional[List[str]] = None,
    continuous_factors: Optional[List[str]] = None,
    expanded: bool = False,
    intercept: bool = True,
) -> pd.DataFrame:
    """Build the design matrix for DEA.

    Unless specified, the reference level of each categorical factor is the one
    dropped by ``pandas.get_dummies`` (alphabetically first for string columns).

    Parameters
    ----------
    metadata : pandas.DataFrame
        DataFrame containing metadata information.
        Must be indexed by sample barcodes.
        NOTE: this frame is modified in place when a non-numeric design factor
        has levels containing underscores (they are replaced by hyphens).

    design_factors : str or list
        Name of the columns of metadata to be used as design_matrix variables.
        (default: ``"condition"``).

    ref_level : list or None
        An optional list of two strings of the form ``["factor", "ref_level"]``
        specifying the factor of interest and the desired reference level, e.g.
        ``["condition", "A"]``. (default: ``None``).

    continuous_factors : list or None
        An optional list of continuous (as opposed to categorical) factors. Any
        factor not in ``continuous_factors`` will be considered categorical
        (default: ``None``).

    expanded : bool
        If true, use one column per category. Else, use n-1 columns, for each
        n-level categorical factor.
        (default: ``False``).

    intercept : bool
        If true, add an intercept (a column containing only ones) as the first
        column. (default: ``True``).

    Returns
    -------
    pandas.DataFrame
        The design matrix, indexed like ``metadata``. Categorical dummies (and
        the intercept) are integers; in the unexpanded design their names carry
        a ``_vs_<reference level>`` suffix. Continuous factors are appended last
        as numeric columns.

    Raises
    ------
    ValueError
        If a design factor takes fewer than two distinct values.
    KeyError
        If ``ref_level`` does not have exactly two entries, or if the requested
        reference level is not present in the corresponding metadata column.
    """
    if isinstance(design_factors, str):
        # A single factor was given: normalise to a singleton list.
        design_factors = [design_factors]

    for factor in design_factors:
        # Every factor needs at least two distinct values to be estimable.
        levels = np.unique(metadata[factor])
        if len(levels) < 2:
            raise ValueError(
                f"Factors should take at least two values, but {factor} "
                f"takes the single value '{levels}'."
            )

    # Underscores are used as separators in the design-matrix column names
    # ("<factor>_<level>_vs_<ref>"), so levels must not contain any: convert them
    # to hyphens. Numeric columns (e.g. continuous factors) have no string levels
    # and would make the ``"_" in value`` test raise, so they are skipped.
    warning_issued = False
    for factor in design_factors:
        if pd.api.types.is_numeric_dtype(metadata[factor]):
            continue
        if any("_" in value for value in metadata[factor]):
            if not warning_issued:
                warnings.warn(
                    """Some factor levels in the design contain underscores ('_').
                    They will be converted to hyphens ('-').""",
                    UserWarning,
                    stacklevel=2,
                )
                warning_issued = True
            # In-place on purpose: the caller's metadata is kept consistent with
            # the level names used in the design-matrix columns.
            metadata[factor] = metadata[factor].apply(lambda x: x.replace("_", "-"))

    if continuous_factors is not None:
        categorical_factors = [
            factor for factor in design_factors if factor not in continuous_factors
        ]
    else:
        categorical_factors = design_factors

    if len(categorical_factors) > 0:
        design_matrix = pd.get_dummies(
            metadata[categorical_factors], drop_first=not expanded
        )

        if ref_level is not None:
            if len(ref_level) != 2:
                raise KeyError("The reference level should contain 2 strings.")
            if ref_level[1] not in metadata[ref_level[0]].values:
                raise KeyError(
                    f"The metadata data should contain a '{ref_level[0]}' column"
                    f" with a '{ref_level[1]}' level."
                )

            # In the unexpanded design the reference level must be the level
            # without a column. If get_dummies dropped another level instead,
            # swap them: rebuild the dropped level's column and remove the
            # reference level's one.
            ref_level_name = "_".join(ref_level)
            if (not expanded) and ref_level_name in design_matrix.columns:
                factor_cols = _dummy_columns(design_matrix, metadata, ref_level[0])
                missing_level = next(
                    level
                    for level in np.unique(metadata[ref_level[0]])
                    if f"{ref_level[0]}_{level}" not in design_matrix.columns
                )
                # A sample belongs to the dropped level iff all of the factor's
                # remaining indicator columns are zero.
                design_matrix[f"{ref_level[0]}_{missing_level}"] = (
                    1 - design_matrix[factor_cols].sum(axis=1)
                )
                design_matrix.drop(ref_level_name, axis="columns", inplace=True)

        if not expanded:
            # Add the reference level as a column name suffix. Only categorical
            # factors own dummy columns, so continuous factors are not visited.
            for factor in categorical_factors:
                # Exact name matching: prefix matching would also catch the
                # columns of any other factor whose name starts with this one.
                own_cols = set(_dummy_columns(design_matrix, metadata, factor))
                if ref_level is None or factor != ref_level[0]:
                    # The reference is the level that no longer has a column.
                    ref = next(
                        level
                        for level in np.unique(metadata[factor])
                        if f"{factor}_{level}" not in design_matrix.columns
                    )
                else:
                    # The reference level is given as an argument.
                    ref = ref_level[1]
                design_matrix.columns = [
                    f"{col}_vs_{ref}" if col in own_cols else col
                    for col in design_matrix.columns
                ]
    else:
        # There is no categorical factor in the design.
        design_matrix = pd.DataFrame(index=metadata.index)

    if intercept:
        design_matrix.insert(0, "intercept", 1)

    # Convert the one-hot encodings (boolean in recent pandas) to int.
    design_matrix = design_matrix.astype("int")

    # Continuous factors are appended after the cast so they keep their values.
    if continuous_factors is not None:
        for factor in continuous_factors:
            # This factor should be numeric.
            design_matrix[factor] = pd.to_numeric(metadata[factor])
    return design_matrix


def _dummy_columns(
    design_matrix: pd.DataFrame, metadata: pd.DataFrame, factor: str
) -> List[str]:
    """Return the columns of ``design_matrix`` named ``"<factor>_<level>"``."""
    candidates = {f"{factor}_{level}" for level in np.unique(metadata[factor])}
    return [col for col in design_matrix.columns if col in candidates]
673
+
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: DeConveil
3
- Version: 0.1.1
3
+ Version: 0.1.2
4
4
  Summary: An extension of PyDESeq2/DESeq2 designed to account for genome aneuploidy
5
5
  Home-page: https://github.com/caravagnalab/DeConveil
6
6
  Author: Katsiaryna Davydzenka
@@ -1,15 +1,16 @@
1
1
  deconveil/__init__.py,sha256=_6FL_AYiycv9nP3mKJiQ4zl4aU83YSWnV2YoIZr9Mv0,188
2
- deconveil/dds.py,sha256=ccYi6o6c6yw_5AXWdpbs0MIALp1xLsUvtp-QhEzH3cQ,49034
2
+ deconveil/__version__.py,sha256=K5SiDdEGYMpdqXThrqwTqECJJBOQNTQDrnpc2K5mzKs,21
3
+ deconveil/dds.py,sha256=0MNwtDzCjqjoJR-rrCmVu3JOaDd3gXuToOzTBXJMxak,49039
3
4
  deconveil/default_inference.py,sha256=J40O0-qZChLnLrLGmhwxjaTVsV7REWAUQOTf8qSwWk0,9466
4
5
  deconveil/ds.py,sha256=Vb9p152U1KXltrXFpMoBxY6YRW25dP4CO26_osbz6Aw,29476
5
6
  deconveil/grid_search.py,sha256=iOHR8ur10MyrrfEZHr409lGulGxODufsjG6j7lQ7tWs,5181
6
7
  deconveil/inference.py,sha256=B3zf3q_mbCTX3gHJwuXnTuy9uyXOxEjuWyaSR6VtVEo,10429
7
8
  deconveil/utils_clustering.py,sha256=twspPvXQ6pvw_NaY1ebyvswuH3ZvVBGn7DeOpZ1XatI,5939
8
- deconveil/utils_fit.py,sha256=ODtIwFKKKchQBiwdNhPSOCt5wsPcpLHCKRENf6JmF18,15785
9
+ deconveil/utils_fit.py,sha256=SdGcBQjN3cyzbSFessufYOOOJAQCOjNcy3etbwmodsM,21583
9
10
  deconveil/utils_plot.py,sha256=1JQthYXaEUKUWa0fy8owkyJ1CTkQxlrSRAqPkXMk7Us,9857
10
11
  deconveil/utils_processing.py,sha256=CB99CwQst7eUiIgE58yl7_3E6uD9CgQoU_Qmprjyt-s,4141
11
- deconveil-0.1.1.dist-info/licenses/LICENSE,sha256=BJ0f3JRteiF7tjiARi8syxiu4yKmckc0nWlHCKXttKQ,1078
12
- deconveil-0.1.1.dist-info/METADATA,sha256=zuxmrMBmMhAz8bcoDdMZhZcs38oomC-mNhue4hsiVY4,1097
13
- deconveil-0.1.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
14
- deconveil-0.1.1.dist-info/top_level.txt,sha256=yAWZbw0eg8XpbMsswoq-VzBGfQHrfWOqNHnu2qQ2xO4,10
15
- deconveil-0.1.1.dist-info/RECORD,,
12
+ deconveil-0.1.2.dist-info/licenses/LICENSE,sha256=BJ0f3JRteiF7tjiARi8syxiu4yKmckc0nWlHCKXttKQ,1078
13
+ deconveil-0.1.2.dist-info/METADATA,sha256=JqHZYXo0lLvPjoj_cDT-IwHADSKdESJQxorDbpsk3-k,1097
14
+ deconveil-0.1.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
15
+ deconveil-0.1.2.dist-info/top_level.txt,sha256=yAWZbw0eg8XpbMsswoq-VzBGfQHrfWOqNHnu2qQ2xO4,10
16
+ deconveil-0.1.2.dist-info/RECORD,,