PyPI - consenrich - Versions diffs - 0.6.3b1__cp311-cp311-macosx_11_0_arm64.whl → 0.7.1b1__cp311-cp311-macosx_11_0_arm64.whl - Mend

consenrich 0.6.3b1__cp311-cp311-macosx_11_0_arm64.whl → 0.7.1b1__cp311-cp311-macosx_11_0_arm64.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of consenrich might be problematic. Click here for more details.

Files changed (13)

consenrich/cconsenrich.c +404 -404
consenrich/cconsenrich.cpython-311-darwin.so +0 -0
consenrich/consenrich.py +216 -62
consenrich/core.py +30 -17
consenrich/detrorm.py +12 -3
consenrich/matching.py +444 -369
consenrich/misc_util.py +29 -0
{consenrich-0.6.3b1.dist-info → consenrich-0.7.1b1.dist-info}/METADATA +3 -3
{consenrich-0.6.3b1.dist-info → consenrich-0.7.1b1.dist-info}/RECORD +13 -13
{consenrich-0.6.3b1.dist-info → consenrich-0.7.1b1.dist-info}/WHEEL +0 -0
{consenrich-0.6.3b1.dist-info → consenrich-0.7.1b1.dist-info}/entry_points.txt +0 -0
{consenrich-0.6.3b1.dist-info → consenrich-0.7.1b1.dist-info}/licenses/LICENSE +0 -0
{consenrich-0.6.3b1.dist-info → consenrich-0.7.1b1.dist-info}/top_level.txt +0 -0

consenrich/core.py CHANGED Viewed

@@ -194,6 +194,8 @@ class samParams(NamedTuple):
        extend reads from 5'. Ignored if `pairedEndMode > 0` or `extendBP` set. This parameter is particularly
        important when targeting broader marks (e.g., ChIP-seq H3K27me3).
     :type inferFragmentLength: int
+    :param countEndsOnly: If True, only the 5' ends of reads are counted. Overrides `inferFragmentLength` and `pairedEndMode`.
+    :type countEndsOnly: Optional[bool]
     .. tip::
@@ -210,6 +212,7 @@ class samParams(NamedTuple):
     maxInsertSize: Optional[int] = 1000
     pairedEndMode: Optional[int] = 0
     inferFragmentLength: Optional[int] = 0
+    countEndsOnly: Optional[bool] = False
 class detrendParams(NamedTuple):
@@ -251,6 +254,7 @@ class inputParams(NamedTuple):
     bamFiles: List[str]
     bamFilesControl: Optional[List[str]]
+    pairedEnd: Optional[bool]
 class genomeParams(NamedTuple):
@@ -309,9 +313,9 @@ class countingParams(NamedTuple):
 class matchingParams(NamedTuple):
-    r"""Parameters related to the matching algorithm packaged with this software.
+    r"""Parameters related to the matching algorithm.
-    See :ref:`matching` for details.
+    See :ref:`matching` for an overview of the approach.
     :param templateNames: A list of str values -- wavelet bases used for matching, e.g., `[haar, db2, sym4]`
     :type templateNames: List[str]
@@ -322,20 +326,18 @@ class matchingParams(NamedTuple):
         an empirical null to test significance. See :func:`cconsenrich.csampleBlockStats`.
     :type iters: int
     :param alpha: Primary significance threshold on detected matches. Specifically, the
-        :math:`1 - \alpha` quantile of an empirical null distribution. The empirical null
-        distribution is built from cross-correlation values over randomly sampled blocks.
+        minimum corr. empirical p-value approximated from randomly sampled blocks in the
+        response sequence.
     :type alpha: float
     :param minMatchLengthBP: Within a window of `minMatchLengthBP` length (bp), relative maxima in
         the signal-template convolution must be greater in value than others to qualify as matches.
-        *Set to a negative value to disable this filter*.
     :type minMatchLengthBP: int
-    :param minSignalAtMaxima: Secondary significance threshold coupled with `alpha`. Require the *signal value*
-        at relative maxima in the response sequence to be greater than this threshold. Comparisons are made in log-scale.
-        If a `float` value is provided, the minimum signal value must be greater than this (absolute) value. *Set to a
-        negative value to disable the threshold*.
-        If a `str` value is provided, looks for 'q:quantileValue', e.g., 'q:0.75'. The
+    :param minSignalAtMaxima: Secondary significance threshold coupled with `alpha`. Requires the *signal value*
+        at relative maxima in the response sequence to be greater than this threshold. Comparisons are made in log-scale
+        to temper genome-wide dynamic range. If a `float` value is provided, the minimum signal value must be greater
+        than this (absolute) value. *Set to a negative value to disable the threshold*.
+        If a `str` value is provided, looks for 'q:quantileValue', e.g., 'q:0.90'. The
         threshold is then set to the corresponding quantile of the non-zero signal estimates.
-        Defaults to str value 'q:0.75' --- the 75th percentile of signal values.
     :type minSignalAtMaxima: Optional[str | float]
     :param useScalingFunction: If True, use (only) the scaling function to build the matching template.
         If False, use (only) the wavelet function.
@@ -343,20 +345,21 @@ class matchingParams(NamedTuple):
     :param excludeRegionsBedFile: A BED file with regions to exclude from matching
     :type excludeRegionsBedFile: Optional[str]
-    :seealso: :class:`consenrich.core.matchingParams`, :func:`cconsenrich.csampleBlockStats`, :ref:`matching`
+    :seealso: :func:`cconsenrich.csampleBlockStats`, :ref:`matching`
     """
     templateNames: List[str]
     cascadeLevels: List[int]
     iters: int
     alpha: float
+    useScalingFunction: Optional[bool]
     minMatchLengthBP: Optional[int]
     maxNumMatches: Optional[int]
-    minSignalAtMaxima: Optional[str | float] = "q:0.75"
-    merge: bool = False
-    mergeGapBP: int = 25
-    useScalingFunction: bool = True
-    excludeRegionsBedFile: Optional[str] = None
+    minSignalAtMaxima: Optional[str | float]
+    merge: Optional[bool]
+    mergeGapBP: Optional[int]
+    excludeRegionsBedFile: Optional[str]
 def _numIntervals(start: int, end: int, step: int) -> int:
@@ -518,6 +521,7 @@ def readBamSegments(
     maxInsertSize: Optional[int] = 1000,
     pairedEndMode: Optional[int] = 0,
     inferFragmentLength: Optional[int] = 0,
+    countEndsOnly: Optional[bool] = False,
 ) -> npt.NDArray[np.float32]:
     r"""Calculate tracks of read counts (or a function thereof) for each BAM file.
@@ -553,6 +557,9 @@ def readBamSegments(
     :type pairedEndMode: int
     :param inferFragmentLength: See :class:`samParams`.
     :type inferFragmentLength: int
+    :param countEndsOnly: If True, only the 5' ends of reads are counted. This overrides `inferFragmentLength` and `pairedEndMode`.
+    :type countEndsOnly: Optional[bool]
     """
     if len(bamFiles) == 0:
@@ -567,6 +574,12 @@ def readBamSegments(
     offsetStr = ((str(offsetStr) or "0,0").replace(" ", "")).split(",")
     numIntervals = ((end - start) + stepSize - 1) // stepSize
     counts = np.empty((len(bamFiles), numIntervals), dtype=np.float32)
+    if isinstance(countEndsOnly, bool) and countEndsOnly:
+        # note: setting this option ignores inferFragmentLength, pairedEndMode
+        inferFragmentLength = 0
+        pairedEndMode = 0
     for j, bam in enumerate(bamFiles):
         logger.info(f"Reading {chromosome}: {bam}")
         arr = cconsenrich.creadBamSegment(

consenrich/detrorm.py CHANGED Viewed

@@ -39,7 +39,7 @@ def getScaleFactor1x(
     :type bamFile: str
     :param effectiveGenomeSize: Effective genome size in base pairs. See :func:`consenrich.constants.getEffectiveGenomeSize`.
     :type effectiveGenomeSize: int
-    :param readLength: Read length (base pairs). See :func:`consenrich.core.getReadLength`.
+    :param readLength: read length or fragment length
     :type readLength: int
     :param excludeChroms: List of chromosomes to exclude from the analysis.
     :type excludeChroms: List[str]
@@ -125,9 +125,9 @@ def getPairScaleFactors(
     :type effectiveGenomeSizeA: int
     :param effectiveGenomeSizeB: Effective genome size for the second BAM file.
     :type effectiveGenomeSizeB: int
-    :param readLengthA: Read length for the first BAM file.
+    :param readLengthA: read length or fragment length for the first BAM file.
     :type readLengthA: int
-    :param readLengthB: Read length for the second BAM file.
+    :param readLengthB: read length or fragment length for the second BAM file.
     :type readLengthB: int
     :param excludeChroms: List of chromosomes to exclude from the analysis.
     :type excludeChroms: List[str]
@@ -167,9 +167,18 @@ def getPairScaleFactors(
     else:
         scaleFactorA *= coverageB / coverageA
         scaleFactorB = 1.0
     logger.info(
         f"Final scale factors: {bamFileA}: {scaleFactorA}, {bamFileB}: {scaleFactorB}"
     )
+    ratio = max(scaleFactorA, scaleFactorB) / min(scaleFactorA, scaleFactorB)
+    if ratio > 5.0:
+        logger.warning(
+            f"Scale factors differ > 5x....\n"
+            f"\n\tAre effective genome sizes {effectiveGenomeSizeA} and {effectiveGenomeSizeB} correct?"
+            f"\n\tAre read/fragment lengths {readLengthA},{readLengthB} correct?"
+        )
     return scaleFactorA, scaleFactorB