consenrich 0.6.3b1__cp311-cp311-macosx_11_0_arm64.whl → 0.7.1b1__cp311-cp311-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of consenrich might be problematic. Click here for more details.

consenrich/core.py CHANGED
@@ -194,6 +194,8 @@ class samParams(NamedTuple):
194
194
  extend reads from 5'. Ignored if `pairedEndMode > 0` or `extendBP` set. This parameter is particularly
195
195
  important when targeting broader marks (e.g., ChIP-seq H3K27me3).
196
196
  :type inferFragmentLength: int
197
+ :param countEndsOnly: If True, only the 5' ends of reads are counted. Overrides `inferFragmentLength` and `pairedEndMode`.
198
+ :type countEndsOnly: Optional[bool]
197
199
 
198
200
  .. tip::
199
201
 
@@ -210,6 +212,7 @@ class samParams(NamedTuple):
210
212
  maxInsertSize: Optional[int] = 1000
211
213
  pairedEndMode: Optional[int] = 0
212
214
  inferFragmentLength: Optional[int] = 0
215
+ countEndsOnly: Optional[bool] = False
213
216
 
214
217
 
215
218
  class detrendParams(NamedTuple):
@@ -251,6 +254,7 @@ class inputParams(NamedTuple):
251
254
 
252
255
  bamFiles: List[str]
253
256
  bamFilesControl: Optional[List[str]]
257
+ pairedEnd: Optional[bool]
254
258
 
255
259
 
256
260
  class genomeParams(NamedTuple):
@@ -309,9 +313,9 @@ class countingParams(NamedTuple):
309
313
 
310
314
 
311
315
  class matchingParams(NamedTuple):
312
- r"""Parameters related to the matching algorithm packaged with this software.
316
+ r"""Parameters related to the matching algorithm.
313
317
 
314
- See :ref:`matching` for details.
318
+ See :ref:`matching` for an overview of the approach.
315
319
 
316
320
  :param templateNames: A list of str values -- wavelet bases used for matching, e.g., `[haar, db2, sym4]`
317
321
  :type templateNames: List[str]
@@ -322,20 +326,18 @@ class matchingParams(NamedTuple):
322
326
  an empirical null to test significance. See :func:`cconsenrich.csampleBlockStats`.
323
327
  :type iters: int
324
328
  :param alpha: Primary significance threshold on detected matches. Specifically, the
325
- :math:`1 - \alpha` quantile of an empirical null distribution. The empirical null
326
- distribution is built from cross-correlation values over randomly sampled blocks.
329
+ minimum corr. empirical p-value approximated from randomly sampled blocks in the
330
+ response sequence.
327
331
  :type alpha: float
328
332
  :param minMatchLengthBP: Within a window of `minMatchLengthBP` length (bp), relative maxima in
329
333
  the signal-template convolution must be greater in value than others to qualify as matches.
330
- *Set to a negative value to disable this filter*.
331
334
  :type minMatchLengthBP: int
332
- :param minSignalAtMaxima: Secondary significance threshold coupled with `alpha`. Require the *signal value*
333
- at relative maxima in the response sequence to be greater than this threshold. Comparisons are made in log-scale.
334
- If a `float` value is provided, the minimum signal value must be greater than this (absolute) value. *Set to a
335
- negative value to disable the threshold*.
336
- If a `str` value is provided, looks for 'q:quantileValue', e.g., 'q:0.75'. The
335
+ :param minSignalAtMaxima: Secondary significance threshold coupled with `alpha`. Requires the *signal value*
336
+ at relative maxima in the response sequence to be greater than this threshold. Comparisons are made in log-scale
337
+ to temper genome-wide dynamic range. If a `float` value is provided, the minimum signal value must be greater
338
+ than this (absolute) value. *Set to a negative value to disable the threshold*.
339
+ If a `str` value is provided, looks for 'q:quantileValue', e.g., 'q:0.90'. The
337
340
  threshold is then set to the corresponding quantile of the non-zero signal estimates.
338
- Defaults to str value 'q:0.75' --- the 75th percentile of signal values.
339
341
  :type minSignalAtMaxima: Optional[str | float]
340
342
  :param useScalingFunction: If True, use (only) the scaling function to build the matching template.
341
343
  If False, use (only) the wavelet function.
@@ -343,20 +345,21 @@ class matchingParams(NamedTuple):
343
345
  :param excludeRegionsBedFile: A BED file with regions to exclude from matching
344
346
  :type excludeRegionsBedFile: Optional[str]
345
347
 
346
- :seealso: :class:`consenrich.core.matchingParams`, :func:`cconsenrich.csampleBlockStats`, :ref:`matching`
348
+ :seealso: :func:`cconsenrich.csampleBlockStats`, :ref:`matching`
349
+
347
350
  """
348
351
 
349
352
  templateNames: List[str]
350
353
  cascadeLevels: List[int]
351
354
  iters: int
352
355
  alpha: float
356
+ useScalingFunction: Optional[bool]
353
357
  minMatchLengthBP: Optional[int]
354
358
  maxNumMatches: Optional[int]
355
- minSignalAtMaxima: Optional[str | float] = "q:0.75"
356
- merge: bool = False
357
- mergeGapBP: int = 25
358
- useScalingFunction: bool = True
359
- excludeRegionsBedFile: Optional[str] = None
359
+ minSignalAtMaxima: Optional[str | float]
360
+ merge: Optional[bool]
361
+ mergeGapBP: Optional[int]
362
+ excludeRegionsBedFile: Optional[str]
360
363
 
361
364
 
362
365
  def _numIntervals(start: int, end: int, step: int) -> int:
@@ -518,6 +521,7 @@ def readBamSegments(
518
521
  maxInsertSize: Optional[int] = 1000,
519
522
  pairedEndMode: Optional[int] = 0,
520
523
  inferFragmentLength: Optional[int] = 0,
524
+ countEndsOnly: Optional[bool] = False,
521
525
  ) -> npt.NDArray[np.float32]:
522
526
  r"""Calculate tracks of read counts (or a function thereof) for each BAM file.
523
527
 
@@ -553,6 +557,9 @@ def readBamSegments(
553
557
  :type pairedEndMode: int
554
558
  :param inferFragmentLength: See :class:`samParams`.
555
559
  :type inferFragmentLength: int
560
+ :param countEndsOnly: If True, only the 5' ends of reads are counted. This overrides `inferFragmentLength` and `pairedEndMode`.
561
+ :type countEndsOnly: Optional[bool]
562
+
556
563
  """
557
564
 
558
565
  if len(bamFiles) == 0:
@@ -567,6 +574,12 @@ def readBamSegments(
567
574
  offsetStr = ((str(offsetStr) or "0,0").replace(" ", "")).split(",")
568
575
  numIntervals = ((end - start) + stepSize - 1) // stepSize
569
576
  counts = np.empty((len(bamFiles), numIntervals), dtype=np.float32)
577
+
578
+ if isinstance(countEndsOnly, bool) and countEndsOnly:
579
+ # note: setting this option ignores inferFragmentLength, pairedEndMode
580
+ inferFragmentLength = 0
581
+ pairedEndMode = 0
582
+
570
583
  for j, bam in enumerate(bamFiles):
571
584
  logger.info(f"Reading {chromosome}: {bam}")
572
585
  arr = cconsenrich.creadBamSegment(
consenrich/detrorm.py CHANGED
@@ -39,7 +39,7 @@ def getScaleFactor1x(
39
39
  :type bamFile: str
40
40
  :param effectiveGenomeSize: Effective genome size in base pairs. See :func:`consenrich.constants.getEffectiveGenomeSize`.
41
41
  :type effectiveGenomeSize: int
42
- :param readLength: Read length (base pairs). See :func:`consenrich.core.getReadLength`.
42
+ :param readLength: Read length or fragment length (base pairs).
43
43
  :type readLength: int
44
44
  :param excludeChroms: List of chromosomes to exclude from the analysis.
45
45
  :type excludeChroms: List[str]
@@ -125,9 +125,9 @@ def getPairScaleFactors(
125
125
  :type effectiveGenomeSizeA: int
126
126
  :param effectiveGenomeSizeB: Effective genome size for the second BAM file.
127
127
  :type effectiveGenomeSizeB: int
128
- :param readLengthA: Read length for the first BAM file.
128
+ :param readLengthA: Read length or fragment length for the first BAM file.
129
129
  :type readLengthA: int
130
- :param readLengthB: Read length for the second BAM file.
130
+ :param readLengthB: Read length or fragment length for the second BAM file.
131
131
  :type readLengthB: int
132
132
  :param excludeChroms: List of chromosomes to exclude from the analysis.
133
133
  :type excludeChroms: List[str]
@@ -167,9 +167,18 @@ def getPairScaleFactors(
167
167
  else:
168
168
  scaleFactorA *= coverageB / coverageA
169
169
  scaleFactorB = 1.0
170
+
170
171
  logger.info(
171
172
  f"Final scale factors: {bamFileA}: {scaleFactorA}, {bamFileB}: {scaleFactorB}"
172
173
  )
174
+
175
+ ratio = max(scaleFactorA, scaleFactorB) / min(scaleFactorA, scaleFactorB)
176
+ if ratio > 5.0:
177
+ logger.warning(
178
+ f"Scale factors differ > 5x....\n"
179
+ f"\n\tAre effective genome sizes {effectiveGenomeSizeA} and {effectiveGenomeSizeB} correct?"
180
+ f"\n\tAre read/fragment lengths {readLengthA},{readLengthB} correct?"
181
+ )
173
182
  return scaleFactorA, scaleFactorB
174
183
 
175
184