consenrich 0.6.3b1__cp311-cp311-macosx_11_0_arm64.whl → 0.7.1b1__cp311-cp311-macosx_11_0_arm64.whl
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Potentially problematic release.
This version of consenrich might be problematic. Click here for more details.
- consenrich/cconsenrich.c +404 -404
- consenrich/cconsenrich.cpython-311-darwin.so +0 -0
- consenrich/consenrich.py +216 -62
- consenrich/core.py +30 -17
- consenrich/detrorm.py +12 -3
- consenrich/matching.py +444 -369
- consenrich/misc_util.py +29 -0
- {consenrich-0.6.3b1.dist-info → consenrich-0.7.1b1.dist-info}/METADATA +3 -3
- {consenrich-0.6.3b1.dist-info → consenrich-0.7.1b1.dist-info}/RECORD +13 -13
- {consenrich-0.6.3b1.dist-info → consenrich-0.7.1b1.dist-info}/WHEEL +0 -0
- {consenrich-0.6.3b1.dist-info → consenrich-0.7.1b1.dist-info}/entry_points.txt +0 -0
- {consenrich-0.6.3b1.dist-info → consenrich-0.7.1b1.dist-info}/licenses/LICENSE +0 -0
- {consenrich-0.6.3b1.dist-info → consenrich-0.7.1b1.dist-info}/top_level.txt +0 -0
consenrich/core.py
CHANGED
|
@@ -194,6 +194,8 @@ class samParams(NamedTuple):
|
|
|
194
194
|
extend reads from 5'. Ignored if `pairedEndMode > 0` or `extendBP` set. This parameter is particularly
|
|
195
195
|
important when targeting broader marks (e.g., ChIP-seq H3K27me3).
|
|
196
196
|
:type inferFragmentLength: int
|
|
197
|
+
:param countEndsOnly: If True, only the 5' ends of reads are counted. Overrides `inferFragmentLength` and `pairedEndMode`.
|
|
198
|
+
:type countEndsOnly: Optional[bool]
|
|
197
199
|
|
|
198
200
|
.. tip::
|
|
199
201
|
|
|
@@ -210,6 +212,7 @@ class samParams(NamedTuple):
|
|
|
210
212
|
maxInsertSize: Optional[int] = 1000
|
|
211
213
|
pairedEndMode: Optional[int] = 0
|
|
212
214
|
inferFragmentLength: Optional[int] = 0
|
|
215
|
+
countEndsOnly: Optional[bool] = False
|
|
213
216
|
|
|
214
217
|
|
|
215
218
|
class detrendParams(NamedTuple):
|
|
@@ -251,6 +254,7 @@ class inputParams(NamedTuple):
|
|
|
251
254
|
|
|
252
255
|
bamFiles: List[str]
|
|
253
256
|
bamFilesControl: Optional[List[str]]
|
|
257
|
+
pairedEnd: Optional[bool]
|
|
254
258
|
|
|
255
259
|
|
|
256
260
|
class genomeParams(NamedTuple):
|
|
@@ -309,9 +313,9 @@ class countingParams(NamedTuple):
|
|
|
309
313
|
|
|
310
314
|
|
|
311
315
|
class matchingParams(NamedTuple):
|
|
312
|
-
r"""Parameters related to the matching algorithm
|
|
316
|
+
r"""Parameters related to the matching algorithm.
|
|
313
317
|
|
|
314
|
-
See :ref:`matching` for
|
|
318
|
+
See :ref:`matching` for an overview of the approach.
|
|
315
319
|
|
|
316
320
|
:param templateNames: A list of str values -- wavelet bases used for matching, e.g., `[haar, db2, sym4]`
|
|
317
321
|
:type templateNames: List[str]
|
|
@@ -322,20 +326,18 @@ class matchingParams(NamedTuple):
|
|
|
322
326
|
an empirical null to test significance. See :func:`cconsenrich.csampleBlockStats`.
|
|
323
327
|
:type iters: int
|
|
324
328
|
:param alpha: Primary significance threshold on detected matches. Specifically, the
|
|
325
|
-
|
|
326
|
-
|
|
329
|
+
minimum corr. empirical p-value approximated from randomly sampled blocks in the
|
|
330
|
+
response sequence.
|
|
327
331
|
:type alpha: float
|
|
328
332
|
:param minMatchLengthBP: Within a window of `minMatchLengthBP` length (bp), relative maxima in
|
|
329
333
|
the signal-template convolution must be greater in value than others to qualify as matches.
|
|
330
|
-
*Set to a negative value to disable this filter*.
|
|
331
334
|
:type minMatchLengthBP: int
|
|
332
|
-
:param minSignalAtMaxima: Secondary significance threshold coupled with `alpha`.
|
|
333
|
-
at relative maxima in the response sequence to be greater than this threshold. Comparisons are made in log-scale
|
|
334
|
-
If a `float` value is provided, the minimum signal value must be greater
|
|
335
|
-
negative value to disable the threshold*.
|
|
336
|
-
If a `str` value is provided, looks for 'q:quantileValue', e.g., 'q:0.
|
|
335
|
+
:param minSignalAtMaxima: Secondary significance threshold coupled with `alpha`. Requires the *signal value*
|
|
336
|
+
at relative maxima in the response sequence to be greater than this threshold. Comparisons are made in log-scale
|
|
337
|
+
to temper genome-wide dynamic range. If a `float` value is provided, the minimum signal value must be greater
|
|
338
|
+
than this (absolute) value. *Set to a negative value to disable the threshold*.
|
|
339
|
+
If a `str` value is provided, looks for 'q:quantileValue', e.g., 'q:0.90'. The
|
|
337
340
|
threshold is then set to the corresponding quantile of the non-zero signal estimates.
|
|
338
|
-
Defaults to str value 'q:0.75' --- the 75th percentile of signal values.
|
|
339
341
|
:type minSignalAtMaxima: Optional[str | float]
|
|
340
342
|
:param useScalingFunction: If True, use (only) the scaling function to build the matching template.
|
|
341
343
|
If False, use (only) the wavelet function.
|
|
@@ -343,20 +345,21 @@ class matchingParams(NamedTuple):
|
|
|
343
345
|
:param excludeRegionsBedFile: A BED file with regions to exclude from matching
|
|
344
346
|
:type excludeRegionsBedFile: Optional[str]
|
|
345
347
|
|
|
346
|
-
:seealso: :
|
|
348
|
+
:seealso: :func:`cconsenrich.csampleBlockStats`, :ref:`matching`
|
|
349
|
+
|
|
347
350
|
"""
|
|
348
351
|
|
|
349
352
|
templateNames: List[str]
|
|
350
353
|
cascadeLevels: List[int]
|
|
351
354
|
iters: int
|
|
352
355
|
alpha: float
|
|
356
|
+
useScalingFunction: Optional[bool]
|
|
353
357
|
minMatchLengthBP: Optional[int]
|
|
354
358
|
maxNumMatches: Optional[int]
|
|
355
|
-
minSignalAtMaxima: Optional[str | float]
|
|
356
|
-
merge: bool
|
|
357
|
-
mergeGapBP: int
|
|
358
|
-
|
|
359
|
-
excludeRegionsBedFile: Optional[str] = None
|
|
359
|
+
minSignalAtMaxima: Optional[str | float]
|
|
360
|
+
merge: Optional[bool]
|
|
361
|
+
mergeGapBP: Optional[int]
|
|
362
|
+
excludeRegionsBedFile: Optional[str]
|
|
360
363
|
|
|
361
364
|
|
|
362
365
|
def _numIntervals(start: int, end: int, step: int) -> int:
|
|
@@ -518,6 +521,7 @@ def readBamSegments(
|
|
|
518
521
|
maxInsertSize: Optional[int] = 1000,
|
|
519
522
|
pairedEndMode: Optional[int] = 0,
|
|
520
523
|
inferFragmentLength: Optional[int] = 0,
|
|
524
|
+
countEndsOnly: Optional[bool] = False,
|
|
521
525
|
) -> npt.NDArray[np.float32]:
|
|
522
526
|
r"""Calculate tracks of read counts (or a function thereof) for each BAM file.
|
|
523
527
|
|
|
@@ -553,6 +557,9 @@ def readBamSegments(
|
|
|
553
557
|
:type pairedEndMode: int
|
|
554
558
|
:param inferFragmentLength: See :class:`samParams`.
|
|
555
559
|
:type inferFragmentLength: int
|
|
560
|
+
:param countEndsOnly: If True, only the 5' ends of reads are counted. This overrides `inferFragmentLength` and `pairedEndMode`.
|
|
561
|
+
:type countEndsOnly: Optional[bool]
|
|
562
|
+
|
|
556
563
|
"""
|
|
557
564
|
|
|
558
565
|
if len(bamFiles) == 0:
|
|
@@ -567,6 +574,12 @@ def readBamSegments(
|
|
|
567
574
|
offsetStr = ((str(offsetStr) or "0,0").replace(" ", "")).split(",")
|
|
568
575
|
numIntervals = ((end - start) + stepSize - 1) // stepSize
|
|
569
576
|
counts = np.empty((len(bamFiles), numIntervals), dtype=np.float32)
|
|
577
|
+
|
|
578
|
+
if isinstance(countEndsOnly, bool) and countEndsOnly:
|
|
579
|
+
# note: setting this option ignores inferFragmentLength, pairedEndMode
|
|
580
|
+
inferFragmentLength = 0
|
|
581
|
+
pairedEndMode = 0
|
|
582
|
+
|
|
570
583
|
for j, bam in enumerate(bamFiles):
|
|
571
584
|
logger.info(f"Reading {chromosome}: {bam}")
|
|
572
585
|
arr = cconsenrich.creadBamSegment(
|
consenrich/detrorm.py
CHANGED
|
@@ -39,7 +39,7 @@ def getScaleFactor1x(
|
|
|
39
39
|
:type bamFile: str
|
|
40
40
|
:param effectiveGenomeSize: Effective genome size in base pairs. See :func:`consenrich.constants.getEffectiveGenomeSize`.
|
|
41
41
|
:type effectiveGenomeSize: int
|
|
42
|
-
:param readLength:
|
|
42
|
+
:param readLength: read length or fragment length
|
|
43
43
|
:type readLength: int
|
|
44
44
|
:param excludeChroms: List of chromosomes to exclude from the analysis.
|
|
45
45
|
:type excludeChroms: List[str]
|
|
@@ -125,9 +125,9 @@ def getPairScaleFactors(
|
|
|
125
125
|
:type effectiveGenomeSizeA: int
|
|
126
126
|
:param effectiveGenomeSizeB: Effective genome size for the second BAM file.
|
|
127
127
|
:type effectiveGenomeSizeB: int
|
|
128
|
-
:param readLengthA:
|
|
128
|
+
:param readLengthA: read length or fragment length for the first BAM file.
|
|
129
129
|
:type readLengthA: int
|
|
130
|
-
:param readLengthB:
|
|
130
|
+
:param readLengthB: read length or fragment length for the second BAM file.
|
|
131
131
|
:type readLengthB: int
|
|
132
132
|
:param excludeChroms: List of chromosomes to exclude from the analysis.
|
|
133
133
|
:type excludeChroms: List[str]
|
|
@@ -167,9 +167,18 @@ def getPairScaleFactors(
|
|
|
167
167
|
else:
|
|
168
168
|
scaleFactorA *= coverageB / coverageA
|
|
169
169
|
scaleFactorB = 1.0
|
|
170
|
+
|
|
170
171
|
logger.info(
|
|
171
172
|
f"Final scale factors: {bamFileA}: {scaleFactorA}, {bamFileB}: {scaleFactorB}"
|
|
172
173
|
)
|
|
174
|
+
|
|
175
|
+
ratio = max(scaleFactorA, scaleFactorB) / min(scaleFactorA, scaleFactorB)
|
|
176
|
+
if ratio > 5.0:
|
|
177
|
+
logger.warning(
|
|
178
|
+
f"Scale factors differ > 5x....\n"
|
|
179
|
+
f"\n\tAre effective genome sizes {effectiveGenomeSizeA} and {effectiveGenomeSizeB} correct?"
|
|
180
|
+
f"\n\tAre read/fragment lengths {readLengthA},{readLengthB} correct?"
|
|
181
|
+
)
|
|
173
182
|
return scaleFactorA, scaleFactorB
|
|
174
183
|
|
|
175
184
|
|