consenrich 0.7.0b1__cp313-cp313-macosx_11_0_arm64.whl → 0.7.1b1__cp313-cp313-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of consenrich might be problematic. Click here for more details.
- consenrich/cconsenrich.c +174 -174
- consenrich/cconsenrich.cpython-313-darwin.so +0 -0
- consenrich/consenrich.py +203 -67
- consenrich/core.py +5 -5
- consenrich/matching.py +442 -367
- {consenrich-0.7.0b1.dist-info → consenrich-0.7.1b1.dist-info}/METADATA +1 -1
- {consenrich-0.7.0b1.dist-info → consenrich-0.7.1b1.dist-info}/RECORD +11 -11
- {consenrich-0.7.0b1.dist-info → consenrich-0.7.1b1.dist-info}/WHEEL +0 -0
- {consenrich-0.7.0b1.dist-info → consenrich-0.7.1b1.dist-info}/entry_points.txt +0 -0
- {consenrich-0.7.0b1.dist-info → consenrich-0.7.1b1.dist-info}/licenses/LICENSE +0 -0
- {consenrich-0.7.0b1.dist-info → consenrich-0.7.1b1.dist-info}/top_level.txt +0 -0
|
Binary file
|
consenrich/consenrich.py
CHANGED
|
@@ -76,9 +76,14 @@ def getReadLengths(
|
|
|
76
76
|
:return: List of read lengths for each BAM file.
|
|
77
77
|
"""
|
|
78
78
|
if not inputArgs.bamFiles:
|
|
79
|
-
raise ValueError(
|
|
79
|
+
raise ValueError(
|
|
80
|
+
"No BAM files provided in the input arguments."
|
|
81
|
+
)
|
|
80
82
|
|
|
81
|
-
if
|
|
83
|
+
if (
|
|
84
|
+
not isinstance(inputArgs.bamFiles, list)
|
|
85
|
+
or len(inputArgs.bamFiles) == 0
|
|
86
|
+
):
|
|
82
87
|
raise ValueError("bam files list is empty")
|
|
83
88
|
|
|
84
89
|
return [
|
|
@@ -148,7 +153,9 @@ def getInputArgs(config_path: str) -> core.inputParams:
|
|
|
148
153
|
bamFiles = _expandWildCards(bamFilesRaw)
|
|
149
154
|
bamFilesControl = _expandWildCards(bamFilesControlRaw)
|
|
150
155
|
if len(bamFiles) == 0:
|
|
151
|
-
raise ValueError(
|
|
156
|
+
raise ValueError(
|
|
157
|
+
"No BAM files provided in the configuration."
|
|
158
|
+
)
|
|
152
159
|
if (
|
|
153
160
|
len(bamFilesControl) > 0
|
|
154
161
|
and len(bamFilesControl) != len(bamFiles)
|
|
@@ -164,7 +171,11 @@ def getInputArgs(config_path: str) -> core.inputParams:
|
|
|
164
171
|
)
|
|
165
172
|
bamFilesControl = bamFilesControl * len(bamFiles)
|
|
166
173
|
|
|
167
|
-
if
|
|
174
|
+
if (
|
|
175
|
+
not bamFiles
|
|
176
|
+
or not isinstance(bamFiles, list)
|
|
177
|
+
or len(bamFiles) == 0
|
|
178
|
+
):
|
|
168
179
|
raise ValueError("No BAM files found")
|
|
169
180
|
|
|
170
181
|
for i, bamFile in enumerate(bamFiles):
|
|
@@ -176,19 +187,21 @@ def getInputArgs(config_path: str) -> core.inputParams:
|
|
|
176
187
|
|
|
177
188
|
# if we've made it here, we can check pairedEnd
|
|
178
189
|
pairedEndList = misc_util.bamsArePairedEnd(bamFiles)
|
|
179
|
-
_isPairedEnd: Optional[bool] = config.get(
|
|
190
|
+
_isPairedEnd: Optional[bool] = config.get(
|
|
191
|
+
"inputParams.pairedEnd", None
|
|
192
|
+
)
|
|
180
193
|
if _isPairedEnd is None:
|
|
181
194
|
# only set auto if not provided in config
|
|
182
195
|
_isPairedEnd = all(pairedEndList)
|
|
183
196
|
if _isPairedEnd:
|
|
184
|
-
logger.info(
|
|
185
|
-
"Paired-end BAM files detected"
|
|
186
|
-
)
|
|
197
|
+
logger.info("Paired-end BAM files detected")
|
|
187
198
|
else:
|
|
188
|
-
logger.info(
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
199
|
+
logger.info("One or more single-end BAM files detected")
|
|
200
|
+
return core.inputParams(
|
|
201
|
+
bamFiles=bamFiles,
|
|
202
|
+
bamFilesControl=bamFilesControl,
|
|
203
|
+
pairedEnd=_isPairedEnd,
|
|
204
|
+
)
|
|
192
205
|
|
|
193
206
|
|
|
194
207
|
def getGenomeArgs(config_path: str) -> core.genomeParams:
|
|
@@ -200,12 +213,22 @@ def getGenomeArgs(config_path: str) -> core.genomeParams:
|
|
|
200
213
|
blacklistFile: Optional[str] = None
|
|
201
214
|
sparseBedFile: Optional[str] = None
|
|
202
215
|
chromosomes: Optional[List[str]] = None
|
|
203
|
-
excludeChroms: List[str] = config.get(
|
|
204
|
-
|
|
216
|
+
excludeChroms: List[str] = config.get(
|
|
217
|
+
"genomeParams.excludeChroms", []
|
|
218
|
+
)
|
|
219
|
+
excludeForNorm: List[str] = config.get(
|
|
220
|
+
"genomeParams.excludeForNorm", []
|
|
221
|
+
)
|
|
205
222
|
if genome:
|
|
206
|
-
chromSizesFile = constants.getGenomeResourceFile(
|
|
207
|
-
|
|
208
|
-
|
|
223
|
+
chromSizesFile = constants.getGenomeResourceFile(
|
|
224
|
+
genome, "sizes"
|
|
225
|
+
)
|
|
226
|
+
blacklistFile = constants.getGenomeResourceFile(
|
|
227
|
+
genome, "blacklist"
|
|
228
|
+
)
|
|
229
|
+
sparseBedFile = constants.getGenomeResourceFile(
|
|
230
|
+
genome, "sparse"
|
|
231
|
+
)
|
|
209
232
|
if config.get("genomeParams.chromSizesFile", None):
|
|
210
233
|
chromSizesFile = config["genomeParams.chromSizesFile"]
|
|
211
234
|
if config.get("genomeParams.blacklistFile", None):
|
|
@@ -232,10 +255,14 @@ def getGenomeArgs(config_path: str) -> core.genomeParams:
|
|
|
232
255
|
raise ValueError(
|
|
233
256
|
"No chromosomes provided in the configuration and no chromosome sizes file specified."
|
|
234
257
|
)
|
|
235
|
-
chromosomes = [
|
|
258
|
+
chromosomes = [
|
|
259
|
+
chrom.strip() for chrom in chromosomes if chrom.strip()
|
|
260
|
+
]
|
|
236
261
|
if excludeChroms:
|
|
237
262
|
chromosomes = [
|
|
238
|
-
chrom
|
|
263
|
+
chrom
|
|
264
|
+
for chrom in chromosomes
|
|
265
|
+
if chrom not in excludeChroms
|
|
239
266
|
]
|
|
240
267
|
if not chromosomes:
|
|
241
268
|
raise ValueError(
|
|
@@ -259,7 +286,9 @@ def getCountingArgs(config_path: str) -> core.countingParams:
|
|
|
259
286
|
scaleDown = config.get("countingParams.scaleDown", True)
|
|
260
287
|
scaleFactors = config.get("countingParams.scaleFactors", None)
|
|
261
288
|
numReads = config.get("countingParams.numReads", 100)
|
|
262
|
-
scaleFactorsControl = config.get(
|
|
289
|
+
scaleFactorsControl = config.get(
|
|
290
|
+
"countingParams.scaleFactorsControl", None
|
|
291
|
+
)
|
|
263
292
|
applyAsinh = config.get("countingParams.applyAsinh", False)
|
|
264
293
|
applyLog = config.get("countingParams.applyLog", False)
|
|
265
294
|
if applyAsinh and applyLog:
|
|
@@ -271,19 +300,25 @@ def getCountingArgs(config_path: str) -> core.countingParams:
|
|
|
271
300
|
rescaleToTreatmentCoverage = config.get(
|
|
272
301
|
"countingParams.rescaleToTreatmentCoverage", True
|
|
273
302
|
)
|
|
274
|
-
if scaleFactors is not None and not isinstance(
|
|
303
|
+
if scaleFactors is not None and not isinstance(
|
|
304
|
+
scaleFactors, list
|
|
305
|
+
):
|
|
275
306
|
raise ValueError("`scaleFactors` should be a list of floats.")
|
|
276
307
|
if scaleFactorsControl is not None and not isinstance(
|
|
277
308
|
scaleFactorsControl, list
|
|
278
309
|
):
|
|
279
|
-
raise ValueError(
|
|
310
|
+
raise ValueError(
|
|
311
|
+
"`scaleFactorsControl` should be a list of floats."
|
|
312
|
+
)
|
|
280
313
|
if (
|
|
281
314
|
scaleFactors is not None
|
|
282
315
|
and scaleFactorsControl is not None
|
|
283
316
|
and len(scaleFactors) != len(scaleFactorsControl)
|
|
284
317
|
):
|
|
285
318
|
if len(scaleFactorsControl) == 1:
|
|
286
|
-
scaleFactorsControl = scaleFactorsControl * len(
|
|
319
|
+
scaleFactorsControl = scaleFactorsControl * len(
|
|
320
|
+
scaleFactors
|
|
321
|
+
)
|
|
287
322
|
else:
|
|
288
323
|
raise ValueError(
|
|
289
324
|
"control and treatment scale factors: must be equal length or 1 control"
|
|
@@ -308,12 +343,16 @@ def readConfig(config_path: str) -> Dict[str, Any]:
|
|
|
308
343
|
genomeParams = getGenomeArgs(config_path)
|
|
309
344
|
countingParams = getCountingArgs(config_path)
|
|
310
345
|
minR_default = _getMinR(config, len(inputParams.bamFiles))
|
|
311
|
-
minQ_default = (
|
|
346
|
+
minQ_default = (
|
|
347
|
+
minR_default / (len(inputParams.bamFiles))
|
|
348
|
+
) + 0.10 # protect condition number
|
|
312
349
|
matchingExcludeRegionsBedFile_default: Optional[str] = (
|
|
313
350
|
genomeParams.blacklistFile
|
|
314
351
|
)
|
|
315
352
|
return {
|
|
316
|
-
"experimentName": config.get(
|
|
353
|
+
"experimentName": config.get(
|
|
354
|
+
"experimentName", "consenrichExperiment"
|
|
355
|
+
),
|
|
317
356
|
"genomeArgs": genomeParams,
|
|
318
357
|
"inputArgs": inputParams,
|
|
319
358
|
"countingArgs": countingParams,
|
|
@@ -338,8 +377,12 @@ def readConfig(config_path: str) -> Dict[str, Any]:
|
|
|
338
377
|
),
|
|
339
378
|
noGlobal=config.get("observationParams.noGlobal", False),
|
|
340
379
|
numNearest=config.get("observationParams.numNearest", 25),
|
|
341
|
-
localWeight=config.get(
|
|
342
|
-
|
|
380
|
+
localWeight=config.get(
|
|
381
|
+
"observationParams.localWeight", 0.333
|
|
382
|
+
),
|
|
383
|
+
globalWeight=config.get(
|
|
384
|
+
"observationParams.globalWeight", 0.667
|
|
385
|
+
),
|
|
343
386
|
approximationWindowLengthBP=config.get(
|
|
344
387
|
"observationParams.approximationWindowLengthBP", 10000
|
|
345
388
|
),
|
|
@@ -349,26 +392,50 @@ def readConfig(config_path: str) -> Dict[str, Any]:
|
|
|
349
392
|
lowPassFilterType=config.get(
|
|
350
393
|
"observationParams.lowPassFilterType", "median"
|
|
351
394
|
),
|
|
352
|
-
returnCenter=config.get(
|
|
395
|
+
returnCenter=config.get(
|
|
396
|
+
"observationParams.returnCenter", True
|
|
397
|
+
),
|
|
353
398
|
),
|
|
354
399
|
"stateArgs": core.stateParams(
|
|
355
400
|
stateInit=config.get("stateParams.stateInit", 0.0),
|
|
356
|
-
stateCovarInit=config.get(
|
|
401
|
+
stateCovarInit=config.get(
|
|
402
|
+
"stateParams.stateCovarInit", 100.0
|
|
403
|
+
),
|
|
357
404
|
boundState=config.get("stateParams.boundState", True),
|
|
358
|
-
stateLowerBound=config.get(
|
|
359
|
-
|
|
405
|
+
stateLowerBound=config.get(
|
|
406
|
+
"stateParams.stateLowerBound", 0.0
|
|
407
|
+
),
|
|
408
|
+
stateUpperBound=config.get(
|
|
409
|
+
"stateParams.stateUpperBound", 10000.0
|
|
410
|
+
),
|
|
360
411
|
),
|
|
361
412
|
"samArgs": core.samParams(
|
|
362
413
|
samThreads=config.get("samParams.samThreads", 1),
|
|
363
|
-
samFlagExclude=config.get(
|
|
414
|
+
samFlagExclude=config.get(
|
|
415
|
+
"samParams.samFlagExclude", 3844
|
|
416
|
+
),
|
|
364
417
|
oneReadPerBin=config.get("samParams.oneReadPerBin", 0),
|
|
365
418
|
chunkSize=config.get("samParams.chunkSize", 1000000),
|
|
366
419
|
offsetStr=config.get("samParams.offsetStr", "0,0"),
|
|
367
420
|
extendBP=config.get("samParams.extendBP", []),
|
|
368
421
|
maxInsertSize=config.get("samParams.maxInsertSize", 1000),
|
|
369
|
-
pairedEndMode=config.get(
|
|
370
|
-
|
|
371
|
-
|
|
422
|
+
pairedEndMode=config.get(
|
|
423
|
+
"samParams.pairedEndMode",
|
|
424
|
+
1
|
|
425
|
+
if inputParams.pairedEnd is not None
|
|
426
|
+
and int(inputParams.pairedEnd) > 0
|
|
427
|
+
else 0,
|
|
428
|
+
),
|
|
429
|
+
inferFragmentLength=config.get(
|
|
430
|
+
"samParams.inferFragmentLength",
|
|
431
|
+
1
|
|
432
|
+
if inputParams.pairedEnd is not None
|
|
433
|
+
and int(inputParams.pairedEnd) == 0
|
|
434
|
+
else 0,
|
|
435
|
+
),
|
|
436
|
+
countEndsOnly=config.get(
|
|
437
|
+
"samParams.countEndsOnly", False
|
|
438
|
+
),
|
|
372
439
|
),
|
|
373
440
|
"detrendArgs": core.detrendParams(
|
|
374
441
|
detrendWindowLengthBP=config.get(
|
|
@@ -377,7 +444,9 @@ def readConfig(config_path: str) -> Dict[str, Any]:
|
|
|
377
444
|
detrendTrackPercentile=config.get(
|
|
378
445
|
"detrendParams.detrendTrackPercentile", 75.0
|
|
379
446
|
),
|
|
380
|
-
usePolyFilter=config.get(
|
|
447
|
+
usePolyFilter=config.get(
|
|
448
|
+
"detrendParams.usePolyFilter", False
|
|
449
|
+
),
|
|
381
450
|
detrendSavitzkyGolayDegree=config.get(
|
|
382
451
|
"detrendParams.detrendSavitzkyGolayDegree", 2
|
|
383
452
|
),
|
|
@@ -386,12 +455,20 @@ def readConfig(config_path: str) -> Dict[str, Any]:
|
|
|
386
455
|
),
|
|
387
456
|
),
|
|
388
457
|
"matchingArgs": core.matchingParams(
|
|
389
|
-
templateNames=config.get(
|
|
390
|
-
|
|
458
|
+
templateNames=config.get(
|
|
459
|
+
"matchingParams.templateNames", []
|
|
460
|
+
),
|
|
461
|
+
cascadeLevels=config.get(
|
|
462
|
+
"matchingParams.cascadeLevels", [2]
|
|
463
|
+
),
|
|
391
464
|
iters=config.get("matchingParams.iters", 25_000),
|
|
392
465
|
alpha=config.get("matchingParams.alpha", 0.05),
|
|
393
|
-
minMatchLengthBP=config.get(
|
|
394
|
-
|
|
466
|
+
minMatchLengthBP=config.get(
|
|
467
|
+
"matchingParams.minMatchLengthBP", 250
|
|
468
|
+
),
|
|
469
|
+
maxNumMatches=config.get(
|
|
470
|
+
"matchingParams.maxNumMatches", 100_000
|
|
471
|
+
),
|
|
395
472
|
minSignalAtMaxima=config.get(
|
|
396
473
|
"matchingParams.minSignalAtMaxima", "q:0.75"
|
|
397
474
|
),
|
|
@@ -418,7 +495,9 @@ def convertBedGraphToBigWig(experimentName, chromSizesFile):
|
|
|
418
495
|
"OR install via conda (conda install -c bioconda ucsc-bedgraphtobigwig)."
|
|
419
496
|
)
|
|
420
497
|
|
|
421
|
-
logger.info(
|
|
498
|
+
logger.info(
|
|
499
|
+
"Attempting to generate bigWig files from bedGraph format..."
|
|
500
|
+
)
|
|
422
501
|
try:
|
|
423
502
|
path_ = shutil.which("bedGraphToBigWig")
|
|
424
503
|
except Exception as e:
|
|
@@ -429,7 +508,9 @@ def convertBedGraphToBigWig(experimentName, chromSizesFile):
|
|
|
429
508
|
return
|
|
430
509
|
logger.info(f"Using bedGraphToBigWig from {path_}")
|
|
431
510
|
for suffix in suffixes:
|
|
432
|
-
bedgraph =
|
|
511
|
+
bedgraph = (
|
|
512
|
+
f"consenrichOutput_{experimentName}_{suffix}.bedGraph"
|
|
513
|
+
)
|
|
433
514
|
if not os.path.exists(bedgraph):
|
|
434
515
|
logger.warning(
|
|
435
516
|
f"bedGraph file {bedgraph} does not exist. Skipping bigWig conversion."
|
|
@@ -452,7 +533,9 @@ def convertBedGraphToBigWig(experimentName, chromSizesFile):
|
|
|
452
533
|
)
|
|
453
534
|
continue
|
|
454
535
|
if os.path.exists(bigwig) and os.path.getsize(bigwig) > 100:
|
|
455
|
-
logger.info(
|
|
536
|
+
logger.info(
|
|
537
|
+
f"Finished: converted {bedgraph} to {bigwig}."
|
|
538
|
+
)
|
|
456
539
|
|
|
457
540
|
|
|
458
541
|
def main():
|
|
@@ -476,10 +559,16 @@ def main():
|
|
|
476
559
|
"--match-template",
|
|
477
560
|
type=str,
|
|
478
561
|
default="haar",
|
|
479
|
-
choices=[
|
|
562
|
+
choices=[
|
|
563
|
+
x
|
|
564
|
+
for x in pywt.wavelist(kind="discrete")
|
|
565
|
+
if "bio" not in x
|
|
566
|
+
],
|
|
480
567
|
dest="matchTemplate",
|
|
481
568
|
)
|
|
482
|
-
parser.add_argument(
|
|
569
|
+
parser.add_argument(
|
|
570
|
+
"--match-level", type=int, default=2, dest="matchLevel"
|
|
571
|
+
)
|
|
483
572
|
parser.add_argument(
|
|
484
573
|
"--match-alpha", type=float, default=0.05, dest="matchAlpha"
|
|
485
574
|
)
|
|
@@ -508,16 +597,24 @@ def main():
|
|
|
508
597
|
"--match-no-merge", action="store_true", dest="matchNoMerge"
|
|
509
598
|
)
|
|
510
599
|
parser.add_argument(
|
|
511
|
-
"--match-merge-gap",
|
|
600
|
+
"--match-merge-gap",
|
|
601
|
+
type=int,
|
|
602
|
+
default=None,
|
|
603
|
+
dest="matchMergeGapBP",
|
|
512
604
|
)
|
|
513
605
|
parser.add_argument(
|
|
514
|
-
"--match-use-wavelet",
|
|
606
|
+
"--match-use-wavelet",
|
|
607
|
+
action="store_true",
|
|
608
|
+
dest="matchUseWavelet",
|
|
515
609
|
)
|
|
516
610
|
parser.add_argument(
|
|
517
611
|
"--match-seed", type=int, default=42, dest="matchRandSeed"
|
|
518
612
|
)
|
|
519
613
|
parser.add_argument(
|
|
520
|
-
"--match-exclude-bed",
|
|
614
|
+
"--match-exclude-bed",
|
|
615
|
+
type=str,
|
|
616
|
+
default=None,
|
|
617
|
+
dest="matchExcludeBed",
|
|
521
618
|
)
|
|
522
619
|
parser.add_argument(
|
|
523
620
|
"--verbose", action="store_true", help="If set, logs config"
|
|
@@ -601,7 +698,8 @@ def main():
|
|
|
601
698
|
config_truncated = {
|
|
602
699
|
k: v
|
|
603
700
|
for k, v in config.items()
|
|
604
|
-
if k
|
|
701
|
+
if k
|
|
702
|
+
not in ["inputArgs", "genomeArgs", "countingArgs"]
|
|
605
703
|
}
|
|
606
704
|
config_truncated["experimentName"] = experimentName
|
|
607
705
|
config_truncated["inputArgs"] = inputArgs
|
|
@@ -619,7 +717,9 @@ def main():
|
|
|
619
717
|
controlsPresent = checkControlsPresent(inputArgs)
|
|
620
718
|
if args.verbose:
|
|
621
719
|
logger.info(f"controlsPresent: {controlsPresent}")
|
|
622
|
-
readLengthsBamFiles = getReadLengths(
|
|
720
|
+
readLengthsBamFiles = getReadLengths(
|
|
721
|
+
inputArgs, countingArgs, samArgs
|
|
722
|
+
)
|
|
623
723
|
effectiveGenomeSizes = getEffectiveGenomeSizes(
|
|
624
724
|
genomeArgs, readLengthsBamFiles
|
|
625
725
|
)
|
|
@@ -641,11 +741,16 @@ def main():
|
|
|
641
741
|
for bamFile in bamFilesControl
|
|
642
742
|
]
|
|
643
743
|
effectiveGenomeSizesControl = [
|
|
644
|
-
constants.getEffectiveGenomeSize(
|
|
744
|
+
constants.getEffectiveGenomeSize(
|
|
745
|
+
genomeArgs.genomeName, readLength
|
|
746
|
+
)
|
|
645
747
|
for readLength in readLengthsControlBamFiles
|
|
646
748
|
]
|
|
647
749
|
|
|
648
|
-
if
|
|
750
|
+
if (
|
|
751
|
+
scaleFactors is not None
|
|
752
|
+
and scaleFactorsControl is not None
|
|
753
|
+
):
|
|
649
754
|
treatScaleFactors = scaleFactors
|
|
650
755
|
controlScaleFactors = scaleFactorsControl
|
|
651
756
|
# still make sure this is accessible
|
|
@@ -662,7 +767,9 @@ def main():
|
|
|
662
767
|
samArgs.samThreads,
|
|
663
768
|
)
|
|
664
769
|
for bamFile, effectiveGenomeSize, readLength in zip(
|
|
665
|
-
bamFiles,
|
|
770
|
+
bamFiles,
|
|
771
|
+
effectiveGenomeSizes,
|
|
772
|
+
readLengthsBamFiles,
|
|
666
773
|
)
|
|
667
774
|
]
|
|
668
775
|
except Exception:
|
|
@@ -716,7 +823,8 @@ def main():
|
|
|
716
823
|
)
|
|
717
824
|
]
|
|
718
825
|
chromSizesDict = misc_util.getChromSizesDict(
|
|
719
|
-
genomeArgs.chromSizesFile,
|
|
826
|
+
genomeArgs.chromSizesFile,
|
|
827
|
+
excludeChroms=genomeArgs.excludeChroms,
|
|
720
828
|
)
|
|
721
829
|
chromosomes = genomeArgs.chromosomes
|
|
722
830
|
|
|
@@ -731,11 +839,15 @@ def main():
|
|
|
731
839
|
chromosomeStart = max(
|
|
732
840
|
0, (chromosomeStart - (chromosomeStart % stepSize))
|
|
733
841
|
)
|
|
734
|
-
chromosomeEnd = max(
|
|
842
|
+
chromosomeEnd = max(
|
|
843
|
+
0, (chromosomeEnd - (chromosomeEnd % stepSize))
|
|
844
|
+
)
|
|
735
845
|
numIntervals = (
|
|
736
846
|
((chromosomeEnd - chromosomeStart) + stepSize) - 1
|
|
737
847
|
) // stepSize
|
|
738
|
-
intervals = np.arange(
|
|
848
|
+
intervals = np.arange(
|
|
849
|
+
chromosomeStart, chromosomeEnd, stepSize
|
|
850
|
+
)
|
|
739
851
|
chromMat: np.ndarray = np.empty(
|
|
740
852
|
(numSamples, numIntervals), dtype=np.float32
|
|
741
853
|
)
|
|
@@ -752,7 +864,10 @@ def main():
|
|
|
752
864
|
chromosomeStart,
|
|
753
865
|
chromosomeEnd,
|
|
754
866
|
stepSize,
|
|
755
|
-
[
|
|
867
|
+
[
|
|
868
|
+
readLengthsBamFiles[j_],
|
|
869
|
+
readLengthsControlBamFiles[j_],
|
|
870
|
+
],
|
|
756
871
|
[treatScaleFactors[j_], controlScaleFactors[j_]],
|
|
757
872
|
samArgs.oneReadPerBin,
|
|
758
873
|
samArgs.samThreads,
|
|
@@ -764,10 +879,12 @@ def main():
|
|
|
764
879
|
inferFragmentLength=samArgs.inferFragmentLength,
|
|
765
880
|
applyAsinh=countingArgs.applyAsinh,
|
|
766
881
|
applyLog=countingArgs.applyLog,
|
|
767
|
-
countEndsOnly=samArgs.countEndsOnly
|
|
882
|
+
countEndsOnly=samArgs.countEndsOnly,
|
|
768
883
|
)
|
|
769
884
|
if countingArgs.rescaleToTreatmentCoverage:
|
|
770
|
-
finalSF = max(
|
|
885
|
+
finalSF = max(
|
|
886
|
+
1.0, initialTreatmentScaleFactors[j_]
|
|
887
|
+
)
|
|
771
888
|
chromMat[j_, :] = finalSF * (
|
|
772
889
|
pairMatrix[0, :] - pairMatrix[1, :]
|
|
773
890
|
)
|
|
@@ -791,18 +908,25 @@ def main():
|
|
|
791
908
|
inferFragmentLength=samArgs.inferFragmentLength,
|
|
792
909
|
applyAsinh=countingArgs.applyAsinh,
|
|
793
910
|
applyLog=countingArgs.applyLog,
|
|
794
|
-
countEndsOnly=samArgs.countEndsOnly
|
|
911
|
+
countEndsOnly=samArgs.countEndsOnly,
|
|
795
912
|
)
|
|
796
913
|
sparseMap = None
|
|
797
914
|
if genomeArgs.sparseBedFile and not observationArgs.useALV:
|
|
798
|
-
logger.info(
|
|
915
|
+
logger.info(
|
|
916
|
+
f"Building sparse mapping for {chromosome}..."
|
|
917
|
+
)
|
|
799
918
|
sparseMap = core.getSparseMap(
|
|
800
|
-
chromosome,
|
|
919
|
+
chromosome,
|
|
920
|
+
intervals,
|
|
921
|
+
numNearest,
|
|
922
|
+
genomeArgs.sparseBedFile,
|
|
801
923
|
)
|
|
802
924
|
|
|
803
925
|
muncMat = np.empty_like(chromMat, dtype=np.float32)
|
|
804
926
|
for j in range(numSamples):
|
|
805
|
-
logger.info(
|
|
927
|
+
logger.info(
|
|
928
|
+
f"Muncing {j + 1}/{numSamples} for {chromosome}..."
|
|
929
|
+
)
|
|
806
930
|
muncMat[j, :] = core.getMuncTrack(
|
|
807
931
|
chromosome,
|
|
808
932
|
intervals,
|
|
@@ -873,8 +997,11 @@ def main():
|
|
|
873
997
|
)
|
|
874
998
|
if c_ == 0 and len(chromosomes) > 1:
|
|
875
999
|
for file_ in os.listdir("."):
|
|
876
|
-
if file_.startswith(
|
|
877
|
-
|
|
1000
|
+
if file_.startswith(
|
|
1001
|
+
f"consenrichOutput_{experimentName}"
|
|
1002
|
+
) and (
|
|
1003
|
+
file_.endswith(".bedGraph")
|
|
1004
|
+
or file_.endswith(".narrowPeak")
|
|
878
1005
|
):
|
|
879
1006
|
logger.warning(f"Overwriting: {file_}")
|
|
880
1007
|
os.remove(file_)
|
|
@@ -927,13 +1054,22 @@ def main():
|
|
|
927
1054
|
convertBedGraphToBigWig(experimentName, genomeArgs.chromSizesFile)
|
|
928
1055
|
if matchingEnabled and matchingArgs.merge:
|
|
929
1056
|
try:
|
|
1057
|
+
mergeGapBP_ = matchingArgs.mergeGapBP
|
|
1058
|
+
if mergeGapBP_ is None:
|
|
1059
|
+
mergeGapBP_ = (
|
|
1060
|
+
int(matchingArgs.minMatchLengthBP/2) + 1
|
|
1061
|
+
if matchingArgs.minMatchLengthBP is not None
|
|
1062
|
+
else 75
|
|
1063
|
+
)
|
|
930
1064
|
matching.mergeMatches(
|
|
931
1065
|
f"consenrichOutput_{experimentName}_matches.narrowPeak",
|
|
932
|
-
mergeGapBP=
|
|
1066
|
+
mergeGapBP=mergeGapBP_,
|
|
933
1067
|
)
|
|
934
1068
|
|
|
935
1069
|
except Exception as e:
|
|
936
|
-
logger.warning(
|
|
1070
|
+
logger.warning(
|
|
1071
|
+
f"Failed to merge matches...SKIPPING:\n{e}\n\n"
|
|
1072
|
+
)
|
|
937
1073
|
logger.info("Done.")
|
|
938
1074
|
|
|
939
1075
|
|
consenrich/core.py
CHANGED
|
@@ -326,16 +326,16 @@ class matchingParams(NamedTuple):
|
|
|
326
326
|
an empirical null to test significance. See :func:`cconsenrich.csampleBlockStats`.
|
|
327
327
|
:type iters: int
|
|
328
328
|
:param alpha: Primary significance threshold on detected matches. Specifically, the
|
|
329
|
-
|
|
330
|
-
|
|
329
|
+
minimum corr. empirical p-value approximated from randomly sampled blocks in the
|
|
330
|
+
response sequence.
|
|
331
331
|
:type alpha: float
|
|
332
332
|
:param minMatchLengthBP: Within a window of `minMatchLengthBP` length (bp), relative maxima in
|
|
333
333
|
the signal-template convolution must be greater in value than others to qualify as matches.
|
|
334
334
|
:type minMatchLengthBP: int
|
|
335
335
|
:param minSignalAtMaxima: Secondary significance threshold coupled with `alpha`. Requires the *signal value*
|
|
336
|
-
at relative maxima in the response sequence to be greater than this threshold. Comparisons are made in log-scale
|
|
337
|
-
If a `float` value is provided, the minimum signal value must be greater
|
|
338
|
-
negative value to disable the threshold*.
|
|
336
|
+
at relative maxima in the response sequence to be greater than this threshold. Comparisons are made in log-scale
|
|
337
|
+
to temper genome-wide dynamic range. If a `float` value is provided, the minimum signal value must be greater
|
|
338
|
+
than this (absolute) value. *Set to a negative value to disable the threshold*.
|
|
339
339
|
If a `str` value is provided, looks for 'q:quantileValue', e.g., 'q:0.90'. The
|
|
340
340
|
threshold is then set to the corresponding quantile of the non-zero signal estimates.
|
|
341
341
|
:type minSignalAtMaxima: Optional[str | float]
|