consenrich 0.7.0b1__cp312-cp312-macosx_11_0_arm64.whl → 0.7.1b2__cp312-cp312-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of consenrich might be problematic. Click here for more details.
- consenrich/cconsenrich.c +174 -174
- consenrich/cconsenrich.cpython-312-darwin.so +0 -0
- consenrich/consenrich.py +273 -77
- consenrich/core.py +11 -9
- consenrich/matching.py +513 -373
- {consenrich-0.7.0b1.dist-info → consenrich-0.7.1b2.dist-info}/METADATA +1 -1
- {consenrich-0.7.0b1.dist-info → consenrich-0.7.1b2.dist-info}/RECORD +11 -11
- {consenrich-0.7.0b1.dist-info → consenrich-0.7.1b2.dist-info}/WHEEL +0 -0
- {consenrich-0.7.0b1.dist-info → consenrich-0.7.1b2.dist-info}/entry_points.txt +0 -0
- {consenrich-0.7.0b1.dist-info → consenrich-0.7.1b2.dist-info}/licenses/LICENSE +0 -0
- {consenrich-0.7.0b1.dist-info → consenrich-0.7.1b2.dist-info}/top_level.txt +0 -0
|
Binary file
|
consenrich/consenrich.py
CHANGED
|
@@ -76,9 +76,14 @@ def getReadLengths(
|
|
|
76
76
|
:return: List of read lengths for each BAM file.
|
|
77
77
|
"""
|
|
78
78
|
if not inputArgs.bamFiles:
|
|
79
|
-
raise ValueError(
|
|
79
|
+
raise ValueError(
|
|
80
|
+
"No BAM files provided in the input arguments."
|
|
81
|
+
)
|
|
80
82
|
|
|
81
|
-
if
|
|
83
|
+
if (
|
|
84
|
+
not isinstance(inputArgs.bamFiles, list)
|
|
85
|
+
or len(inputArgs.bamFiles) == 0
|
|
86
|
+
):
|
|
82
87
|
raise ValueError("bam files list is empty")
|
|
83
88
|
|
|
84
89
|
return [
|
|
@@ -148,7 +153,9 @@ def getInputArgs(config_path: str) -> core.inputParams:
|
|
|
148
153
|
bamFiles = _expandWildCards(bamFilesRaw)
|
|
149
154
|
bamFilesControl = _expandWildCards(bamFilesControlRaw)
|
|
150
155
|
if len(bamFiles) == 0:
|
|
151
|
-
raise ValueError(
|
|
156
|
+
raise ValueError(
|
|
157
|
+
"No BAM files provided in the configuration."
|
|
158
|
+
)
|
|
152
159
|
if (
|
|
153
160
|
len(bamFilesControl) > 0
|
|
154
161
|
and len(bamFilesControl) != len(bamFiles)
|
|
@@ -164,7 +171,11 @@ def getInputArgs(config_path: str) -> core.inputParams:
|
|
|
164
171
|
)
|
|
165
172
|
bamFilesControl = bamFilesControl * len(bamFiles)
|
|
166
173
|
|
|
167
|
-
if
|
|
174
|
+
if (
|
|
175
|
+
not bamFiles
|
|
176
|
+
or not isinstance(bamFiles, list)
|
|
177
|
+
or len(bamFiles) == 0
|
|
178
|
+
):
|
|
168
179
|
raise ValueError("No BAM files found")
|
|
169
180
|
|
|
170
181
|
for i, bamFile in enumerate(bamFiles):
|
|
@@ -176,19 +187,21 @@ def getInputArgs(config_path: str) -> core.inputParams:
|
|
|
176
187
|
|
|
177
188
|
# if we've made it here, we can check pairedEnd
|
|
178
189
|
pairedEndList = misc_util.bamsArePairedEnd(bamFiles)
|
|
179
|
-
_isPairedEnd: Optional[bool] = config.get(
|
|
190
|
+
_isPairedEnd: Optional[bool] = config.get(
|
|
191
|
+
"inputParams.pairedEnd", None
|
|
192
|
+
)
|
|
180
193
|
if _isPairedEnd is None:
|
|
181
194
|
# only set auto if not provided in config
|
|
182
195
|
_isPairedEnd = all(pairedEndList)
|
|
183
196
|
if _isPairedEnd:
|
|
184
|
-
logger.info(
|
|
185
|
-
"Paired-end BAM files detected"
|
|
186
|
-
)
|
|
197
|
+
logger.info("Paired-end BAM files detected")
|
|
187
198
|
else:
|
|
188
|
-
logger.info(
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
199
|
+
logger.info("One or more single-end BAM files detected")
|
|
200
|
+
return core.inputParams(
|
|
201
|
+
bamFiles=bamFiles,
|
|
202
|
+
bamFilesControl=bamFilesControl,
|
|
203
|
+
pairedEnd=_isPairedEnd,
|
|
204
|
+
)
|
|
192
205
|
|
|
193
206
|
|
|
194
207
|
def getGenomeArgs(config_path: str) -> core.genomeParams:
|
|
@@ -200,12 +213,22 @@ def getGenomeArgs(config_path: str) -> core.genomeParams:
|
|
|
200
213
|
blacklistFile: Optional[str] = None
|
|
201
214
|
sparseBedFile: Optional[str] = None
|
|
202
215
|
chromosomes: Optional[List[str]] = None
|
|
203
|
-
excludeChroms: List[str] = config.get(
|
|
204
|
-
|
|
216
|
+
excludeChroms: List[str] = config.get(
|
|
217
|
+
"genomeParams.excludeChroms", []
|
|
218
|
+
)
|
|
219
|
+
excludeForNorm: List[str] = config.get(
|
|
220
|
+
"genomeParams.excludeForNorm", []
|
|
221
|
+
)
|
|
205
222
|
if genome:
|
|
206
|
-
chromSizesFile = constants.getGenomeResourceFile(
|
|
207
|
-
|
|
208
|
-
|
|
223
|
+
chromSizesFile = constants.getGenomeResourceFile(
|
|
224
|
+
genome, "sizes"
|
|
225
|
+
)
|
|
226
|
+
blacklistFile = constants.getGenomeResourceFile(
|
|
227
|
+
genome, "blacklist"
|
|
228
|
+
)
|
|
229
|
+
sparseBedFile = constants.getGenomeResourceFile(
|
|
230
|
+
genome, "sparse"
|
|
231
|
+
)
|
|
209
232
|
if config.get("genomeParams.chromSizesFile", None):
|
|
210
233
|
chromSizesFile = config["genomeParams.chromSizesFile"]
|
|
211
234
|
if config.get("genomeParams.blacklistFile", None):
|
|
@@ -232,10 +255,14 @@ def getGenomeArgs(config_path: str) -> core.genomeParams:
|
|
|
232
255
|
raise ValueError(
|
|
233
256
|
"No chromosomes provided in the configuration and no chromosome sizes file specified."
|
|
234
257
|
)
|
|
235
|
-
chromosomes = [
|
|
258
|
+
chromosomes = [
|
|
259
|
+
chrom.strip() for chrom in chromosomes if chrom.strip()
|
|
260
|
+
]
|
|
236
261
|
if excludeChroms:
|
|
237
262
|
chromosomes = [
|
|
238
|
-
chrom
|
|
263
|
+
chrom
|
|
264
|
+
for chrom in chromosomes
|
|
265
|
+
if chrom not in excludeChroms
|
|
239
266
|
]
|
|
240
267
|
if not chromosomes:
|
|
241
268
|
raise ValueError(
|
|
@@ -259,7 +286,9 @@ def getCountingArgs(config_path: str) -> core.countingParams:
|
|
|
259
286
|
scaleDown = config.get("countingParams.scaleDown", True)
|
|
260
287
|
scaleFactors = config.get("countingParams.scaleFactors", None)
|
|
261
288
|
numReads = config.get("countingParams.numReads", 100)
|
|
262
|
-
scaleFactorsControl = config.get(
|
|
289
|
+
scaleFactorsControl = config.get(
|
|
290
|
+
"countingParams.scaleFactorsControl", None
|
|
291
|
+
)
|
|
263
292
|
applyAsinh = config.get("countingParams.applyAsinh", False)
|
|
264
293
|
applyLog = config.get("countingParams.applyLog", False)
|
|
265
294
|
if applyAsinh and applyLog:
|
|
@@ -271,19 +300,25 @@ def getCountingArgs(config_path: str) -> core.countingParams:
|
|
|
271
300
|
rescaleToTreatmentCoverage = config.get(
|
|
272
301
|
"countingParams.rescaleToTreatmentCoverage", True
|
|
273
302
|
)
|
|
274
|
-
if scaleFactors is not None and not isinstance(
|
|
303
|
+
if scaleFactors is not None and not isinstance(
|
|
304
|
+
scaleFactors, list
|
|
305
|
+
):
|
|
275
306
|
raise ValueError("`scaleFactors` should be a list of floats.")
|
|
276
307
|
if scaleFactorsControl is not None and not isinstance(
|
|
277
308
|
scaleFactorsControl, list
|
|
278
309
|
):
|
|
279
|
-
raise ValueError(
|
|
310
|
+
raise ValueError(
|
|
311
|
+
"`scaleFactorsControl` should be a list of floats."
|
|
312
|
+
)
|
|
280
313
|
if (
|
|
281
314
|
scaleFactors is not None
|
|
282
315
|
and scaleFactorsControl is not None
|
|
283
316
|
and len(scaleFactors) != len(scaleFactorsControl)
|
|
284
317
|
):
|
|
285
318
|
if len(scaleFactorsControl) == 1:
|
|
286
|
-
scaleFactorsControl = scaleFactorsControl * len(
|
|
319
|
+
scaleFactorsControl = scaleFactorsControl * len(
|
|
320
|
+
scaleFactors
|
|
321
|
+
)
|
|
287
322
|
else:
|
|
288
323
|
raise ValueError(
|
|
289
324
|
"control and treatment scale factors: must be equal length or 1 control"
|
|
@@ -308,12 +343,46 @@ def readConfig(config_path: str) -> Dict[str, Any]:
|
|
|
308
343
|
genomeParams = getGenomeArgs(config_path)
|
|
309
344
|
countingParams = getCountingArgs(config_path)
|
|
310
345
|
minR_default = _getMinR(config, len(inputParams.bamFiles))
|
|
311
|
-
minQ_default = (
|
|
346
|
+
minQ_default = (
|
|
347
|
+
minR_default / (len(inputParams.bamFiles))
|
|
348
|
+
) + 0.10 # protect condition number
|
|
349
|
+
|
|
312
350
|
matchingExcludeRegionsBedFile_default: Optional[str] = (
|
|
313
351
|
genomeParams.blacklistFile
|
|
314
352
|
)
|
|
353
|
+
|
|
354
|
+
# apply less aggressive *default* detrending/background removal
|
|
355
|
+
# ...IF input controls are present. In either case, respect
|
|
356
|
+
# ...user-specified params
|
|
357
|
+
detrendWindowLengthBP_: int = -1
|
|
358
|
+
detrendSavitzkyGolayDegree_: int = -1
|
|
359
|
+
|
|
360
|
+
if (
|
|
361
|
+
inputParams.bamFilesControl is not None
|
|
362
|
+
and len(inputParams.bamFilesControl) > 0
|
|
363
|
+
):
|
|
364
|
+
detrendWindowLengthBP_ = config.get(
|
|
365
|
+
"detrendParams.detrendWindowLengthBP",
|
|
366
|
+
25_000,
|
|
367
|
+
)
|
|
368
|
+
detrendSavitzkyGolayDegree_ = config.get(
|
|
369
|
+
"detrendParams.detrendSavitzkyGolayDegree",
|
|
370
|
+
1,
|
|
371
|
+
)
|
|
372
|
+
else:
|
|
373
|
+
detrendWindowLengthBP_ = config.get(
|
|
374
|
+
"detrendParams.detrendWindowLengthBP",
|
|
375
|
+
10_000,
|
|
376
|
+
)
|
|
377
|
+
detrendSavitzkyGolayDegree_ = config.get(
|
|
378
|
+
"detrendParams.detrendSavitzkyGolayDegree",
|
|
379
|
+
2,
|
|
380
|
+
)
|
|
381
|
+
|
|
315
382
|
return {
|
|
316
|
-
"experimentName": config.get(
|
|
383
|
+
"experimentName": config.get(
|
|
384
|
+
"experimentName", "consenrichExperiment"
|
|
385
|
+
),
|
|
317
386
|
"genomeArgs": genomeParams,
|
|
318
387
|
"inputArgs": inputParams,
|
|
319
388
|
"countingArgs": countingParams,
|
|
@@ -338,60 +407,112 @@ def readConfig(config_path: str) -> Dict[str, Any]:
|
|
|
338
407
|
),
|
|
339
408
|
noGlobal=config.get("observationParams.noGlobal", False),
|
|
340
409
|
numNearest=config.get("observationParams.numNearest", 25),
|
|
341
|
-
localWeight=config.get(
|
|
342
|
-
|
|
410
|
+
localWeight=config.get(
|
|
411
|
+
"observationParams.localWeight",
|
|
412
|
+
0.333,
|
|
413
|
+
),
|
|
414
|
+
globalWeight=config.get(
|
|
415
|
+
"observationParams.globalWeight",
|
|
416
|
+
0.667,
|
|
417
|
+
),
|
|
343
418
|
approximationWindowLengthBP=config.get(
|
|
344
|
-
"observationParams.approximationWindowLengthBP",
|
|
419
|
+
"observationParams.approximationWindowLengthBP",
|
|
420
|
+
10000,
|
|
345
421
|
),
|
|
346
422
|
lowPassWindowLengthBP=config.get(
|
|
347
|
-
"observationParams.lowPassWindowLengthBP",
|
|
423
|
+
"observationParams.lowPassWindowLengthBP",
|
|
424
|
+
20000,
|
|
348
425
|
),
|
|
349
426
|
lowPassFilterType=config.get(
|
|
350
|
-
"observationParams.lowPassFilterType",
|
|
427
|
+
"observationParams.lowPassFilterType",
|
|
428
|
+
"median",
|
|
429
|
+
),
|
|
430
|
+
returnCenter=config.get(
|
|
431
|
+
"observationParams.returnCenter",
|
|
432
|
+
True,
|
|
351
433
|
),
|
|
352
|
-
returnCenter=config.get("observationParams.returnCenter", True),
|
|
353
434
|
),
|
|
354
435
|
"stateArgs": core.stateParams(
|
|
355
436
|
stateInit=config.get("stateParams.stateInit", 0.0),
|
|
356
|
-
stateCovarInit=config.get(
|
|
437
|
+
stateCovarInit=config.get(
|
|
438
|
+
"stateParams.stateCovarInit",
|
|
439
|
+
100.0,
|
|
440
|
+
),
|
|
357
441
|
boundState=config.get("stateParams.boundState", True),
|
|
358
|
-
stateLowerBound=config.get(
|
|
359
|
-
|
|
442
|
+
stateLowerBound=config.get(
|
|
443
|
+
"stateParams.stateLowerBound",
|
|
444
|
+
0.0,
|
|
445
|
+
),
|
|
446
|
+
stateUpperBound=config.get(
|
|
447
|
+
"stateParams.stateUpperBound",
|
|
448
|
+
10000.0,
|
|
449
|
+
),
|
|
360
450
|
),
|
|
361
451
|
"samArgs": core.samParams(
|
|
362
452
|
samThreads=config.get("samParams.samThreads", 1),
|
|
363
|
-
samFlagExclude=config.get(
|
|
453
|
+
samFlagExclude=config.get(
|
|
454
|
+
"samParams.samFlagExclude", 3844
|
|
455
|
+
),
|
|
364
456
|
oneReadPerBin=config.get("samParams.oneReadPerBin", 0),
|
|
365
457
|
chunkSize=config.get("samParams.chunkSize", 1000000),
|
|
366
458
|
offsetStr=config.get("samParams.offsetStr", "0,0"),
|
|
367
459
|
extendBP=config.get("samParams.extendBP", []),
|
|
368
460
|
maxInsertSize=config.get("samParams.maxInsertSize", 1000),
|
|
369
|
-
pairedEndMode=config.get(
|
|
370
|
-
|
|
371
|
-
|
|
461
|
+
pairedEndMode=config.get(
|
|
462
|
+
"samParams.pairedEndMode",
|
|
463
|
+
1
|
|
464
|
+
if inputParams.pairedEnd is not None
|
|
465
|
+
and int(inputParams.pairedEnd) > 0
|
|
466
|
+
else 0,
|
|
467
|
+
),
|
|
468
|
+
inferFragmentLength=config.get(
|
|
469
|
+
"samParams.inferFragmentLength",
|
|
470
|
+
1
|
|
471
|
+
if inputParams.pairedEnd is not None
|
|
472
|
+
and int(inputParams.pairedEnd) == 0
|
|
473
|
+
else 0,
|
|
474
|
+
),
|
|
475
|
+
countEndsOnly=config.get(
|
|
476
|
+
"samParams.countEndsOnly",
|
|
477
|
+
False,
|
|
478
|
+
),
|
|
372
479
|
),
|
|
373
480
|
"detrendArgs": core.detrendParams(
|
|
374
|
-
detrendWindowLengthBP=
|
|
375
|
-
"detrendParams.detrendWindowLengthBP", 10000
|
|
376
|
-
),
|
|
481
|
+
detrendWindowLengthBP=detrendWindowLengthBP_,
|
|
377
482
|
detrendTrackPercentile=config.get(
|
|
378
|
-
"detrendParams.detrendTrackPercentile",
|
|
483
|
+
"detrendParams.detrendTrackPercentile",
|
|
484
|
+
75,
|
|
485
|
+
),
|
|
486
|
+
usePolyFilter=config.get(
|
|
487
|
+
"detrendParams.usePolyFilter",
|
|
488
|
+
False,
|
|
379
489
|
),
|
|
380
|
-
usePolyFilter=config.get("detrendParams.usePolyFilter", False),
|
|
381
490
|
detrendSavitzkyGolayDegree=config.get(
|
|
382
|
-
"detrendParams.detrendSavitzkyGolayDegree",
|
|
491
|
+
"detrendParams.detrendSavitzkyGolayDegree",
|
|
492
|
+
detrendSavitzkyGolayDegree_,
|
|
383
493
|
),
|
|
384
494
|
useOrderStatFilter=config.get(
|
|
385
|
-
"detrendParams.useOrderStatFilter",
|
|
495
|
+
"detrendParams.useOrderStatFilter",
|
|
496
|
+
True,
|
|
386
497
|
),
|
|
387
498
|
),
|
|
388
499
|
"matchingArgs": core.matchingParams(
|
|
389
|
-
templateNames=config.get(
|
|
390
|
-
|
|
500
|
+
templateNames=config.get(
|
|
501
|
+
"matchingParams.templateNames",
|
|
502
|
+
[],
|
|
503
|
+
),
|
|
504
|
+
cascadeLevels=config.get(
|
|
505
|
+
"matchingParams.cascadeLevels",
|
|
506
|
+
[],
|
|
507
|
+
),
|
|
391
508
|
iters=config.get("matchingParams.iters", 25_000),
|
|
392
509
|
alpha=config.get("matchingParams.alpha", 0.05),
|
|
393
|
-
minMatchLengthBP=config.get(
|
|
394
|
-
|
|
510
|
+
minMatchLengthBP=config.get(
|
|
511
|
+
"matchingParams.minMatchLengthBP", 250
|
|
512
|
+
),
|
|
513
|
+
maxNumMatches=config.get(
|
|
514
|
+
"matchingParams.maxNumMatches", 100_000
|
|
515
|
+
),
|
|
395
516
|
minSignalAtMaxima=config.get(
|
|
396
517
|
"matchingParams.minSignalAtMaxima", "q:0.75"
|
|
397
518
|
),
|
|
@@ -418,7 +539,9 @@ def convertBedGraphToBigWig(experimentName, chromSizesFile):
|
|
|
418
539
|
"OR install via conda (conda install -c bioconda ucsc-bedgraphtobigwig)."
|
|
419
540
|
)
|
|
420
541
|
|
|
421
|
-
logger.info(
|
|
542
|
+
logger.info(
|
|
543
|
+
"Attempting to generate bigWig files from bedGraph format..."
|
|
544
|
+
)
|
|
422
545
|
try:
|
|
423
546
|
path_ = shutil.which("bedGraphToBigWig")
|
|
424
547
|
except Exception as e:
|
|
@@ -429,7 +552,9 @@ def convertBedGraphToBigWig(experimentName, chromSizesFile):
|
|
|
429
552
|
return
|
|
430
553
|
logger.info(f"Using bedGraphToBigWig from {path_}")
|
|
431
554
|
for suffix in suffixes:
|
|
432
|
-
bedgraph =
|
|
555
|
+
bedgraph = (
|
|
556
|
+
f"consenrichOutput_{experimentName}_{suffix}.bedGraph"
|
|
557
|
+
)
|
|
433
558
|
if not os.path.exists(bedgraph):
|
|
434
559
|
logger.warning(
|
|
435
560
|
f"bedGraph file {bedgraph} does not exist. Skipping bigWig conversion."
|
|
@@ -452,7 +577,9 @@ def convertBedGraphToBigWig(experimentName, chromSizesFile):
|
|
|
452
577
|
)
|
|
453
578
|
continue
|
|
454
579
|
if os.path.exists(bigwig) and os.path.getsize(bigwig) > 100:
|
|
455
|
-
logger.info(
|
|
580
|
+
logger.info(
|
|
581
|
+
f"Finished: converted {bedgraph} to {bigwig}."
|
|
582
|
+
)
|
|
456
583
|
|
|
457
584
|
|
|
458
585
|
def main():
|
|
@@ -476,10 +603,16 @@ def main():
|
|
|
476
603
|
"--match-template",
|
|
477
604
|
type=str,
|
|
478
605
|
default="haar",
|
|
479
|
-
choices=[
|
|
606
|
+
choices=[
|
|
607
|
+
x
|
|
608
|
+
for x in pywt.wavelist(kind="discrete")
|
|
609
|
+
if "bio" not in x
|
|
610
|
+
],
|
|
480
611
|
dest="matchTemplate",
|
|
481
612
|
)
|
|
482
|
-
parser.add_argument(
|
|
613
|
+
parser.add_argument(
|
|
614
|
+
"--match-level", type=int, default=2, dest="matchLevel"
|
|
615
|
+
)
|
|
483
616
|
parser.add_argument(
|
|
484
617
|
"--match-alpha", type=float, default=0.05, dest="matchAlpha"
|
|
485
618
|
)
|
|
@@ -508,16 +641,24 @@ def main():
|
|
|
508
641
|
"--match-no-merge", action="store_true", dest="matchNoMerge"
|
|
509
642
|
)
|
|
510
643
|
parser.add_argument(
|
|
511
|
-
"--match-merge-gap",
|
|
644
|
+
"--match-merge-gap",
|
|
645
|
+
type=int,
|
|
646
|
+
default=None,
|
|
647
|
+
dest="matchMergeGapBP",
|
|
512
648
|
)
|
|
513
649
|
parser.add_argument(
|
|
514
|
-
"--match-use-wavelet",
|
|
650
|
+
"--match-use-wavelet",
|
|
651
|
+
action="store_true",
|
|
652
|
+
dest="matchUseWavelet",
|
|
515
653
|
)
|
|
516
654
|
parser.add_argument(
|
|
517
655
|
"--match-seed", type=int, default=42, dest="matchRandSeed"
|
|
518
656
|
)
|
|
519
657
|
parser.add_argument(
|
|
520
|
-
"--match-exclude-bed",
|
|
658
|
+
"--match-exclude-bed",
|
|
659
|
+
type=str,
|
|
660
|
+
default=None,
|
|
661
|
+
dest="matchExcludeBed",
|
|
521
662
|
)
|
|
522
663
|
parser.add_argument(
|
|
523
664
|
"--verbose", action="store_true", help="If set, logs config"
|
|
@@ -595,13 +736,17 @@ def main():
|
|
|
595
736
|
scaleDown = countingArgs.scaleDown
|
|
596
737
|
extendBP_ = core.resolveExtendBP(samArgs.extendBP, bamFiles)
|
|
597
738
|
initialTreatmentScaleFactors = []
|
|
739
|
+
minMatchLengthBP_: Optional[int] = matchingArgs.minMatchLengthBP
|
|
740
|
+
mergeGapBP_: Optional[int] = matchingArgs.mergeGapBP
|
|
741
|
+
|
|
598
742
|
if args.verbose:
|
|
599
743
|
try:
|
|
600
744
|
logger.info("Configuration:\n")
|
|
601
745
|
config_truncated = {
|
|
602
746
|
k: v
|
|
603
747
|
for k, v in config.items()
|
|
604
|
-
if k
|
|
748
|
+
if k
|
|
749
|
+
not in ["inputArgs", "genomeArgs", "countingArgs"]
|
|
605
750
|
}
|
|
606
751
|
config_truncated["experimentName"] = experimentName
|
|
607
752
|
config_truncated["inputArgs"] = inputArgs
|
|
@@ -619,7 +764,9 @@ def main():
|
|
|
619
764
|
controlsPresent = checkControlsPresent(inputArgs)
|
|
620
765
|
if args.verbose:
|
|
621
766
|
logger.info(f"controlsPresent: {controlsPresent}")
|
|
622
|
-
readLengthsBamFiles = getReadLengths(
|
|
767
|
+
readLengthsBamFiles = getReadLengths(
|
|
768
|
+
inputArgs, countingArgs, samArgs
|
|
769
|
+
)
|
|
623
770
|
effectiveGenomeSizes = getEffectiveGenomeSizes(
|
|
624
771
|
genomeArgs, readLengthsBamFiles
|
|
625
772
|
)
|
|
@@ -641,11 +788,16 @@ def main():
|
|
|
641
788
|
for bamFile in bamFilesControl
|
|
642
789
|
]
|
|
643
790
|
effectiveGenomeSizesControl = [
|
|
644
|
-
constants.getEffectiveGenomeSize(
|
|
791
|
+
constants.getEffectiveGenomeSize(
|
|
792
|
+
genomeArgs.genomeName, readLength
|
|
793
|
+
)
|
|
645
794
|
for readLength in readLengthsControlBamFiles
|
|
646
795
|
]
|
|
647
796
|
|
|
648
|
-
if
|
|
797
|
+
if (
|
|
798
|
+
scaleFactors is not None
|
|
799
|
+
and scaleFactorsControl is not None
|
|
800
|
+
):
|
|
649
801
|
treatScaleFactors = scaleFactors
|
|
650
802
|
controlScaleFactors = scaleFactorsControl
|
|
651
803
|
# still make sure this is accessible
|
|
@@ -662,7 +814,9 @@ def main():
|
|
|
662
814
|
samArgs.samThreads,
|
|
663
815
|
)
|
|
664
816
|
for bamFile, effectiveGenomeSize, readLength in zip(
|
|
665
|
-
bamFiles,
|
|
817
|
+
bamFiles,
|
|
818
|
+
effectiveGenomeSizes,
|
|
819
|
+
readLengthsBamFiles,
|
|
666
820
|
)
|
|
667
821
|
]
|
|
668
822
|
except Exception:
|
|
@@ -716,7 +870,8 @@ def main():
|
|
|
716
870
|
)
|
|
717
871
|
]
|
|
718
872
|
chromSizesDict = misc_util.getChromSizesDict(
|
|
719
|
-
genomeArgs.chromSizesFile,
|
|
873
|
+
genomeArgs.chromSizesFile,
|
|
874
|
+
excludeChroms=genomeArgs.excludeChroms,
|
|
720
875
|
)
|
|
721
876
|
chromosomes = genomeArgs.chromosomes
|
|
722
877
|
|
|
@@ -731,11 +886,15 @@ def main():
|
|
|
731
886
|
chromosomeStart = max(
|
|
732
887
|
0, (chromosomeStart - (chromosomeStart % stepSize))
|
|
733
888
|
)
|
|
734
|
-
chromosomeEnd = max(
|
|
889
|
+
chromosomeEnd = max(
|
|
890
|
+
0, (chromosomeEnd - (chromosomeEnd % stepSize))
|
|
891
|
+
)
|
|
735
892
|
numIntervals = (
|
|
736
893
|
((chromosomeEnd - chromosomeStart) + stepSize) - 1
|
|
737
894
|
) // stepSize
|
|
738
|
-
intervals = np.arange(
|
|
895
|
+
intervals = np.arange(
|
|
896
|
+
chromosomeStart, chromosomeEnd, stepSize
|
|
897
|
+
)
|
|
739
898
|
chromMat: np.ndarray = np.empty(
|
|
740
899
|
(numSamples, numIntervals), dtype=np.float32
|
|
741
900
|
)
|
|
@@ -752,7 +911,10 @@ def main():
|
|
|
752
911
|
chromosomeStart,
|
|
753
912
|
chromosomeEnd,
|
|
754
913
|
stepSize,
|
|
755
|
-
[
|
|
914
|
+
[
|
|
915
|
+
readLengthsBamFiles[j_],
|
|
916
|
+
readLengthsControlBamFiles[j_],
|
|
917
|
+
],
|
|
756
918
|
[treatScaleFactors[j_], controlScaleFactors[j_]],
|
|
757
919
|
samArgs.oneReadPerBin,
|
|
758
920
|
samArgs.samThreads,
|
|
@@ -764,10 +926,12 @@ def main():
|
|
|
764
926
|
inferFragmentLength=samArgs.inferFragmentLength,
|
|
765
927
|
applyAsinh=countingArgs.applyAsinh,
|
|
766
928
|
applyLog=countingArgs.applyLog,
|
|
767
|
-
countEndsOnly=samArgs.countEndsOnly
|
|
929
|
+
countEndsOnly=samArgs.countEndsOnly,
|
|
768
930
|
)
|
|
769
931
|
if countingArgs.rescaleToTreatmentCoverage:
|
|
770
|
-
finalSF = max(
|
|
932
|
+
finalSF = max(
|
|
933
|
+
1.0, initialTreatmentScaleFactors[j_]
|
|
934
|
+
)
|
|
771
935
|
chromMat[j_, :] = finalSF * (
|
|
772
936
|
pairMatrix[0, :] - pairMatrix[1, :]
|
|
773
937
|
)
|
|
@@ -791,18 +955,25 @@ def main():
|
|
|
791
955
|
inferFragmentLength=samArgs.inferFragmentLength,
|
|
792
956
|
applyAsinh=countingArgs.applyAsinh,
|
|
793
957
|
applyLog=countingArgs.applyLog,
|
|
794
|
-
countEndsOnly=samArgs.countEndsOnly
|
|
958
|
+
countEndsOnly=samArgs.countEndsOnly,
|
|
795
959
|
)
|
|
796
960
|
sparseMap = None
|
|
797
961
|
if genomeArgs.sparseBedFile and not observationArgs.useALV:
|
|
798
|
-
logger.info(
|
|
962
|
+
logger.info(
|
|
963
|
+
f"Building sparse mapping for {chromosome}..."
|
|
964
|
+
)
|
|
799
965
|
sparseMap = core.getSparseMap(
|
|
800
|
-
chromosome,
|
|
966
|
+
chromosome,
|
|
967
|
+
intervals,
|
|
968
|
+
numNearest,
|
|
969
|
+
genomeArgs.sparseBedFile,
|
|
801
970
|
)
|
|
802
971
|
|
|
803
972
|
muncMat = np.empty_like(chromMat, dtype=np.float32)
|
|
804
973
|
for j in range(numSamples):
|
|
805
|
-
logger.info(
|
|
974
|
+
logger.info(
|
|
975
|
+
f"Muncing {j + 1}/{numSamples} for {chromosome}..."
|
|
976
|
+
)
|
|
806
977
|
muncMat[j, :] = core.getMuncTrack(
|
|
807
978
|
chromosome,
|
|
808
979
|
intervals,
|
|
@@ -873,8 +1044,11 @@ def main():
|
|
|
873
1044
|
)
|
|
874
1045
|
if c_ == 0 and len(chromosomes) > 1:
|
|
875
1046
|
for file_ in os.listdir("."):
|
|
876
|
-
if file_.startswith(
|
|
877
|
-
|
|
1047
|
+
if file_.startswith(
|
|
1048
|
+
f"consenrichOutput_{experimentName}"
|
|
1049
|
+
) and (
|
|
1050
|
+
file_.endswith(".bedGraph")
|
|
1051
|
+
or file_.endswith(".narrowPeak")
|
|
878
1052
|
):
|
|
879
1053
|
logger.warning(f"Overwriting: {file_}")
|
|
880
1054
|
os.remove(file_)
|
|
@@ -894,6 +1068,18 @@ def main():
|
|
|
894
1068
|
)
|
|
895
1069
|
try:
|
|
896
1070
|
if matchingEnabled:
|
|
1071
|
+
if (
|
|
1072
|
+
minMatchLengthBP_ is None
|
|
1073
|
+
or minMatchLengthBP_ <= 0
|
|
1074
|
+
):
|
|
1075
|
+
minMatchLengthBP_ = (
|
|
1076
|
+
matching.autoMinLengthIntervals(x_)
|
|
1077
|
+
* (intervals[1] - intervals[0])
|
|
1078
|
+
)
|
|
1079
|
+
|
|
1080
|
+
if mergeGapBP_ is None:
|
|
1081
|
+
mergeGapBP_ = int(minMatchLengthBP_ / 2) + 1
|
|
1082
|
+
|
|
897
1083
|
matchingDF = matching.matchWavelet(
|
|
898
1084
|
chromosome,
|
|
899
1085
|
intervals,
|
|
@@ -902,7 +1088,7 @@ def main():
|
|
|
902
1088
|
matchingArgs.cascadeLevels,
|
|
903
1089
|
matchingArgs.iters,
|
|
904
1090
|
matchingArgs.alpha,
|
|
905
|
-
|
|
1091
|
+
minMatchLengthBP_,
|
|
906
1092
|
matchingArgs.maxNumMatches,
|
|
907
1093
|
matchingArgs.minSignalAtMaxima,
|
|
908
1094
|
useScalingFunction=matchingArgs.useScalingFunction,
|
|
@@ -927,13 +1113,23 @@ def main():
|
|
|
927
1113
|
convertBedGraphToBigWig(experimentName, genomeArgs.chromSizesFile)
|
|
928
1114
|
if matchingEnabled and matchingArgs.merge:
|
|
929
1115
|
try:
|
|
1116
|
+
mergeGapBP_ = matchingArgs.mergeGapBP
|
|
1117
|
+
if mergeGapBP_ is None or mergeGapBP_ <= 0:
|
|
1118
|
+
mergeGapBP_ = (
|
|
1119
|
+
int(minMatchLengthBP_ / 2) + 1
|
|
1120
|
+
if minMatchLengthBP_ is not None
|
|
1121
|
+
and minMatchLengthBP_ >= 0
|
|
1122
|
+
else 75
|
|
1123
|
+
)
|
|
930
1124
|
matching.mergeMatches(
|
|
931
1125
|
f"consenrichOutput_{experimentName}_matches.narrowPeak",
|
|
932
|
-
mergeGapBP=
|
|
1126
|
+
mergeGapBP=mergeGapBP_,
|
|
933
1127
|
)
|
|
934
1128
|
|
|
935
1129
|
except Exception as e:
|
|
936
|
-
logger.warning(
|
|
1130
|
+
logger.warning(
|
|
1131
|
+
f"Failed to merge matches...SKIPPING:\n{e}\n\n"
|
|
1132
|
+
)
|
|
937
1133
|
logger.info("Done.")
|
|
938
1134
|
|
|
939
1135
|
|
consenrich/core.py
CHANGED
|
@@ -317,25 +317,27 @@ class matchingParams(NamedTuple):
|
|
|
317
317
|
|
|
318
318
|
See :ref:`matching` for an overview of the approach.
|
|
319
319
|
|
|
320
|
-
:param templateNames: A list of str values -- wavelet
|
|
320
|
+
:param templateNames: A list of str values -- each entry references a mother wavelet (or its corresponding scaling function). e.g., `[haar, db2]`
|
|
321
321
|
:type templateNames: List[str]
|
|
322
|
-
:param cascadeLevels:
|
|
323
|
-
the
|
|
322
|
+
:param cascadeLevels: Number of cascade iterations used to approximate each template (wavelet or scaling function).
|
|
323
|
+
Must have the same length as `templateNames`, with each entry aligned to the
|
|
324
|
+
corresponding template. e.g., given templateNames `[haar, db2]`, then `[2,2]` would use 2 cascade levels for both templates.
|
|
324
325
|
:type cascadeLevels: List[int]
|
|
325
326
|
:param iters: Number of random blocks to sample in the response sequence while building
|
|
326
327
|
an empirical null to test significance. See :func:`cconsenrich.csampleBlockStats`.
|
|
327
328
|
:type iters: int
|
|
328
329
|
:param alpha: Primary significance threshold on detected matches. Specifically, the
|
|
329
|
-
|
|
330
|
-
|
|
330
|
+
minimum corr. empirical p-value approximated from randomly sampled blocks in the
|
|
331
|
+
response sequence.
|
|
331
332
|
:type alpha: float
|
|
332
333
|
:param minMatchLengthBP: Within a window of `minMatchLengthBP` length (bp), relative maxima in
|
|
333
334
|
the signal-template convolution must be greater in value than others to qualify as matches.
|
|
334
|
-
|
|
335
|
+
If set to a value less than 1, the minimum length is determined via :func:`consenrich.matching.autoMinLengthIntervals`.
|
|
336
|
+
If omitted from the configuration, defaults to 250 bp. An explicit `None` is handled like a value less than 1 (automatic selection).
|
|
335
337
|
:param minSignalAtMaxima: Secondary significance threshold coupled with `alpha`. Requires the *signal value*
|
|
336
|
-
at relative maxima in the response sequence to be greater than this threshold. Comparisons are made in log-scale
|
|
337
|
-
If a `float` value is provided, the minimum signal value must be greater
|
|
338
|
-
negative value to disable the threshold*.
|
|
338
|
+
at relative maxima in the response sequence to be greater than this threshold. Comparisons are made in log-scale
|
|
339
|
+
to temper genome-wide dynamic range. If a `float` value is provided, the minimum signal value must be greater
|
|
340
|
+
than this (absolute) value. *Set to a negative value to disable the threshold*.
|
|
339
341
|
If a `str` value is provided, looks for 'q:quantileValue', e.g., 'q:0.90'. The
|
|
340
342
|
threshold is then set to the corresponding quantile of the non-zero signal estimates.
|
|
341
343
|
:type minSignalAtMaxima: Optional[str | float]
|