consenrich 0.6.3b1__cp311-cp311-macosx_11_0_arm64.whl → 0.7.1b1__cp311-cp311-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of consenrich might be problematic. Click here for more details.
- consenrich/cconsenrich.c +404 -404
- consenrich/cconsenrich.cpython-311-darwin.so +0 -0
- consenrich/consenrich.py +216 -62
- consenrich/core.py +30 -17
- consenrich/detrorm.py +12 -3
- consenrich/matching.py +444 -369
- consenrich/misc_util.py +29 -0
- {consenrich-0.6.3b1.dist-info → consenrich-0.7.1b1.dist-info}/METADATA +3 -3
- {consenrich-0.6.3b1.dist-info → consenrich-0.7.1b1.dist-info}/RECORD +13 -13
- {consenrich-0.6.3b1.dist-info → consenrich-0.7.1b1.dist-info}/WHEEL +0 -0
- {consenrich-0.6.3b1.dist-info → consenrich-0.7.1b1.dist-info}/entry_points.txt +0 -0
- {consenrich-0.6.3b1.dist-info → consenrich-0.7.1b1.dist-info}/licenses/LICENSE +0 -0
- {consenrich-0.6.3b1.dist-info → consenrich-0.7.1b1.dist-info}/top_level.txt +0 -0
|
Binary file
|
consenrich/consenrich.py
CHANGED
|
@@ -39,11 +39,11 @@ def _listOrEmpty(list_):
|
|
|
39
39
|
|
|
40
40
|
|
|
41
41
|
def _getMinR(cfg, numBams: int) -> float:
|
|
42
|
+
fallBackMinR: float = 1.0
|
|
42
43
|
try:
|
|
43
44
|
raw = cfg.get("observationParams.minR", None)
|
|
44
|
-
return float(raw) if raw is not None else
|
|
45
|
+
return float(raw) if raw is not None else fallBackMinR
|
|
45
46
|
except (TypeError, ValueError, KeyError):
|
|
46
|
-
fallBackMinR: float = 1.0e-2
|
|
47
47
|
logger.warning(
|
|
48
48
|
f"Invalid or missing 'observationParams.minR' in config. Using `{fallBackMinR}`."
|
|
49
49
|
)
|
|
@@ -76,9 +76,14 @@ def getReadLengths(
|
|
|
76
76
|
:return: List of read lengths for each BAM file.
|
|
77
77
|
"""
|
|
78
78
|
if not inputArgs.bamFiles:
|
|
79
|
-
raise ValueError(
|
|
79
|
+
raise ValueError(
|
|
80
|
+
"No BAM files provided in the input arguments."
|
|
81
|
+
)
|
|
80
82
|
|
|
81
|
-
if
|
|
83
|
+
if (
|
|
84
|
+
not isinstance(inputArgs.bamFiles, list)
|
|
85
|
+
or len(inputArgs.bamFiles) == 0
|
|
86
|
+
):
|
|
82
87
|
raise ValueError("bam files list is empty")
|
|
83
88
|
|
|
84
89
|
return [
|
|
@@ -148,7 +153,9 @@ def getInputArgs(config_path: str) -> core.inputParams:
|
|
|
148
153
|
bamFiles = _expandWildCards(bamFilesRaw)
|
|
149
154
|
bamFilesControl = _expandWildCards(bamFilesControlRaw)
|
|
150
155
|
if len(bamFiles) == 0:
|
|
151
|
-
raise ValueError(
|
|
156
|
+
raise ValueError(
|
|
157
|
+
"No BAM files provided in the configuration."
|
|
158
|
+
)
|
|
152
159
|
if (
|
|
153
160
|
len(bamFilesControl) > 0
|
|
154
161
|
and len(bamFilesControl) != len(bamFiles)
|
|
@@ -164,7 +171,11 @@ def getInputArgs(config_path: str) -> core.inputParams:
|
|
|
164
171
|
)
|
|
165
172
|
bamFilesControl = bamFilesControl * len(bamFiles)
|
|
166
173
|
|
|
167
|
-
if
|
|
174
|
+
if (
|
|
175
|
+
not bamFiles
|
|
176
|
+
or not isinstance(bamFiles, list)
|
|
177
|
+
or len(bamFiles) == 0
|
|
178
|
+
):
|
|
168
179
|
raise ValueError("No BAM files found")
|
|
169
180
|
|
|
170
181
|
for i, bamFile in enumerate(bamFiles):
|
|
@@ -174,7 +185,23 @@ def getInputArgs(config_path: str) -> core.inputParams:
|
|
|
174
185
|
for i, bamFile in enumerate(bamFilesControl):
|
|
175
186
|
misc_util.checkBamFile(bamFile)
|
|
176
187
|
|
|
177
|
-
|
|
188
|
+
# if we've made it here, we can check pairedEnd
|
|
189
|
+
pairedEndList = misc_util.bamsArePairedEnd(bamFiles)
|
|
190
|
+
_isPairedEnd: Optional[bool] = config.get(
|
|
191
|
+
"inputParams.pairedEnd", None
|
|
192
|
+
)
|
|
193
|
+
if _isPairedEnd is None:
|
|
194
|
+
# only set auto if not provided in config
|
|
195
|
+
_isPairedEnd = all(pairedEndList)
|
|
196
|
+
if _isPairedEnd:
|
|
197
|
+
logger.info("Paired-end BAM files detected")
|
|
198
|
+
else:
|
|
199
|
+
logger.info("One or more single-end BAM files detected")
|
|
200
|
+
return core.inputParams(
|
|
201
|
+
bamFiles=bamFiles,
|
|
202
|
+
bamFilesControl=bamFilesControl,
|
|
203
|
+
pairedEnd=_isPairedEnd,
|
|
204
|
+
)
|
|
178
205
|
|
|
179
206
|
|
|
180
207
|
def getGenomeArgs(config_path: str) -> core.genomeParams:
|
|
@@ -186,12 +213,22 @@ def getGenomeArgs(config_path: str) -> core.genomeParams:
|
|
|
186
213
|
blacklistFile: Optional[str] = None
|
|
187
214
|
sparseBedFile: Optional[str] = None
|
|
188
215
|
chromosomes: Optional[List[str]] = None
|
|
189
|
-
excludeChroms: List[str] = config.get(
|
|
190
|
-
|
|
216
|
+
excludeChroms: List[str] = config.get(
|
|
217
|
+
"genomeParams.excludeChroms", []
|
|
218
|
+
)
|
|
219
|
+
excludeForNorm: List[str] = config.get(
|
|
220
|
+
"genomeParams.excludeForNorm", []
|
|
221
|
+
)
|
|
191
222
|
if genome:
|
|
192
|
-
chromSizesFile = constants.getGenomeResourceFile(
|
|
193
|
-
|
|
194
|
-
|
|
223
|
+
chromSizesFile = constants.getGenomeResourceFile(
|
|
224
|
+
genome, "sizes"
|
|
225
|
+
)
|
|
226
|
+
blacklistFile = constants.getGenomeResourceFile(
|
|
227
|
+
genome, "blacklist"
|
|
228
|
+
)
|
|
229
|
+
sparseBedFile = constants.getGenomeResourceFile(
|
|
230
|
+
genome, "sparse"
|
|
231
|
+
)
|
|
195
232
|
if config.get("genomeParams.chromSizesFile", None):
|
|
196
233
|
chromSizesFile = config["genomeParams.chromSizesFile"]
|
|
197
234
|
if config.get("genomeParams.blacklistFile", None):
|
|
@@ -218,10 +255,14 @@ def getGenomeArgs(config_path: str) -> core.genomeParams:
|
|
|
218
255
|
raise ValueError(
|
|
219
256
|
"No chromosomes provided in the configuration and no chromosome sizes file specified."
|
|
220
257
|
)
|
|
221
|
-
chromosomes = [
|
|
258
|
+
chromosomes = [
|
|
259
|
+
chrom.strip() for chrom in chromosomes if chrom.strip()
|
|
260
|
+
]
|
|
222
261
|
if excludeChroms:
|
|
223
262
|
chromosomes = [
|
|
224
|
-
chrom
|
|
263
|
+
chrom
|
|
264
|
+
for chrom in chromosomes
|
|
265
|
+
if chrom not in excludeChroms
|
|
225
266
|
]
|
|
226
267
|
if not chromosomes:
|
|
227
268
|
raise ValueError(
|
|
@@ -245,7 +286,9 @@ def getCountingArgs(config_path: str) -> core.countingParams:
|
|
|
245
286
|
scaleDown = config.get("countingParams.scaleDown", True)
|
|
246
287
|
scaleFactors = config.get("countingParams.scaleFactors", None)
|
|
247
288
|
numReads = config.get("countingParams.numReads", 100)
|
|
248
|
-
scaleFactorsControl = config.get(
|
|
289
|
+
scaleFactorsControl = config.get(
|
|
290
|
+
"countingParams.scaleFactorsControl", None
|
|
291
|
+
)
|
|
249
292
|
applyAsinh = config.get("countingParams.applyAsinh", False)
|
|
250
293
|
applyLog = config.get("countingParams.applyLog", False)
|
|
251
294
|
if applyAsinh and applyLog:
|
|
@@ -257,19 +300,25 @@ def getCountingArgs(config_path: str) -> core.countingParams:
|
|
|
257
300
|
rescaleToTreatmentCoverage = config.get(
|
|
258
301
|
"countingParams.rescaleToTreatmentCoverage", True
|
|
259
302
|
)
|
|
260
|
-
if scaleFactors is not None and not isinstance(
|
|
303
|
+
if scaleFactors is not None and not isinstance(
|
|
304
|
+
scaleFactors, list
|
|
305
|
+
):
|
|
261
306
|
raise ValueError("`scaleFactors` should be a list of floats.")
|
|
262
307
|
if scaleFactorsControl is not None and not isinstance(
|
|
263
308
|
scaleFactorsControl, list
|
|
264
309
|
):
|
|
265
|
-
raise ValueError(
|
|
310
|
+
raise ValueError(
|
|
311
|
+
"`scaleFactorsControl` should be a list of floats."
|
|
312
|
+
)
|
|
266
313
|
if (
|
|
267
314
|
scaleFactors is not None
|
|
268
315
|
and scaleFactorsControl is not None
|
|
269
316
|
and len(scaleFactors) != len(scaleFactorsControl)
|
|
270
317
|
):
|
|
271
318
|
if len(scaleFactorsControl) == 1:
|
|
272
|
-
scaleFactorsControl = scaleFactorsControl * len(
|
|
319
|
+
scaleFactorsControl = scaleFactorsControl * len(
|
|
320
|
+
scaleFactors
|
|
321
|
+
)
|
|
273
322
|
else:
|
|
274
323
|
raise ValueError(
|
|
275
324
|
"control and treatment scale factors: must be equal length or 1 control"
|
|
@@ -294,22 +343,27 @@ def readConfig(config_path: str) -> Dict[str, Any]:
|
|
|
294
343
|
genomeParams = getGenomeArgs(config_path)
|
|
295
344
|
countingParams = getCountingArgs(config_path)
|
|
296
345
|
minR_default = _getMinR(config, len(inputParams.bamFiles))
|
|
346
|
+
minQ_default = (
|
|
347
|
+
minR_default / (len(inputParams.bamFiles))
|
|
348
|
+
) + 0.10 # protect condition number
|
|
297
349
|
matchingExcludeRegionsBedFile_default: Optional[str] = (
|
|
298
350
|
genomeParams.blacklistFile
|
|
299
351
|
)
|
|
300
352
|
return {
|
|
301
|
-
"experimentName": config.get(
|
|
353
|
+
"experimentName": config.get(
|
|
354
|
+
"experimentName", "consenrichExperiment"
|
|
355
|
+
),
|
|
302
356
|
"genomeArgs": genomeParams,
|
|
303
357
|
"inputArgs": inputParams,
|
|
304
358
|
"countingArgs": countingParams,
|
|
305
359
|
"processArgs": core.processParams(
|
|
306
360
|
deltaF=config.get("processParams.deltaF", 0.5),
|
|
307
|
-
minQ=config.get("processParams.minQ",
|
|
361
|
+
minQ=config.get("processParams.minQ", minQ_default),
|
|
308
362
|
maxQ=config.get("processParams.maxQ", 500.0),
|
|
309
363
|
offDiagQ=config.get("processParams.offDiagQ", 0.0),
|
|
310
364
|
dStatAlpha=config.get("processParams.dStatAlpha", 3.0),
|
|
311
365
|
dStatd=config.get("processParams.dStatd", 10.0),
|
|
312
|
-
dStatPC=config.get("processParams.dStatPC",
|
|
366
|
+
dStatPC=config.get("processParams.dStatPC", 1.0),
|
|
313
367
|
scaleResidualsByP11=config.get(
|
|
314
368
|
"processParams.scaleResidualsByP11", False
|
|
315
369
|
),
|
|
@@ -323,8 +377,12 @@ def readConfig(config_path: str) -> Dict[str, Any]:
|
|
|
323
377
|
),
|
|
324
378
|
noGlobal=config.get("observationParams.noGlobal", False),
|
|
325
379
|
numNearest=config.get("observationParams.numNearest", 25),
|
|
326
|
-
localWeight=config.get(
|
|
327
|
-
|
|
380
|
+
localWeight=config.get(
|
|
381
|
+
"observationParams.localWeight", 0.333
|
|
382
|
+
),
|
|
383
|
+
globalWeight=config.get(
|
|
384
|
+
"observationParams.globalWeight", 0.667
|
|
385
|
+
),
|
|
328
386
|
approximationWindowLengthBP=config.get(
|
|
329
387
|
"observationParams.approximationWindowLengthBP", 10000
|
|
330
388
|
),
|
|
@@ -334,25 +392,50 @@ def readConfig(config_path: str) -> Dict[str, Any]:
|
|
|
334
392
|
lowPassFilterType=config.get(
|
|
335
393
|
"observationParams.lowPassFilterType", "median"
|
|
336
394
|
),
|
|
337
|
-
returnCenter=config.get(
|
|
395
|
+
returnCenter=config.get(
|
|
396
|
+
"observationParams.returnCenter", True
|
|
397
|
+
),
|
|
338
398
|
),
|
|
339
399
|
"stateArgs": core.stateParams(
|
|
340
400
|
stateInit=config.get("stateParams.stateInit", 0.0),
|
|
341
|
-
stateCovarInit=config.get(
|
|
401
|
+
stateCovarInit=config.get(
|
|
402
|
+
"stateParams.stateCovarInit", 100.0
|
|
403
|
+
),
|
|
342
404
|
boundState=config.get("stateParams.boundState", True),
|
|
343
|
-
stateLowerBound=config.get(
|
|
344
|
-
|
|
405
|
+
stateLowerBound=config.get(
|
|
406
|
+
"stateParams.stateLowerBound", 0.0
|
|
407
|
+
),
|
|
408
|
+
stateUpperBound=config.get(
|
|
409
|
+
"stateParams.stateUpperBound", 10000.0
|
|
410
|
+
),
|
|
345
411
|
),
|
|
346
412
|
"samArgs": core.samParams(
|
|
347
413
|
samThreads=config.get("samParams.samThreads", 1),
|
|
348
|
-
samFlagExclude=config.get(
|
|
414
|
+
samFlagExclude=config.get(
|
|
415
|
+
"samParams.samFlagExclude", 3844
|
|
416
|
+
),
|
|
349
417
|
oneReadPerBin=config.get("samParams.oneReadPerBin", 0),
|
|
350
418
|
chunkSize=config.get("samParams.chunkSize", 1000000),
|
|
351
419
|
offsetStr=config.get("samParams.offsetStr", "0,0"),
|
|
352
420
|
extendBP=config.get("samParams.extendBP", []),
|
|
353
421
|
maxInsertSize=config.get("samParams.maxInsertSize", 1000),
|
|
354
|
-
pairedEndMode=config.get(
|
|
355
|
-
|
|
422
|
+
pairedEndMode=config.get(
|
|
423
|
+
"samParams.pairedEndMode",
|
|
424
|
+
1
|
|
425
|
+
if inputParams.pairedEnd is not None
|
|
426
|
+
and int(inputParams.pairedEnd) > 0
|
|
427
|
+
else 0,
|
|
428
|
+
),
|
|
429
|
+
inferFragmentLength=config.get(
|
|
430
|
+
"samParams.inferFragmentLength",
|
|
431
|
+
1
|
|
432
|
+
if inputParams.pairedEnd is not None
|
|
433
|
+
and int(inputParams.pairedEnd) == 0
|
|
434
|
+
else 0,
|
|
435
|
+
),
|
|
436
|
+
countEndsOnly=config.get(
|
|
437
|
+
"samParams.countEndsOnly", False
|
|
438
|
+
),
|
|
356
439
|
),
|
|
357
440
|
"detrendArgs": core.detrendParams(
|
|
358
441
|
detrendWindowLengthBP=config.get(
|
|
@@ -361,7 +444,9 @@ def readConfig(config_path: str) -> Dict[str, Any]:
|
|
|
361
444
|
detrendTrackPercentile=config.get(
|
|
362
445
|
"detrendParams.detrendTrackPercentile", 75.0
|
|
363
446
|
),
|
|
364
|
-
usePolyFilter=config.get(
|
|
447
|
+
usePolyFilter=config.get(
|
|
448
|
+
"detrendParams.usePolyFilter", False
|
|
449
|
+
),
|
|
365
450
|
detrendSavitzkyGolayDegree=config.get(
|
|
366
451
|
"detrendParams.detrendSavitzkyGolayDegree", 2
|
|
367
452
|
),
|
|
@@ -370,17 +455,25 @@ def readConfig(config_path: str) -> Dict[str, Any]:
|
|
|
370
455
|
),
|
|
371
456
|
),
|
|
372
457
|
"matchingArgs": core.matchingParams(
|
|
373
|
-
templateNames=config.get(
|
|
374
|
-
|
|
458
|
+
templateNames=config.get(
|
|
459
|
+
"matchingParams.templateNames", []
|
|
460
|
+
),
|
|
461
|
+
cascadeLevels=config.get(
|
|
462
|
+
"matchingParams.cascadeLevels", [2]
|
|
463
|
+
),
|
|
375
464
|
iters=config.get("matchingParams.iters", 25_000),
|
|
376
465
|
alpha=config.get("matchingParams.alpha", 0.05),
|
|
377
|
-
minMatchLengthBP=config.get(
|
|
378
|
-
|
|
466
|
+
minMatchLengthBP=config.get(
|
|
467
|
+
"matchingParams.minMatchLengthBP", 250
|
|
468
|
+
),
|
|
469
|
+
maxNumMatches=config.get(
|
|
470
|
+
"matchingParams.maxNumMatches", 100_000
|
|
471
|
+
),
|
|
379
472
|
minSignalAtMaxima=config.get(
|
|
380
473
|
"matchingParams.minSignalAtMaxima", "q:0.75"
|
|
381
474
|
),
|
|
382
475
|
merge=config.get("matchingParams.merge", True),
|
|
383
|
-
mergeGapBP=config.get("matchingParams.mergeGapBP",
|
|
476
|
+
mergeGapBP=config.get("matchingParams.mergeGapBP", None),
|
|
384
477
|
useScalingFunction=config.get(
|
|
385
478
|
"matchingParams.useScalingFunction", True
|
|
386
479
|
),
|
|
@@ -402,7 +495,9 @@ def convertBedGraphToBigWig(experimentName, chromSizesFile):
|
|
|
402
495
|
"OR install via conda (conda install -c bioconda ucsc-bedgraphtobigwig)."
|
|
403
496
|
)
|
|
404
497
|
|
|
405
|
-
logger.info(
|
|
498
|
+
logger.info(
|
|
499
|
+
"Attempting to generate bigWig files from bedGraph format..."
|
|
500
|
+
)
|
|
406
501
|
try:
|
|
407
502
|
path_ = shutil.which("bedGraphToBigWig")
|
|
408
503
|
except Exception as e:
|
|
@@ -413,7 +508,9 @@ def convertBedGraphToBigWig(experimentName, chromSizesFile):
|
|
|
413
508
|
return
|
|
414
509
|
logger.info(f"Using bedGraphToBigWig from {path_}")
|
|
415
510
|
for suffix in suffixes:
|
|
416
|
-
bedgraph =
|
|
511
|
+
bedgraph = (
|
|
512
|
+
f"consenrichOutput_{experimentName}_{suffix}.bedGraph"
|
|
513
|
+
)
|
|
417
514
|
if not os.path.exists(bedgraph):
|
|
418
515
|
logger.warning(
|
|
419
516
|
f"bedGraph file {bedgraph} does not exist. Skipping bigWig conversion."
|
|
@@ -436,7 +533,9 @@ def convertBedGraphToBigWig(experimentName, chromSizesFile):
|
|
|
436
533
|
)
|
|
437
534
|
continue
|
|
438
535
|
if os.path.exists(bigwig) and os.path.getsize(bigwig) > 100:
|
|
439
|
-
logger.info(
|
|
536
|
+
logger.info(
|
|
537
|
+
f"Finished: converted {bedgraph} to {bigwig}."
|
|
538
|
+
)
|
|
440
539
|
|
|
441
540
|
|
|
442
541
|
def main():
|
|
@@ -460,10 +559,16 @@ def main():
|
|
|
460
559
|
"--match-template",
|
|
461
560
|
type=str,
|
|
462
561
|
default="haar",
|
|
463
|
-
choices=[
|
|
562
|
+
choices=[
|
|
563
|
+
x
|
|
564
|
+
for x in pywt.wavelist(kind="discrete")
|
|
565
|
+
if "bio" not in x
|
|
566
|
+
],
|
|
464
567
|
dest="matchTemplate",
|
|
465
568
|
)
|
|
466
|
-
parser.add_argument(
|
|
569
|
+
parser.add_argument(
|
|
570
|
+
"--match-level", type=int, default=2, dest="matchLevel"
|
|
571
|
+
)
|
|
467
572
|
parser.add_argument(
|
|
468
573
|
"--match-alpha", type=float, default=0.05, dest="matchAlpha"
|
|
469
574
|
)
|
|
@@ -492,16 +597,24 @@ def main():
|
|
|
492
597
|
"--match-no-merge", action="store_true", dest="matchNoMerge"
|
|
493
598
|
)
|
|
494
599
|
parser.add_argument(
|
|
495
|
-
"--match-merge-gap",
|
|
600
|
+
"--match-merge-gap",
|
|
601
|
+
type=int,
|
|
602
|
+
default=None,
|
|
603
|
+
dest="matchMergeGapBP",
|
|
496
604
|
)
|
|
497
605
|
parser.add_argument(
|
|
498
|
-
"--match-use-wavelet",
|
|
606
|
+
"--match-use-wavelet",
|
|
607
|
+
action="store_true",
|
|
608
|
+
dest="matchUseWavelet",
|
|
499
609
|
)
|
|
500
610
|
parser.add_argument(
|
|
501
611
|
"--match-seed", type=int, default=42, dest="matchRandSeed"
|
|
502
612
|
)
|
|
503
613
|
parser.add_argument(
|
|
504
|
-
"--match-exclude-bed",
|
|
614
|
+
"--match-exclude-bed",
|
|
615
|
+
type=str,
|
|
616
|
+
default=None,
|
|
617
|
+
dest="matchExcludeBed",
|
|
505
618
|
)
|
|
506
619
|
parser.add_argument(
|
|
507
620
|
"--verbose", action="store_true", help="If set, logs config"
|
|
@@ -585,7 +698,8 @@ def main():
|
|
|
585
698
|
config_truncated = {
|
|
586
699
|
k: v
|
|
587
700
|
for k, v in config.items()
|
|
588
|
-
if k
|
|
701
|
+
if k
|
|
702
|
+
not in ["inputArgs", "genomeArgs", "countingArgs"]
|
|
589
703
|
}
|
|
590
704
|
config_truncated["experimentName"] = experimentName
|
|
591
705
|
config_truncated["inputArgs"] = inputArgs
|
|
@@ -603,7 +717,9 @@ def main():
|
|
|
603
717
|
controlsPresent = checkControlsPresent(inputArgs)
|
|
604
718
|
if args.verbose:
|
|
605
719
|
logger.info(f"controlsPresent: {controlsPresent}")
|
|
606
|
-
readLengthsBamFiles = getReadLengths(
|
|
720
|
+
readLengthsBamFiles = getReadLengths(
|
|
721
|
+
inputArgs, countingArgs, samArgs
|
|
722
|
+
)
|
|
607
723
|
effectiveGenomeSizes = getEffectiveGenomeSizes(
|
|
608
724
|
genomeArgs, readLengthsBamFiles
|
|
609
725
|
)
|
|
@@ -625,11 +741,16 @@ def main():
|
|
|
625
741
|
for bamFile in bamFilesControl
|
|
626
742
|
]
|
|
627
743
|
effectiveGenomeSizesControl = [
|
|
628
|
-
constants.getEffectiveGenomeSize(
|
|
744
|
+
constants.getEffectiveGenomeSize(
|
|
745
|
+
genomeArgs.genomeName, readLength
|
|
746
|
+
)
|
|
629
747
|
for readLength in readLengthsControlBamFiles
|
|
630
748
|
]
|
|
631
749
|
|
|
632
|
-
if
|
|
750
|
+
if (
|
|
751
|
+
scaleFactors is not None
|
|
752
|
+
and scaleFactorsControl is not None
|
|
753
|
+
):
|
|
633
754
|
treatScaleFactors = scaleFactors
|
|
634
755
|
controlScaleFactors = scaleFactorsControl
|
|
635
756
|
# still make sure this is accessible
|
|
@@ -646,7 +767,9 @@ def main():
|
|
|
646
767
|
samArgs.samThreads,
|
|
647
768
|
)
|
|
648
769
|
for bamFile, effectiveGenomeSize, readLength in zip(
|
|
649
|
-
bamFiles,
|
|
770
|
+
bamFiles,
|
|
771
|
+
effectiveGenomeSizes,
|
|
772
|
+
readLengthsBamFiles,
|
|
650
773
|
)
|
|
651
774
|
]
|
|
652
775
|
except Exception:
|
|
@@ -700,7 +823,8 @@ def main():
|
|
|
700
823
|
)
|
|
701
824
|
]
|
|
702
825
|
chromSizesDict = misc_util.getChromSizesDict(
|
|
703
|
-
genomeArgs.chromSizesFile,
|
|
826
|
+
genomeArgs.chromSizesFile,
|
|
827
|
+
excludeChroms=genomeArgs.excludeChroms,
|
|
704
828
|
)
|
|
705
829
|
chromosomes = genomeArgs.chromosomes
|
|
706
830
|
|
|
@@ -715,11 +839,15 @@ def main():
|
|
|
715
839
|
chromosomeStart = max(
|
|
716
840
|
0, (chromosomeStart - (chromosomeStart % stepSize))
|
|
717
841
|
)
|
|
718
|
-
chromosomeEnd = max(
|
|
842
|
+
chromosomeEnd = max(
|
|
843
|
+
0, (chromosomeEnd - (chromosomeEnd % stepSize))
|
|
844
|
+
)
|
|
719
845
|
numIntervals = (
|
|
720
846
|
((chromosomeEnd - chromosomeStart) + stepSize) - 1
|
|
721
847
|
) // stepSize
|
|
722
|
-
intervals = np.arange(
|
|
848
|
+
intervals = np.arange(
|
|
849
|
+
chromosomeStart, chromosomeEnd, stepSize
|
|
850
|
+
)
|
|
723
851
|
chromMat: np.ndarray = np.empty(
|
|
724
852
|
(numSamples, numIntervals), dtype=np.float32
|
|
725
853
|
)
|
|
@@ -736,7 +864,10 @@ def main():
|
|
|
736
864
|
chromosomeStart,
|
|
737
865
|
chromosomeEnd,
|
|
738
866
|
stepSize,
|
|
739
|
-
[
|
|
867
|
+
[
|
|
868
|
+
readLengthsBamFiles[j_],
|
|
869
|
+
readLengthsControlBamFiles[j_],
|
|
870
|
+
],
|
|
740
871
|
[treatScaleFactors[j_], controlScaleFactors[j_]],
|
|
741
872
|
samArgs.oneReadPerBin,
|
|
742
873
|
samArgs.samThreads,
|
|
@@ -748,9 +879,12 @@ def main():
|
|
|
748
879
|
inferFragmentLength=samArgs.inferFragmentLength,
|
|
749
880
|
applyAsinh=countingArgs.applyAsinh,
|
|
750
881
|
applyLog=countingArgs.applyLog,
|
|
882
|
+
countEndsOnly=samArgs.countEndsOnly,
|
|
751
883
|
)
|
|
752
884
|
if countingArgs.rescaleToTreatmentCoverage:
|
|
753
|
-
finalSF = max(
|
|
885
|
+
finalSF = max(
|
|
886
|
+
1.0, initialTreatmentScaleFactors[j_]
|
|
887
|
+
)
|
|
754
888
|
chromMat[j_, :] = finalSF * (
|
|
755
889
|
pairMatrix[0, :] - pairMatrix[1, :]
|
|
756
890
|
)
|
|
@@ -768,23 +902,31 @@ def main():
|
|
|
768
902
|
samArgs.samThreads,
|
|
769
903
|
samArgs.samFlagExclude,
|
|
770
904
|
offsetStr=samArgs.offsetStr,
|
|
771
|
-
extendBP=
|
|
905
|
+
extendBP=extendBP_,
|
|
772
906
|
maxInsertSize=samArgs.maxInsertSize,
|
|
773
907
|
pairedEndMode=samArgs.pairedEndMode,
|
|
774
908
|
inferFragmentLength=samArgs.inferFragmentLength,
|
|
775
909
|
applyAsinh=countingArgs.applyAsinh,
|
|
776
910
|
applyLog=countingArgs.applyLog,
|
|
911
|
+
countEndsOnly=samArgs.countEndsOnly,
|
|
777
912
|
)
|
|
778
913
|
sparseMap = None
|
|
779
914
|
if genomeArgs.sparseBedFile and not observationArgs.useALV:
|
|
780
|
-
logger.info(
|
|
915
|
+
logger.info(
|
|
916
|
+
f"Building sparse mapping for {chromosome}..."
|
|
917
|
+
)
|
|
781
918
|
sparseMap = core.getSparseMap(
|
|
782
|
-
chromosome,
|
|
919
|
+
chromosome,
|
|
920
|
+
intervals,
|
|
921
|
+
numNearest,
|
|
922
|
+
genomeArgs.sparseBedFile,
|
|
783
923
|
)
|
|
784
924
|
|
|
785
925
|
muncMat = np.empty_like(chromMat, dtype=np.float32)
|
|
786
926
|
for j in range(numSamples):
|
|
787
|
-
logger.info(
|
|
927
|
+
logger.info(
|
|
928
|
+
f"Muncing {j + 1}/{numSamples} for {chromosome}..."
|
|
929
|
+
)
|
|
788
930
|
muncMat[j, :] = core.getMuncTrack(
|
|
789
931
|
chromosome,
|
|
790
932
|
intervals,
|
|
@@ -855,8 +997,11 @@ def main():
|
|
|
855
997
|
)
|
|
856
998
|
if c_ == 0 and len(chromosomes) > 1:
|
|
857
999
|
for file_ in os.listdir("."):
|
|
858
|
-
if file_.startswith(
|
|
859
|
-
|
|
1000
|
+
if file_.startswith(
|
|
1001
|
+
f"consenrichOutput_{experimentName}"
|
|
1002
|
+
) and (
|
|
1003
|
+
file_.endswith(".bedGraph")
|
|
1004
|
+
or file_.endswith(".narrowPeak")
|
|
860
1005
|
):
|
|
861
1006
|
logger.warning(f"Overwriting: {file_}")
|
|
862
1007
|
os.remove(file_)
|
|
@@ -909,13 +1054,22 @@ def main():
|
|
|
909
1054
|
convertBedGraphToBigWig(experimentName, genomeArgs.chromSizesFile)
|
|
910
1055
|
if matchingEnabled and matchingArgs.merge:
|
|
911
1056
|
try:
|
|
1057
|
+
mergeGapBP_ = matchingArgs.mergeGapBP
|
|
1058
|
+
if mergeGapBP_ is None:
|
|
1059
|
+
mergeGapBP_ = (
|
|
1060
|
+
int(matchingArgs.minMatchLengthBP/2) + 1
|
|
1061
|
+
if matchingArgs.minMatchLengthBP is not None
|
|
1062
|
+
else 75
|
|
1063
|
+
)
|
|
912
1064
|
matching.mergeMatches(
|
|
913
1065
|
f"consenrichOutput_{experimentName}_matches.narrowPeak",
|
|
914
|
-
mergeGapBP=
|
|
1066
|
+
mergeGapBP=mergeGapBP_,
|
|
915
1067
|
)
|
|
916
1068
|
|
|
917
1069
|
except Exception as e:
|
|
918
|
-
logger.warning(
|
|
1070
|
+
logger.warning(
|
|
1071
|
+
f"Failed to merge matches...SKIPPING:\n{e}\n\n"
|
|
1072
|
+
)
|
|
919
1073
|
logger.info("Done.")
|
|
920
1074
|
|
|
921
1075
|
|