consenrich 0.7.11b2__cp314-cp314-macosx_15_0_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of consenrich might be problematic. Click here for more details.
- consenrich/.dylibs/libomp.dylib +0 -0
- consenrich/__init__.py +11 -0
- consenrich/cconsenrich.c +50610 -0
- consenrich/cconsenrich.cpython-314-darwin.so +0 -0
- consenrich/cconsenrich.pyx +1065 -0
- consenrich/consenrich.py +1802 -0
- consenrich/constants.py +172 -0
- consenrich/core.py +2068 -0
- consenrich/data/ce10.sizes +6 -0
- consenrich/data/ce10_blacklist.bed +100 -0
- consenrich/data/ce10_sparse.bed +11828 -0
- consenrich/data/ce11.sizes +6 -0
- consenrich/data/ce11_blacklist.bed +97 -0
- consenrich/data/ce11_sparse.bed +11828 -0
- consenrich/data/dm6.sizes +7 -0
- consenrich/data/dm6_blacklist.bed +182 -0
- consenrich/data/dm6_sparse.bed +20000 -0
- consenrich/data/hg19.sizes +24 -0
- consenrich/data/hg19_blacklist.bed +834 -0
- consenrich/data/hg19_sparse.bed +288358 -0
- consenrich/data/hg38.sizes +24 -0
- consenrich/data/hg38_blacklist.bed +636 -0
- consenrich/data/hg38_sparse.bed +288699 -0
- consenrich/data/mm10.sizes +21 -0
- consenrich/data/mm10_blacklist.bed +3435 -0
- consenrich/data/mm10_sparse.bed +100400 -0
- consenrich/data/mm39.sizes +21 -0
- consenrich/data/mm39_blacklist.bed +3360 -0
- consenrich/data/mm39_sparse.bed +100381 -0
- consenrich/detrorm.py +297 -0
- consenrich/matching.py +929 -0
- consenrich/misc_util.py +122 -0
- consenrich-0.7.11b2.dist-info/METADATA +66 -0
- consenrich-0.7.11b2.dist-info/RECORD +38 -0
- consenrich-0.7.11b2.dist-info/WHEEL +6 -0
- consenrich-0.7.11b2.dist-info/entry_points.txt +2 -0
- consenrich-0.7.11b2.dist-info/licenses/LICENSE +21 -0
- consenrich-0.7.11b2.dist-info/top_level.txt +1 -0
consenrich/consenrich.py
ADDED
|
@@ -0,0 +1,1802 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
|
|
4
|
+
import argparse
|
|
5
|
+
import glob
|
|
6
|
+
import logging
|
|
7
|
+
import pprint
|
|
8
|
+
import os
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from collections.abc import Mapping
|
|
11
|
+
from typing import List, Optional, Tuple, Dict, Any, Union, Sequence
|
|
12
|
+
import shutil
|
|
13
|
+
import subprocess
|
|
14
|
+
import sys
|
|
15
|
+
import numpy as np
|
|
16
|
+
import pandas as pd
|
|
17
|
+
import pysam
|
|
18
|
+
import pywt
|
|
19
|
+
import yaml
|
|
20
|
+
|
|
21
|
+
import consenrich.core as core
|
|
22
|
+
import consenrich.misc_util as misc_util
|
|
23
|
+
import consenrich.constants as constants
|
|
24
|
+
import consenrich.detrorm as detrorm
|
|
25
|
+
import consenrich.matching as matching
|
|
26
|
+
import consenrich.cconsenrich as cconsenrich
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
logging.basicConfig(
|
|
30
|
+
level=logging.INFO,
|
|
31
|
+
format="%(asctime)s - %(module)s.%(funcName)s - %(levelname)s - %(message)s",
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
logger = logging.getLogger(__name__)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _resolveFragmentLengthPairs(
|
|
38
|
+
treatmentFragmentLengths: Optional[Sequence[Union[int, float]]],
|
|
39
|
+
controlFragmentLengths: Optional[Sequence[Union[int, float]]],
|
|
40
|
+
) -> Tuple[List[int], List[int]]:
|
|
41
|
+
r"""Assign consistent fragment length estimates to treatment and control BAM files.
|
|
42
|
+
|
|
43
|
+
For single-end data, cross-correlation-based fragment estimates for control inputs
|
|
44
|
+
can be much smaller than for treatment samples due to lack of structure. This creates
|
|
45
|
+
artifacts during signal quantification and normalization steps, and it's common to use
|
|
46
|
+
the treatment fragment length for both treatment and control samples. So we offer that here.
|
|
47
|
+
"""
|
|
48
|
+
|
|
49
|
+
if not treatmentFragmentLengths:
|
|
50
|
+
logger.warning(
|
|
51
|
+
"No treatment fragment lengths provided...returning [],[]"
|
|
52
|
+
)
|
|
53
|
+
return [], []
|
|
54
|
+
|
|
55
|
+
n_treat = len(treatmentFragmentLengths)
|
|
56
|
+
|
|
57
|
+
if controlFragmentLengths:
|
|
58
|
+
if len(controlFragmentLengths) == 1 and n_treat > 1:
|
|
59
|
+
controlFragmentLengths = (
|
|
60
|
+
list(controlFragmentLengths) * n_treat
|
|
61
|
+
)
|
|
62
|
+
logger.info(
|
|
63
|
+
"Only one control fragment length provided: broadcasting this value for all control BAM files."
|
|
64
|
+
)
|
|
65
|
+
elif len(controlFragmentLengths) != n_treat:
|
|
66
|
+
logger.warning(
|
|
67
|
+
"Sizes of treatment and control fragment length lists are incompatible...returning [],[]"
|
|
68
|
+
)
|
|
69
|
+
return [], []
|
|
70
|
+
else:
|
|
71
|
+
controlFragmentLengths = list(controlFragmentLengths)
|
|
72
|
+
else:
|
|
73
|
+
controlFragmentLengths = list(treatmentFragmentLengths)
|
|
74
|
+
|
|
75
|
+
finalTreatment = [int(x) for x in treatmentFragmentLengths]
|
|
76
|
+
finalControl = [int(x) for x in treatmentFragmentLengths]
|
|
77
|
+
|
|
78
|
+
return finalTreatment, finalControl
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def loadConfig(
    configSource: Union[str, Path, Mapping[str, Any]],
) -> Dict[str, Any]:
    r"""Load a YAML config from a path, or accept an already-parsed mapping.

    Dict-like inputs are returned unchanged; string/``Path`` inputs are parsed
    with ``yaml.safe_load``. An empty YAML document yields an empty dict.

    :raises TypeError: when the input is neither a mapping nor a path, or the
        parsed document is not a top-level mapping.
    """
    if isinstance(configSource, Mapping):
        return configSource

    if not isinstance(configSource, (str, Path)):
        raise TypeError("`config` must be a path or a mapping/dict.")

    with open(configSource, "r") as handle:
        parsed = yaml.safe_load(handle) or {}

    if not isinstance(parsed, Mapping):
        raise TypeError("Top-level YAML must be a mapping/object.")
    return parsed
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _cfgGet(
|
|
104
|
+
configMap: Mapping[str, Any],
|
|
105
|
+
dottedKey: str,
|
|
106
|
+
defaultVal: Any = None,
|
|
107
|
+
) -> Any:
|
|
108
|
+
r"""Support both dotted keys and yaml/dict-style nested access for configs."""
|
|
109
|
+
|
|
110
|
+
# e.g., inputParams.bamFiles
|
|
111
|
+
if dottedKey in configMap:
|
|
112
|
+
return configMap[dottedKey]
|
|
113
|
+
|
|
114
|
+
# e.g.,
|
|
115
|
+
# inputParams:
|
|
116
|
+
# bamFiles: [...]
|
|
117
|
+
currentVal: Any = configMap
|
|
118
|
+
for keyPart in dottedKey.split("."):
|
|
119
|
+
if isinstance(currentVal, Mapping) and keyPart in currentVal:
|
|
120
|
+
currentVal = currentVal[keyPart]
|
|
121
|
+
else:
|
|
122
|
+
return defaultVal
|
|
123
|
+
return currentVal
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def _listOrEmpty(list_):
|
|
127
|
+
if list_ is None:
|
|
128
|
+
return []
|
|
129
|
+
return list_
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def checkControlsPresent(inputArgs: core.inputParams) -> bool:
    """Check if control BAM files are present in the input arguments.

    :param inputArgs: core.inputParams object
    :return: True if control BAM files are present, False otherwise.
    """
    controls = inputArgs.bamFilesControl
    # Only a non-empty list counts as "controls present".
    return isinstance(controls, list) and len(controls) > 0
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def getReadLengths(
    inputArgs: core.inputParams,
    countingArgs: core.countingParams,
    samArgs: core.samParams,
) -> List[int]:
    r"""Get read lengths for each BAM file in the input arguments.

    :param inputArgs: core.inputParams object containing BAM file paths.
    :param countingArgs: core.countingParams object containing number of reads.
    :param samArgs: core.samParams object containing SAM thread and flag exclude parameters.
    :return: List of read lengths for each BAM file.
    """
    bamList = inputArgs.bamFiles
    if not bamList:
        raise ValueError(
            "No BAM files provided in the input arguments."
        )

    if not (isinstance(bamList, list) and len(bamList) > 0):
        raise ValueError("bam files list is empty")

    readLengths: List[int] = []
    for bamPath in bamList:
        # 1000 is the per-file sampling cap passed through to core.getReadLength.
        readLengths.append(
            core.getReadLength(
                bamPath,
                countingArgs.numReads,
                1000,
                samArgs.samThreads,
                samArgs.samFlagExclude,
            )
        )
    return readLengths
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def checkMatchingEnabled(matchingArgs: core.matchingParams) -> bool:
    """Return True when matched filtering is configured: both the template-name
    list and the cascade-level list must be non-empty lists."""
    templates = matchingArgs.templateNames
    levels = matchingArgs.cascadeLevels
    hasTemplates = isinstance(templates, list) and len(templates) > 0
    hasLevels = isinstance(levels, list) and len(levels) > 0
    return hasTemplates and hasLevels
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def getEffectiveGenomeSizes(
    genomeArgs: core.genomeParams, readLengths: List[int]
) -> List[int]:
    r"""Get effective genome sizes for the given genome name and read lengths.

    :param genomeArgs: core.genomeParams object
    :param readLengths: List of read lengths for which to get effective genome sizes.
    :return: List of effective genome sizes corresponding to the read lengths.
    """
    name = genomeArgs.genomeName
    if not name or not isinstance(name, str):
        raise ValueError("Genome name must be a non-empty string.")

    if not (isinstance(readLengths, list) and len(readLengths) > 0):
        raise ValueError(
            "Read lengths must be a non-empty list. Try calling `getReadLengths` first."
        )

    sizes: List[int] = []
    for readLength in readLengths:
        sizes.append(constants.getEffectiveGenomeSize(name, readLength))
    return sizes
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def getInputArgs(config_path: str) -> core.inputParams:
    r"""Build :class:`core.inputParams` from the configuration.

    Expands glob patterns in the BAM path lists, validates the treatment/control
    pairing (0, 1, or matching number of controls), sanity-checks each BAM file,
    and detects paired-end status unless overridden by ``inputParams.pairedEnd``.

    :param config_path: Path to the YAML configuration file (or a parsed mapping).
    :return: populated ``core.inputParams`` object.
    :raises ValueError: if no treatment BAM files are found, or the control list
        size is incompatible with the treatment list.
    """
    configData = loadConfig(config_path)

    def expandWildCards(bamList: List[str]) -> List[str]:
        # Expand shell-style glob patterns; plain paths pass through unchanged.
        expandedList: List[str] = []
        for bamEntry in bamList:
            if "*" in bamEntry or "?" in bamEntry or "[" in bamEntry:
                # FIX: glob matches were previously computed but never added to
                # the result, silently dropping every wildcard entry.
                # Sorted for deterministic ordering across filesystems.
                expandedList.extend(sorted(glob.glob(bamEntry)))
            else:
                expandedList.append(bamEntry)
        return expandedList

    bamFilesRaw = (
        _cfgGet(configData, "inputParams.bamFiles", []) or []
    )
    bamFilesControlRaw = (
        _cfgGet(configData, "inputParams.bamFilesControl", []) or []
    )

    bamFiles = expandWildCards(bamFilesRaw)
    bamFilesControl = expandWildCards(bamFilesControlRaw)

    if len(bamFiles) == 0:
        raise ValueError(
            "No BAM files provided in the configuration."
        )

    if (
        len(bamFilesControl) > 0
        and len(bamFilesControl) != len(bamFiles)
        and len(bamFilesControl) != 1
    ):
        raise ValueError(
            "Number of control BAM files must be 0, 1, or the same as number of treatment files"
        )

    if len(bamFilesControl) == 1:
        # A single control is broadcast across all treatment files.
        logger.info(
            f"Only one control given: Using {bamFilesControl[0]} for all treatment files."
        )
        bamFilesControl = bamFilesControl * len(bamFiles)

    if not bamFiles or not isinstance(bamFiles, list):
        raise ValueError("No BAM files found")

    for bamFile in bamFiles:
        misc_util.checkBamFile(bamFile)

    if bamFilesControl:
        for bamFile in bamFilesControl:
            misc_util.checkBamFile(bamFile)

    # Config override wins; otherwise treat as paired-end only when every
    # treatment BAM is paired-end.
    pairedEndList = misc_util.bamsArePairedEnd(bamFiles)
    pairedEndConfig: Optional[bool] = _cfgGet(
        configData, "inputParams.pairedEnd", None
    )
    if pairedEndConfig is None:
        pairedEndConfig = all(pairedEndList)
    if pairedEndConfig:
        logger.info("Paired-end BAM files detected")
    else:
        logger.info("One or more single-end BAM files detected")

    return core.inputParams(
        bamFiles=bamFiles,
        bamFilesControl=bamFilesControl,
        pairedEnd=pairedEndConfig,
    )
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
def getOutputArgs(config_path: str) -> core.outputParams:
    r"""Build :class:`core.outputParams` from the configuration.

    ``convertToBigWig`` defaults to True only when the ``bedGraphToBigWig``
    executable is available on PATH; all other fields use fixed defaults.
    """
    configData = loadConfig(config_path)

    # Default bigWig conversion on only when the converter binary exists.
    bigWigDefault = True if shutil.which("bedGraphToBigWig") else False

    return core.outputParams(
        convertToBigWig=_cfgGet(
            configData,
            "outputParams.convertToBigWig",
            bigWigDefault,
        ),
        roundDigits=_cfgGet(configData, "outputParams.roundDigits", 3),
        writeResiduals=_cfgGet(
            configData,
            "outputParams.writeResiduals",
            True,
        ),
        writeMuncTrace=_cfgGet(
            configData, "outputParams.writeMuncTrace", False
        ),
        writeStateStd=_cfgGet(
            configData,
            "outputParams.writeStateStd",
            True,
        ),
    )
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
def getGenomeArgs(config_path: str) -> core.genomeParams:
    r"""Build :class:`core.genomeParams` from the configuration.

    Resolution order: bundled resource files for the named genome (sizes,
    blacklist, sparse regions) are looked up first, then any per-file config
    overrides replace them. Chromosome names come from the config when given,
    otherwise from the first column of the chrom-sizes file; the exclude list
    is applied afterwards.

    :param config_path: Path to the YAML configuration file (or a parsed mapping).
    :return: populated ``core.genomeParams`` object.
    :raises FileNotFoundError: if no usable chromosome sizes file is found.
    :raises ValueError: if no chromosomes remain after filtering/exclusion.
    """
    configData = loadConfig(config_path)

    genomeName = _cfgGet(configData, "genomeParams.name", None)
    # Normalize aliases (e.g. case variants) to a canonical genome label.
    genomeLabel = constants.resolveGenomeName(genomeName)

    chromSizesFile: Optional[str] = None
    blacklistFile: Optional[str] = None
    sparseBedFile: Optional[str] = None
    chromosomesList: Optional[List[str]] = None

    excludeChromsList: List[str] = (
        _cfgGet(configData, "genomeParams.excludeChroms", []) or []
    )
    excludeForNormList: List[str] = (
        _cfgGet(configData, "genomeParams.excludeForNorm", []) or []
    )

    # Bundled package resources for recognized genomes.
    if genomeLabel:
        chromSizesFile = constants.getGenomeResourceFile(
            genomeLabel, "sizes"
        )
        blacklistFile = constants.getGenomeResourceFile(
            genomeLabel, "blacklist"
        )
        sparseBedFile = constants.getGenomeResourceFile(
            genomeLabel, "sparse"
        )

    # Per-file config overrides take precedence over bundled resources.
    chromSizesOverride = _cfgGet(
        configData, "genomeParams.chromSizesFile", None
    )
    if chromSizesOverride:
        chromSizesFile = chromSizesOverride

    blacklistOverride = _cfgGet(
        configData, "genomeParams.blacklistFile", None
    )
    if blacklistOverride:
        blacklistFile = blacklistOverride

    sparseOverride = _cfgGet(
        configData, "genomeParams.sparseBedFile", None
    )
    if sparseOverride:
        sparseBedFile = sparseOverride

    if not chromSizesFile or not os.path.exists(chromSizesFile):
        raise FileNotFoundError(
            f"Chromosome sizes file {chromSizesFile} does not exist."
        )

    chromosomesConfig = _cfgGet(
        configData, "genomeParams.chromosomes", None
    )
    if chromosomesConfig is not None:
        chromosomesList = chromosomesConfig
    else:
        if chromSizesFile:
            # Derive the chromosome list from the two-column sizes file.
            chromosomesFrame = pd.read_csv(
                chromSizesFile,
                sep="\t",
                header=None,
                names=["chrom", "size"],
            )
            chromosomesList = list(chromosomesFrame["chrom"])
        else:
            # NOTE(review): unreachable in practice — a missing chromSizesFile
            # already raised FileNotFoundError above.
            raise ValueError(
                "No chromosomes provided in the configuration and no chromosome sizes file specified."
            )

    # Drop empty/whitespace-only names and strip surrounding whitespace.
    chromosomesList = [
        chromName.strip()
        for chromName in chromosomesList
        if chromName and chromName.strip()
    ]
    if excludeChromsList:
        chromosomesList = [
            chromName
            for chromName in chromosomesList
            if chromName not in excludeChromsList
        ]
    if not chromosomesList:
        raise ValueError(
            "No valid chromosomes found after excluding specified chromosomes."
        )

    return core.genomeParams(
        genomeName=genomeLabel,
        chromSizesFile=chromSizesFile,
        blacklistFile=blacklistFile,
        sparseBedFile=sparseBedFile,
        chromosomes=chromosomesList,
        excludeChroms=excludeChromsList,
        excludeForNorm=excludeForNormList,
    )
|
|
419
|
+
|
|
420
|
+
|
|
421
|
+
def getCountingArgs(config_path: str) -> core.countingParams:
    r"""Build :class:`core.countingParams` from the configuration.

    Resolves the count-transform flags (at most one of asinh/log/sqrt may be
    active; ``noTransform`` disables all three), validates scale-factor and
    fragment-length lists, and broadcasts single-element control lists to
    match the number of treatment entries.

    :param config_path: Path to the YAML configuration file (or a parsed mapping).
    :return: populated ``core.countingParams`` object.
    :raises ValueError: if list-typed options are not lists, or control lists
        cannot be broadcast to the treatment list length.
    """
    configData = loadConfig(config_path)

    stepSize = _cfgGet(configData, "countingParams.stepSize", 25)
    scaleDownFlag = _cfgGet(
        configData,
        "countingParams.scaleDown",
        False,
    )
    scaleFactorList = _cfgGet(
        configData, "countingParams.scaleFactors", None
    )
    numReads = _cfgGet(configData, "countingParams.numReads", 100)
    scaleFactorsControlList = _cfgGet(
        configData, "countingParams.scaleFactorsControl", None
    )
    applyAsinhFlag = _cfgGet(
        configData,
        "countingParams.applyAsinh",
        False,
    )
    applyLogFlag = _cfgGet(
        configData,
        "countingParams.applyLog",
        False,
    )
    applySqrtFlag = _cfgGet(
        configData,
        "countingParams.applySqrt",
        False,
    )

    noTransformFlag = _cfgGet(
        configData,
        "countingParams.noTransform",
        False,
    )

    # Conflicting transform flags: fall back to sqrt only (and warn),
    # unless noTransform will disable them all anyway.
    if (
        int(applyAsinhFlag) + int(applyLogFlag) + int(applySqrtFlag)
        > 1
        and not noTransformFlag
    ):
        logger.warning(
            "Only <= 1 of `applyAsinh`, `applyLog`, `applySqrt` can be true...using applySqrt..."
        )
        applyAsinhFlag = False
        applyLogFlag = False
        applySqrtFlag = True

    # noTransform overrides every individual transform flag.
    if noTransformFlag:
        applyAsinhFlag = False
        applyLogFlag = False
        applySqrtFlag = False

    rescaleToTreatmentCoverageFlag = _cfgGet(
        configData,
        "countingParams.rescaleToTreatmentCoverage",
        False,
    )

    trimLeftTail = _cfgGet(
        configData,
        "countingParams.trimLeftTail",
        0.0,
    )

    if scaleFactorList is not None and not isinstance(
        scaleFactorList, list
    ):
        raise ValueError("`scaleFactors` should be a list of floats.")

    if scaleFactorsControlList is not None and not isinstance(
        scaleFactorsControlList, list
    ):
        raise ValueError(
            "`scaleFactorsControl` should be a list of floats."
        )

    # Broadcast a single control scale factor across all treatments.
    if (
        scaleFactorList is not None
        and scaleFactorsControlList is not None
        and len(scaleFactorList) != len(scaleFactorsControlList)
    ):
        if len(scaleFactorsControlList) == 1:
            scaleFactorsControlList = scaleFactorsControlList * len(
                scaleFactorList
            )
        else:
            raise ValueError(
                "control and treatment scale factors: must be equal length or 1 control"
            )

    normMethod_ = _cfgGet(
        configData,
        "countingParams.normMethod",
        "EGS",
    )
    # Case-insensitive check; unknown methods fall back to EGS.
    if normMethod_.upper() not in ["EGS", "RPKM"]:
        logger.warning(
            f"Unknown `countingParams.normMethod`...Using `EGS`...",
        )
        normMethod_ = "EGS"

    fragmentLengths: Optional[List[int]] = _cfgGet(
        configData,
        "countingParams.fragmentLengths",
        None,
    )
    fragmentLengthsControl: Optional[List[int]] = _cfgGet(
        configData,
        "countingParams.fragmentLengthsControl",
        None,
    )

    if fragmentLengths is not None and not isinstance(
        fragmentLengths, list
    ):
        raise ValueError(
            "`fragmentLengths` should be a list of integers."
        )
    if fragmentLengthsControl is not None and not isinstance(
        fragmentLengthsControl, list
    ):
        raise ValueError(
            "`fragmentLengthsControl` should be a list of integers."
        )
    # Broadcast a single control fragment length across all treatments.
    if (
        fragmentLengths is not None
        and fragmentLengthsControl is not None
        and len(fragmentLengths) != len(fragmentLengthsControl)
    ):
        if len(fragmentLengthsControl) == 1:
            fragmentLengthsControl = fragmentLengthsControl * len(
                fragmentLengths
            )
        else:
            raise ValueError(
                "control and treatment fragment lengths: must be equal length or 1 control"
            )

    return core.countingParams(
        stepSize=stepSize,
        scaleDown=scaleDownFlag,
        scaleFactors=scaleFactorList,
        scaleFactorsControl=scaleFactorsControlList,
        numReads=numReads,
        applyAsinh=applyAsinhFlag,
        applyLog=applyLogFlag,
        applySqrt=applySqrtFlag,
        rescaleToTreatmentCoverage=rescaleToTreatmentCoverageFlag,
        normMethod=normMethod_,
        noTransform=noTransformFlag,
        trimLeftTail=trimLeftTail,
        fragmentLengths=fragmentLengths,
        fragmentLengthsControl=fragmentLengthsControl,
        useTreatmentFragmentLengths=_cfgGet(
            configData,
            "countingParams.useTreatmentFragmentLengths",
            True,
        ),
    )
|
|
583
|
+
|
|
584
|
+
|
|
585
|
+
def getPlotArgs(
    config_path: str, experimentName: str
) -> core.plotParams:
    r"""Build :class:`core.plotParams` from the configuration.

    The plot directory defaults to ``<cwd>/<experimentName>_consenrichPlots``
    and is only created/resolved when at least one plot type is enabled;
    failures to create it fall back to the current working directory.

    :param config_path: Path to the YAML configuration file (or a parsed mapping).
    :param experimentName: Experiment label used for default prefix/directory names.
    :return: populated ``core.plotParams`` object.
    """
    configData = loadConfig(config_path)

    plotPrefix_ = _cfgGet(
        configData, "plotParams.plotPrefix", experimentName
    )

    plotStateEstimatesHistogram_ = _cfgGet(
        configData,
        "plotParams.plotStateEstimatesHistogram",
        False,
    )

    plotResidualsHistogram_ = _cfgGet(
        configData,
        "plotParams.plotResidualsHistogram",
        False,
    )

    plotStateStdHistogram_ = _cfgGet(
        configData,
        "plotParams.plotStateStdHistogram",
        False,
    )

    plotHeightInches_ = _cfgGet(
        configData,
        "plotParams.plotHeightInches",
        6.0,
    )

    plotWidthInches_ = _cfgGet(
        configData,
        "plotParams.plotWidthInches",
        8.0,
    )

    plotDPI_ = _cfgGet(
        configData,
        "plotParams.plotDPI",
        300,
    )

    plotDirectory_ = _cfgGet(
        configData,
        "plotParams.plotDirectory",
        os.path.join(
            os.getcwd(), f"{experimentName}_consenrichPlots"
        ),
    )

    # Only touch the filesystem when at least one plot type is requested.
    if (
        int(plotStateEstimatesHistogram_)
        + int(plotResidualsHistogram_)
        + int(plotStateStdHistogram_)
        >= 1
    ):
        if plotDirectory_ is not None and (
            not os.path.exists(plotDirectory_)
            or not os.path.isdir(plotDirectory_)
        ):
            try:
                os.makedirs(plotDirectory_, exist_ok=True)
            except Exception as e:
                # Fall back to the current working directory on any creation failure.
                logger.warning(
                    f"Failed to create {plotDirectory_}:\n\t{e}\nUsing CWD."
                )
                plotDirectory_ = os.getcwd()
        elif plotDirectory_ is None:
            plotDirectory_ = os.getcwd()

        elif os.path.exists(plotDirectory_) and os.path.isdir(
            plotDirectory_
        ):
            # NOTE(review): warning level looks intentional-but-loud for an
            # informational message about reusing an existing directory.
            logger.warning(
                f"Using existing plot directory: {plotDirectory_}"
            )
        else:
            logger.warning(
                f"Failed creating/identifying {plotDirectory_}...Using CWD."
            )
            plotDirectory_ = os.getcwd()

    return core.plotParams(
        plotPrefix=plotPrefix_,
        plotStateEstimatesHistogram=plotStateEstimatesHistogram_,
        plotResidualsHistogram=plotResidualsHistogram_,
        plotStateStdHistogram=plotStateStdHistogram_,
        plotHeightInches=plotHeightInches_,
        plotWidthInches=plotWidthInches_,
        plotDPI=plotDPI_,
        plotDirectory=plotDirectory_,
    )
|
|
680
|
+
|
|
681
|
+
|
|
682
|
+
def readConfig(config_path: str) -> Dict[str, Any]:
|
|
683
|
+
r"""Read and parse the configuration file for Consenrich.
|
|
684
|
+
|
|
685
|
+
:param config_path: Path to the YAML configuration file.
|
|
686
|
+
:return: Dictionary containing all parsed configuration parameters.
|
|
687
|
+
"""
|
|
688
|
+
configData = loadConfig(config_path)
|
|
689
|
+
|
|
690
|
+
inputParams = getInputArgs(config_path)
|
|
691
|
+
outputParams = getOutputArgs(config_path)
|
|
692
|
+
genomeParams = getGenomeArgs(config_path)
|
|
693
|
+
countingParams = getCountingArgs(config_path)
|
|
694
|
+
|
|
695
|
+
matchingExcludeRegionsFileDefault: Optional[str] = (
|
|
696
|
+
genomeParams.blacklistFile
|
|
697
|
+
)
|
|
698
|
+
|
|
699
|
+
experimentName = _cfgGet(
|
|
700
|
+
configData, "experimentName", "consenrichExperiment"
|
|
701
|
+
)
|
|
702
|
+
|
|
703
|
+
processArgs = core.processParams(
|
|
704
|
+
deltaF=_cfgGet(configData, "processParams.deltaF", -1.0),
|
|
705
|
+
minQ=_cfgGet(configData, "processParams.minQ", -1.0),
|
|
706
|
+
maxQ=_cfgGet(configData, "processParams.maxQ", 10_000),
|
|
707
|
+
offDiagQ=_cfgGet(
|
|
708
|
+
configData, "processParams.offDiagQ", 1.0e-3
|
|
709
|
+
),
|
|
710
|
+
dStatAlpha=_cfgGet(
|
|
711
|
+
configData,
|
|
712
|
+
"processParams.dStatAlpha",
|
|
713
|
+
2.0,
|
|
714
|
+
),
|
|
715
|
+
dStatd=_cfgGet(configData, "processParams.dStatd", 1.0),
|
|
716
|
+
dStatPC=_cfgGet(configData, "processParams.dStatPC", 1.0),
|
|
717
|
+
dStatUseMean=_cfgGet(
|
|
718
|
+
configData,
|
|
719
|
+
"processParams.dStatUseMean",
|
|
720
|
+
False,
|
|
721
|
+
),
|
|
722
|
+
scaleResidualsByP11=_cfgGet(
|
|
723
|
+
configData,
|
|
724
|
+
"processParams.scaleResidualsByP11",
|
|
725
|
+
True,
|
|
726
|
+
),
|
|
727
|
+
)
|
|
728
|
+
|
|
729
|
+
plotArgs = getPlotArgs(config_path, experimentName)
|
|
730
|
+
|
|
731
|
+
observationArgs = core.observationParams(
|
|
732
|
+
minR=_cfgGet(configData, "observationParams.minR", -1.0),
|
|
733
|
+
maxR=_cfgGet(configData, "observationParams.maxR", 10_000),
|
|
734
|
+
useALV=_cfgGet(configData, "observationParams.useALV", False),
|
|
735
|
+
useConstantNoiseLevel=_cfgGet(
|
|
736
|
+
configData,
|
|
737
|
+
"observationParams.useConstantNoiseLevel",
|
|
738
|
+
False,
|
|
739
|
+
),
|
|
740
|
+
noGlobal=_cfgGet(
|
|
741
|
+
configData, "observationParams.noGlobal", False
|
|
742
|
+
),
|
|
743
|
+
numNearest=_cfgGet(
|
|
744
|
+
configData,
|
|
745
|
+
"observationParams.numNearest",
|
|
746
|
+
25,
|
|
747
|
+
),
|
|
748
|
+
localWeight=_cfgGet(
|
|
749
|
+
configData, "observationParams.localWeight", 0.333,
|
|
750
|
+
),
|
|
751
|
+
globalWeight=_cfgGet(
|
|
752
|
+
configData, "observationParams.globalWeight", 0.667,
|
|
753
|
+
),
|
|
754
|
+
approximationWindowLengthBP=_cfgGet(
|
|
755
|
+
configData,
|
|
756
|
+
"observationParams.approximationWindowLengthBP",
|
|
757
|
+
25_000,
|
|
758
|
+
),
|
|
759
|
+
lowPassWindowLengthBP=_cfgGet(
|
|
760
|
+
configData,
|
|
761
|
+
"observationParams.lowPassWindowLengthBP",
|
|
762
|
+
50_000,
|
|
763
|
+
),
|
|
764
|
+
lowPassFilterType=_cfgGet(
|
|
765
|
+
configData,
|
|
766
|
+
"observationParams.lowPassFilterType",
|
|
767
|
+
"median",
|
|
768
|
+
),
|
|
769
|
+
returnCenter=_cfgGet(
|
|
770
|
+
configData, "observationParams.returnCenter", True
|
|
771
|
+
),
|
|
772
|
+
shrinkOffset=_cfgGet(
|
|
773
|
+
configData,
|
|
774
|
+
"observationParams.shrinkOffset",
|
|
775
|
+
1 - 0.05,
|
|
776
|
+
),
|
|
777
|
+
kappaALV=_cfgGet(
|
|
778
|
+
configData,
|
|
779
|
+
"observationParams.kappaALV",
|
|
780
|
+
50.0,
|
|
781
|
+
),
|
|
782
|
+
)
|
|
783
|
+
|
|
784
|
+
stateArgs = core.stateParams(
|
|
785
|
+
stateInit=_cfgGet(configData, "stateParams.stateInit", 0.0),
|
|
786
|
+
stateCovarInit=_cfgGet(
|
|
787
|
+
configData,
|
|
788
|
+
"stateParams.stateCovarInit",
|
|
789
|
+
1000.0,
|
|
790
|
+
),
|
|
791
|
+
boundState=_cfgGet(
|
|
792
|
+
configData,
|
|
793
|
+
"stateParams.boundState",
|
|
794
|
+
True,
|
|
795
|
+
),
|
|
796
|
+
stateLowerBound=_cfgGet(
|
|
797
|
+
configData,
|
|
798
|
+
"stateParams.stateLowerBound",
|
|
799
|
+
0.0,
|
|
800
|
+
),
|
|
801
|
+
stateUpperBound=_cfgGet(
|
|
802
|
+
configData,
|
|
803
|
+
"stateParams.stateUpperBound",
|
|
804
|
+
10000.0,
|
|
805
|
+
),
|
|
806
|
+
)
|
|
807
|
+
|
|
808
|
+
samThreads = _cfgGet(configData, "samParams.samThreads", 1)
|
|
809
|
+
samFlagExclude = _cfgGet(
|
|
810
|
+
configData,
|
|
811
|
+
"samParams.samFlagExclude",
|
|
812
|
+
3844,
|
|
813
|
+
)
|
|
814
|
+
minMappingQuality = _cfgGet(
|
|
815
|
+
configData,
|
|
816
|
+
"samParams.minMappingQuality",
|
|
817
|
+
0,
|
|
818
|
+
)
|
|
819
|
+
oneReadPerBin = _cfgGet(configData, "samParams.oneReadPerBin", 0)
|
|
820
|
+
chunkSize = _cfgGet(configData, "samParams.chunkSize", 1_000_000)
|
|
821
|
+
offsetStr = _cfgGet(configData, "samParams.offsetStr", "0,0")
|
|
822
|
+
maxInsertSize = _cfgGet(
|
|
823
|
+
configData,
|
|
824
|
+
"samParams.maxInsertSize",
|
|
825
|
+
1000,
|
|
826
|
+
)
|
|
827
|
+
|
|
828
|
+
pairedEndDefault = (
|
|
829
|
+
1
|
|
830
|
+
if inputParams.pairedEnd is not None
|
|
831
|
+
and int(inputParams.pairedEnd) > 0
|
|
832
|
+
else 0
|
|
833
|
+
)
|
|
834
|
+
inferFragmentDefault = (
|
|
835
|
+
1
|
|
836
|
+
if inputParams.pairedEnd is not None
|
|
837
|
+
and int(inputParams.pairedEnd) == 0
|
|
838
|
+
else 0
|
|
839
|
+
)
|
|
840
|
+
|
|
841
|
+
samArgs = core.samParams(
|
|
842
|
+
samThreads=samThreads,
|
|
843
|
+
samFlagExclude=samFlagExclude,
|
|
844
|
+
oneReadPerBin=oneReadPerBin,
|
|
845
|
+
chunkSize=chunkSize,
|
|
846
|
+
offsetStr=offsetStr,
|
|
847
|
+
maxInsertSize=maxInsertSize,
|
|
848
|
+
pairedEndMode=_cfgGet(
|
|
849
|
+
configData,
|
|
850
|
+
"samParams.pairedEndMode",
|
|
851
|
+
pairedEndDefault,
|
|
852
|
+
),
|
|
853
|
+
inferFragmentLength=_cfgGet(
|
|
854
|
+
configData,
|
|
855
|
+
"samParams.inferFragmentLength",
|
|
856
|
+
inferFragmentDefault,
|
|
857
|
+
),
|
|
858
|
+
countEndsOnly=_cfgGet(
|
|
859
|
+
configData, "samParams.countEndsOnly", False
|
|
860
|
+
),
|
|
861
|
+
minMappingQuality=minMappingQuality,
|
|
862
|
+
minTemplateLength=_cfgGet(
|
|
863
|
+
configData,
|
|
864
|
+
"samParams.minTemplateLength",
|
|
865
|
+
-1,
|
|
866
|
+
),
|
|
867
|
+
)
|
|
868
|
+
|
|
869
|
+
detrendArgs = core.detrendParams(
|
|
870
|
+
detrendWindowLengthBP=_cfgGet(
|
|
871
|
+
configData, "detrendParams.detrendWindowLengthBP", 20_000
|
|
872
|
+
),
|
|
873
|
+
detrendTrackPercentile=_cfgGet(
|
|
874
|
+
configData,
|
|
875
|
+
"detrendParams.detrendTrackPercentile",
|
|
876
|
+
75.0,
|
|
877
|
+
),
|
|
878
|
+
usePolyFilter=_cfgGet(
|
|
879
|
+
configData,
|
|
880
|
+
"detrendParams.usePolyFilter",
|
|
881
|
+
False,
|
|
882
|
+
),
|
|
883
|
+
detrendSavitzkyGolayDegree=_cfgGet(
|
|
884
|
+
configData,
|
|
885
|
+
"detrendParams.detrendSavitzkyGolayDegree",
|
|
886
|
+
0,
|
|
887
|
+
),
|
|
888
|
+
useOrderStatFilter=_cfgGet(
|
|
889
|
+
configData,
|
|
890
|
+
"detrendParams.useOrderStatFilter",
|
|
891
|
+
True,
|
|
892
|
+
),
|
|
893
|
+
)
|
|
894
|
+
|
|
895
|
+
matchingArgs = core.matchingParams(
|
|
896
|
+
templateNames=_cfgGet(
|
|
897
|
+
configData, "matchingParams.templateNames", []
|
|
898
|
+
),
|
|
899
|
+
cascadeLevels=_cfgGet(
|
|
900
|
+
configData, "matchingParams.cascadeLevels", []
|
|
901
|
+
),
|
|
902
|
+
iters=_cfgGet(configData, "matchingParams.iters", 25_000),
|
|
903
|
+
alpha=_cfgGet(configData, "matchingParams.alpha", 0.05),
|
|
904
|
+
minMatchLengthBP=_cfgGet(
|
|
905
|
+
configData,
|
|
906
|
+
"matchingParams.minMatchLengthBP",
|
|
907
|
+
-1,
|
|
908
|
+
),
|
|
909
|
+
maxNumMatches=_cfgGet(
|
|
910
|
+
configData,
|
|
911
|
+
"matchingParams.maxNumMatches",
|
|
912
|
+
100_000,
|
|
913
|
+
),
|
|
914
|
+
minSignalAtMaxima=_cfgGet(
|
|
915
|
+
configData,
|
|
916
|
+
"matchingParams.minSignalAtMaxima",
|
|
917
|
+
"q:0.75",
|
|
918
|
+
),
|
|
919
|
+
merge=_cfgGet(configData, "matchingParams.merge", True),
|
|
920
|
+
mergeGapBP=_cfgGet(
|
|
921
|
+
configData,
|
|
922
|
+
"matchingParams.mergeGapBP",
|
|
923
|
+
-1,
|
|
924
|
+
),
|
|
925
|
+
useScalingFunction=_cfgGet(
|
|
926
|
+
configData,
|
|
927
|
+
"matchingParams.useScalingFunction",
|
|
928
|
+
True,
|
|
929
|
+
),
|
|
930
|
+
excludeRegionsBedFile=_cfgGet(
|
|
931
|
+
configData,
|
|
932
|
+
"matchingParams.excludeRegionsBedFile",
|
|
933
|
+
matchingExcludeRegionsFileDefault,
|
|
934
|
+
),
|
|
935
|
+
randSeed=_cfgGet(configData, "matchingParams.randSeed", 42),
|
|
936
|
+
penalizeBy=_cfgGet(
|
|
937
|
+
configData, "matchingParams.penalizeBy", None
|
|
938
|
+
),
|
|
939
|
+
eps=_cfgGet(configData, "matchingParams.eps", 1.0e-2),
|
|
940
|
+
autoLengthQuantile=_cfgGet(
|
|
941
|
+
configData,
|
|
942
|
+
"matchingParams.autoLengthQuantile",
|
|
943
|
+
0.90,
|
|
944
|
+
),
|
|
945
|
+
methodFDR=_cfgGet(
|
|
946
|
+
configData,
|
|
947
|
+
"matchingParams.methodFDR",
|
|
948
|
+
None,
|
|
949
|
+
),
|
|
950
|
+
)
|
|
951
|
+
|
|
952
|
+
return {
|
|
953
|
+
"experimentName": experimentName,
|
|
954
|
+
"genomeArgs": genomeParams,
|
|
955
|
+
"inputArgs": inputParams,
|
|
956
|
+
"outputArgs": outputParams,
|
|
957
|
+
"countingArgs": countingParams,
|
|
958
|
+
"processArgs": processArgs,
|
|
959
|
+
"plotArgs": plotArgs,
|
|
960
|
+
"observationArgs": observationArgs,
|
|
961
|
+
"stateArgs": stateArgs,
|
|
962
|
+
"samArgs": samArgs,
|
|
963
|
+
"detrendArgs": detrendArgs,
|
|
964
|
+
"matchingArgs": matchingArgs,
|
|
965
|
+
}
|
|
966
|
+
|
|
967
|
+
|
|
968
|
+
def convertBedGraphToBigWig(
    experimentName,
    chromSizesFile,
    suffixes: Optional[List[str]] = None,
):
    """Convert Consenrich bedGraph outputs to bigWig using UCSC ``bedGraphToBigWig``.

    For each suffix, looks for ``consenrichOutput_{experimentName}_{suffix}.bedGraph``
    in the current working directory and writes ``{experimentName}_consenrich_{suffix}.bw``.
    This is strictly best-effort: every failure mode (missing binary, missing input
    files, subprocess errors) is logged and skipped/aborted without raising.

    :param experimentName: experiment identifier embedded in input/output file names.
    :param chromSizesFile: path to a UCSC-style chromosome sizes file.
    :param suffixes: bedGraph suffixes to convert; defaults to ``["state"]``.
    """
    if suffixes is None:
        # at least look for the `state` bedGraph
        suffixes = ["state"]
    # Note: adjacent string literals are joined with explicit trailing spaces so
    # the logged message does not run words together.
    warningMessage = (
        "Could not find UCSC bedGraphToBigWig binary utility. "
        "If you need bigWig files instead of the default, human-readable bedGraph files, "
        "you can download the `bedGraphToBigWig` binary from https://hgdownload.soe.ucsc.edu/admin/exe/<operatingSystem, architecture> "
        "OR install via conda (conda install -c bioconda ucsc-bedgraphtobigwig)."
    )

    logger.info(
        "Attempting to generate bigWig files from bedGraph format..."
    )
    try:
        path_ = shutil.which("bedGraphToBigWig")
    except Exception:
        logger.warning(f"\n{warningMessage}\n")
        return
    # shutil.which returns None when the binary is not on PATH
    if not path_:
        logger.warning(f"\n{warningMessage}\n")
        return
    logger.info(f"Using bedGraphToBigWig from {path_}")
    for suffix in suffixes:
        bedgraph = (
            f"consenrichOutput_{experimentName}_{suffix}.bedGraph"
        )
        if not os.path.exists(bedgraph):
            logger.warning(
                f"bedGraph file {bedgraph} does not exist. Skipping bigWig conversion."
            )
            continue
        if not os.path.exists(chromSizesFile):
            # the sizes file is shared by every conversion -- abort entirely
            logger.warning(
                f"{chromSizesFile} does not exist. Skipping bigWig conversion."
            )
            return
        bigwig = f"{experimentName}_consenrich_{suffix}.bw"
        logger.info(f"Start: {bedgraph} --> {bigwig}...")
        try:
            subprocess.run(
                [path_, bedgraph, chromSizesFile, bigwig], check=True
            )
        except Exception as e:
            logger.warning(
                f"bedGraph-->bigWig conversion with\n\n\t`bedGraphToBigWig {bedgraph} {chromSizesFile} {bigwig}`\nraised: \n{e}\n\n"
            )
            continue
        # sanity check: a valid bigWig is larger than a bare header
        if os.path.exists(bigwig) and os.path.getsize(bigwig) > 100:
            logger.info(
                f"Finished: converted {bedgraph} to {bigwig}."
            )
|
|
1025
|
+
|
|
1026
|
+
|
|
1027
|
+
def main():
|
|
1028
|
+
parser = argparse.ArgumentParser(description="Consenrich CLI")
|
|
1029
|
+
parser.add_argument(
|
|
1030
|
+
"--config",
|
|
1031
|
+
type=str,
|
|
1032
|
+
dest="config",
|
|
1033
|
+
help="Path to a YAML config file with parameters + arguments defined in `consenrich.core`",
|
|
1034
|
+
)
|
|
1035
|
+
|
|
1036
|
+
# --- Matching-specific command-line arguments ---
|
|
1037
|
+
parser.add_argument(
|
|
1038
|
+
"--match-bedGraph",
|
|
1039
|
+
type=str,
|
|
1040
|
+
dest="matchBedGraph",
|
|
1041
|
+
help="Path to a bedGraph file of Consenrich estimates to match templates against.\
|
|
1042
|
+
If provided, *only* the matching algorithm is run (no other processing). Note that \
|
|
1043
|
+
some features in `consenrich.matching` may not be supported through this CLI interface.",
|
|
1044
|
+
)
|
|
1045
|
+
parser.add_argument(
|
|
1046
|
+
"--match-template",
|
|
1047
|
+
nargs="+",
|
|
1048
|
+
type=str,
|
|
1049
|
+
help="List of template names to use in matching. See PyWavelets discrete wavelet families: https://pywavelets.readthedocs.io/en/latest/ref/wavelets.html#discrete-wavelets. \
|
|
1050
|
+
Needs to match `--match-level` in length",
|
|
1051
|
+
dest="matchTemplate",
|
|
1052
|
+
)
|
|
1053
|
+
|
|
1054
|
+
parser.add_argument(
|
|
1055
|
+
"--match-level",
|
|
1056
|
+
nargs="+",
|
|
1057
|
+
type=int,
|
|
1058
|
+
help="List of cascade levels to use in matching. Needs to match `--match-template` in length",
|
|
1059
|
+
dest="matchLevel",
|
|
1060
|
+
)
|
|
1061
|
+
|
|
1062
|
+
parser.add_argument(
|
|
1063
|
+
"--match-alpha",
|
|
1064
|
+
type=float,
|
|
1065
|
+
default=0.05,
|
|
1066
|
+
dest="matchAlpha",
|
|
1067
|
+
help="Cutoff qualifying candidate matches as significant (FDR-adjusted p-value < alpha).",
|
|
1068
|
+
)
|
|
1069
|
+
parser.add_argument(
|
|
1070
|
+
"--match-min-length",
|
|
1071
|
+
type=int,
|
|
1072
|
+
default=-1,
|
|
1073
|
+
dest="matchMinMatchLengthBP",
|
|
1074
|
+
help="Minimum length (bp) qualifying candidate matches. Set to -1 for auto calculation from data",
|
|
1075
|
+
)
|
|
1076
|
+
parser.add_argument(
|
|
1077
|
+
"--match-iters",
|
|
1078
|
+
type=int,
|
|
1079
|
+
default=50000,
|
|
1080
|
+
dest="matchIters",
|
|
1081
|
+
help="Number of sampled blocks for estimating null distribution of match scores (cross correlations with templates).",
|
|
1082
|
+
)
|
|
1083
|
+
parser.add_argument(
|
|
1084
|
+
"--match-min-signal",
|
|
1085
|
+
type=str,
|
|
1086
|
+
default="q:0.75",
|
|
1087
|
+
dest="matchMinSignalAtMaxima",
|
|
1088
|
+
help="Minimum signal at local maxima in the response sequence that qualifies candidate matches\
|
|
1089
|
+
Can be an absolute value (e.g., `50.0`) or a quantile (e.g., `q:0.75` for 75th percentile).",
|
|
1090
|
+
)
|
|
1091
|
+
parser.add_argument(
|
|
1092
|
+
"--match-max-matches",
|
|
1093
|
+
type=int,
|
|
1094
|
+
default=1000000,
|
|
1095
|
+
dest="matchMaxNumMatches",
|
|
1096
|
+
)
|
|
1097
|
+
parser.add_argument(
|
|
1098
|
+
"--match-merge-gap",
|
|
1099
|
+
type=int,
|
|
1100
|
+
default=-1,
|
|
1101
|
+
dest="matchMergeGapBP",
|
|
1102
|
+
help="Maximum gap (bp) between candidate matches to merge into a single match.\
|
|
1103
|
+
Set to -1 for auto calculation from data.",
|
|
1104
|
+
)
|
|
1105
|
+
parser.add_argument(
|
|
1106
|
+
"--match-use-wavelet",
|
|
1107
|
+
action="store_true",
|
|
1108
|
+
dest="matchUseWavelet",
|
|
1109
|
+
help="If set, use the wavelet function at the given level rather than scaling function.",
|
|
1110
|
+
)
|
|
1111
|
+
parser.add_argument(
|
|
1112
|
+
"--match-seed", type=int, default=42, dest="matchRandSeed"
|
|
1113
|
+
)
|
|
1114
|
+
parser.add_argument(
|
|
1115
|
+
"--match-exclude-bed",
|
|
1116
|
+
type=str,
|
|
1117
|
+
default=None,
|
|
1118
|
+
dest="matchExcludeBed",
|
|
1119
|
+
)
|
|
1120
|
+
parser.add_argument(
|
|
1121
|
+
"--match-auto-length-quantile",
|
|
1122
|
+
type=float,
|
|
1123
|
+
default=0.90,
|
|
1124
|
+
dest="matchAutoLengthQuantile",
|
|
1125
|
+
help="Cutoff in standardized values to use when auto-calculating minimum match length and merge gap.",
|
|
1126
|
+
)
|
|
1127
|
+
parser.add_argument(
|
|
1128
|
+
"--match-method-fdr",
|
|
1129
|
+
type=str,
|
|
1130
|
+
default=None,
|
|
1131
|
+
dest="matchMethodFDR",
|
|
1132
|
+
help="Method for multiple hypothesis correction of p-values. (bh, by)",
|
|
1133
|
+
)
|
|
1134
|
+
parser.add_argument(
|
|
1135
|
+
"--match-is-log-scale",
|
|
1136
|
+
action="store_true",
|
|
1137
|
+
dest="matchIsLogScale",
|
|
1138
|
+
help="If set, indicates that the input bedGraph has already been transformed.",
|
|
1139
|
+
)
|
|
1140
|
+
parser.add_argument(
|
|
1141
|
+
"--verbose", action="store_true", help="If set, logs config"
|
|
1142
|
+
)
|
|
1143
|
+
args = parser.parse_args()
|
|
1144
|
+
|
|
1145
|
+
if args.matchBedGraph:
|
|
1146
|
+
if not os.path.exists(args.matchBedGraph):
|
|
1147
|
+
raise FileNotFoundError(
|
|
1148
|
+
f"bedGraph file {args.matchBedGraph} couldn't be found."
|
|
1149
|
+
)
|
|
1150
|
+
logger.info(
|
|
1151
|
+
f"Running matching algorithm using bedGraph file {args.matchBedGraph}..."
|
|
1152
|
+
)
|
|
1153
|
+
|
|
1154
|
+
outName = matching.runMatchingAlgorithm(
|
|
1155
|
+
args.matchBedGraph,
|
|
1156
|
+
args.matchTemplate,
|
|
1157
|
+
args.matchLevel,
|
|
1158
|
+
alpha=args.matchAlpha,
|
|
1159
|
+
minMatchLengthBP=args.matchMinMatchLengthBP,
|
|
1160
|
+
iters=args.matchIters,
|
|
1161
|
+
minSignalAtMaxima=args.matchMinSignalAtMaxima,
|
|
1162
|
+
maxNumMatches=args.matchMaxNumMatches,
|
|
1163
|
+
useScalingFunction=(not args.matchUseWavelet),
|
|
1164
|
+
mergeGapBP=args.matchMergeGapBP,
|
|
1165
|
+
excludeRegionsBedFile=args.matchExcludeBed,
|
|
1166
|
+
autoLengthQuantile=args.matchAutoLengthQuantile,
|
|
1167
|
+
methodFDR=args.matchMethodFDR.lower()
|
|
1168
|
+
if args.matchMethodFDR
|
|
1169
|
+
else None,
|
|
1170
|
+
isLogScale=args.matchIsLogScale,
|
|
1171
|
+
randSeed=args.matchRandSeed,
|
|
1172
|
+
merge=True, # always merge for CLI use -- either way, both files produced
|
|
1173
|
+
)
|
|
1174
|
+
logger.info(f"Finished matching. Written to {outName}")
|
|
1175
|
+
sys.exit(0)
|
|
1176
|
+
|
|
1177
|
+
if not args.config:
|
|
1178
|
+
logger.info(
|
|
1179
|
+
"No config file provided, run with `--config <path_to_config.yaml>`"
|
|
1180
|
+
)
|
|
1181
|
+
logger.info(
|
|
1182
|
+
"See documentation: https://nolan-h-hamilton.github.io/Consenrich/"
|
|
1183
|
+
)
|
|
1184
|
+
sys.exit(1)
|
|
1185
|
+
|
|
1186
|
+
if not os.path.exists(args.config):
|
|
1187
|
+
logger.info(f"Config file {args.config} does not exist.")
|
|
1188
|
+
logger.info(
|
|
1189
|
+
"See documentation: https://nolan-h-hamilton.github.io/Consenrich/"
|
|
1190
|
+
)
|
|
1191
|
+
sys.exit(1)
|
|
1192
|
+
|
|
1193
|
+
config = readConfig(args.config)
|
|
1194
|
+
experimentName = config["experimentName"]
|
|
1195
|
+
genomeArgs = config["genomeArgs"]
|
|
1196
|
+
inputArgs = config["inputArgs"]
|
|
1197
|
+
outputArgs = config["outputArgs"]
|
|
1198
|
+
countingArgs = config["countingArgs"]
|
|
1199
|
+
processArgs = config["processArgs"]
|
|
1200
|
+
observationArgs = config["observationArgs"]
|
|
1201
|
+
stateArgs = config["stateArgs"]
|
|
1202
|
+
samArgs = config["samArgs"]
|
|
1203
|
+
detrendArgs = config["detrendArgs"]
|
|
1204
|
+
matchingArgs = config["matchingArgs"]
|
|
1205
|
+
plotArgs = config["plotArgs"]
|
|
1206
|
+
bamFiles = inputArgs.bamFiles
|
|
1207
|
+
bamFilesControl = inputArgs.bamFilesControl
|
|
1208
|
+
numSamples = len(bamFiles)
|
|
1209
|
+
numNearest = observationArgs.numNearest
|
|
1210
|
+
stepSize = countingArgs.stepSize
|
|
1211
|
+
excludeForNorm = genomeArgs.excludeForNorm
|
|
1212
|
+
chromSizes = genomeArgs.chromSizesFile
|
|
1213
|
+
scaleDown = countingArgs.scaleDown
|
|
1214
|
+
initialTreatmentScaleFactors = []
|
|
1215
|
+
minMatchLengthBP_: Optional[int] = matchingArgs.minMatchLengthBP
|
|
1216
|
+
deltaF_ = processArgs.deltaF
|
|
1217
|
+
minR_ = observationArgs.minR
|
|
1218
|
+
maxR_ = observationArgs.maxR
|
|
1219
|
+
minQ_ = processArgs.minQ
|
|
1220
|
+
maxQ_ = processArgs.maxQ
|
|
1221
|
+
offDiagQ_ = processArgs.offDiagQ
|
|
1222
|
+
muncEps: float = 10e-2
|
|
1223
|
+
|
|
1224
|
+
if args.verbose:
|
|
1225
|
+
try:
|
|
1226
|
+
logger.info("Initial Configuration:\n")
|
|
1227
|
+
config_truncated = {
|
|
1228
|
+
k: v
|
|
1229
|
+
for k, v in config.items()
|
|
1230
|
+
if k
|
|
1231
|
+
not in ["inputArgs", "genomeArgs", "countingArgs"]
|
|
1232
|
+
}
|
|
1233
|
+
config_truncated["experimentName"] = experimentName
|
|
1234
|
+
config_truncated["inputArgs"] = inputArgs
|
|
1235
|
+
config_truncated["outputArgs"] = outputArgs
|
|
1236
|
+
config_truncated["genomeArgs"] = genomeArgs
|
|
1237
|
+
config_truncated["countingArgs"] = countingArgs
|
|
1238
|
+
config_truncated["processArgs"] = processArgs
|
|
1239
|
+
config_truncated["observationArgs"] = observationArgs
|
|
1240
|
+
config_truncated["stateArgs"] = stateArgs
|
|
1241
|
+
config_truncated["samArgs"] = samArgs
|
|
1242
|
+
config_truncated["detrendArgs"] = detrendArgs
|
|
1243
|
+
pprint.pprint(config_truncated, indent=8)
|
|
1244
|
+
except Exception as e:
|
|
1245
|
+
logger.warning(f"Failed to print parsed config:\n{e}\n")
|
|
1246
|
+
|
|
1247
|
+
controlsPresent = checkControlsPresent(inputArgs)
|
|
1248
|
+
if args.verbose:
|
|
1249
|
+
logger.info(f"controlsPresent: {controlsPresent}")
|
|
1250
|
+
readLengthsBamFiles = getReadLengths(
|
|
1251
|
+
inputArgs, countingArgs, samArgs
|
|
1252
|
+
)
|
|
1253
|
+
effectiveGenomeSizes = getEffectiveGenomeSizes(
|
|
1254
|
+
genomeArgs, readLengthsBamFiles
|
|
1255
|
+
)
|
|
1256
|
+
|
|
1257
|
+
matchingEnabled = checkMatchingEnabled(matchingArgs)
|
|
1258
|
+
if args.verbose:
|
|
1259
|
+
logger.info(f"matchingEnabled: {matchingEnabled}")
|
|
1260
|
+
scaleFactors = countingArgs.scaleFactors
|
|
1261
|
+
scaleFactorsControl = countingArgs.scaleFactorsControl
|
|
1262
|
+
|
|
1263
|
+
fragmentLengthsTreatment: List[int] = []
|
|
1264
|
+
fragmentLengthsControl: List[int] = []
|
|
1265
|
+
|
|
1266
|
+
if countingArgs.fragmentLengths is not None:
|
|
1267
|
+
fragmentLengthsTreatment = list(countingArgs.fragmentLengths)
|
|
1268
|
+
else:
|
|
1269
|
+
for bamFile in bamFiles:
|
|
1270
|
+
fragmentLengthsTreatment.append(
|
|
1271
|
+
cconsenrich.cgetFragmentLength(
|
|
1272
|
+
bamFile,
|
|
1273
|
+
samThreads=samArgs.samThreads,
|
|
1274
|
+
samFlagExclude=samArgs.samFlagExclude,
|
|
1275
|
+
maxInsertSize=samArgs.maxInsertSize,
|
|
1276
|
+
)
|
|
1277
|
+
)
|
|
1278
|
+
logger.info(
|
|
1279
|
+
f"Estimated fragment length for {bamFile}: {fragmentLengthsTreatment[-1]}"
|
|
1280
|
+
)
|
|
1281
|
+
if controlsPresent:
|
|
1282
|
+
readLengthsControlBamFiles = [
|
|
1283
|
+
core.getReadLength(
|
|
1284
|
+
bamFile,
|
|
1285
|
+
countingArgs.numReads,
|
|
1286
|
+
1000,
|
|
1287
|
+
samArgs.samThreads,
|
|
1288
|
+
samArgs.samFlagExclude,
|
|
1289
|
+
)
|
|
1290
|
+
for bamFile in bamFilesControl
|
|
1291
|
+
]
|
|
1292
|
+
effectiveGenomeSizesControl = [
|
|
1293
|
+
constants.getEffectiveGenomeSize(
|
|
1294
|
+
genomeArgs.genomeName, readLength
|
|
1295
|
+
)
|
|
1296
|
+
for readLength in readLengthsControlBamFiles
|
|
1297
|
+
]
|
|
1298
|
+
|
|
1299
|
+
if countingArgs.fragmentLengthsControl is not None:
|
|
1300
|
+
fragmentLengthsControl = list(
|
|
1301
|
+
countingArgs.fragmentLengthsControl
|
|
1302
|
+
)
|
|
1303
|
+
elif not countingArgs.useTreatmentFragmentLengths:
|
|
1304
|
+
for bamFile in bamFilesControl:
|
|
1305
|
+
fragmentLengthsControl.append(
|
|
1306
|
+
cconsenrich.cgetFragmentLength(
|
|
1307
|
+
bamFile,
|
|
1308
|
+
samThreads=samArgs.samThreads,
|
|
1309
|
+
samFlagExclude=samArgs.samFlagExclude,
|
|
1310
|
+
maxInsertSize=samArgs.maxInsertSize,
|
|
1311
|
+
)
|
|
1312
|
+
)
|
|
1313
|
+
logger.info(
|
|
1314
|
+
f"Estimated fragment length for {bamFile}: {fragmentLengthsControl[-1]}"
|
|
1315
|
+
)
|
|
1316
|
+
if countingArgs.useTreatmentFragmentLengths:
|
|
1317
|
+
logger.info(
|
|
1318
|
+
"`countingParams.useTreatmentFragmentLengths=True`"
|
|
1319
|
+
"`\n\t--> using treatment fraglens for control samples, too"
|
|
1320
|
+
)
|
|
1321
|
+
fragmentLengthsTreatment, fragmentLengthsControl = _resolveFragmentLengthPairs(
|
|
1322
|
+
fragmentLengthsTreatment, fragmentLengthsControl
|
|
1323
|
+
)
|
|
1324
|
+
|
|
1325
|
+
|
|
1326
|
+
|
|
1327
|
+
|
|
1328
|
+
if (
|
|
1329
|
+
scaleFactors is not None
|
|
1330
|
+
and scaleFactorsControl is not None
|
|
1331
|
+
):
|
|
1332
|
+
treatScaleFactors = scaleFactors
|
|
1333
|
+
controlScaleFactors = scaleFactorsControl
|
|
1334
|
+
# still make sure this is accessible
|
|
1335
|
+
initialTreatmentScaleFactors = [1.0] * len(bamFiles)
|
|
1336
|
+
else:
|
|
1337
|
+
try:
|
|
1338
|
+
initialTreatmentScaleFactors = [
|
|
1339
|
+
detrorm.getScaleFactor1x(
|
|
1340
|
+
bamFile,
|
|
1341
|
+
effectiveGenomeSize,
|
|
1342
|
+
readLength,
|
|
1343
|
+
excludeForNorm,
|
|
1344
|
+
genomeArgs.chromSizesFile,
|
|
1345
|
+
samArgs.samThreads,
|
|
1346
|
+
)
|
|
1347
|
+
for bamFile, effectiveGenomeSize, readLength in zip(
|
|
1348
|
+
bamFiles,
|
|
1349
|
+
effectiveGenomeSizes,
|
|
1350
|
+
fragmentLengthsTreatment,
|
|
1351
|
+
)
|
|
1352
|
+
]
|
|
1353
|
+
except Exception:
|
|
1354
|
+
initialTreatmentScaleFactors = [1.0] * len(bamFiles)
|
|
1355
|
+
|
|
1356
|
+
pairScalingFactors = [
|
|
1357
|
+
detrorm.getPairScaleFactors(
|
|
1358
|
+
bamFileA,
|
|
1359
|
+
bamFileB,
|
|
1360
|
+
effectiveGenomeSizeA,
|
|
1361
|
+
effectiveGenomeSizeB,
|
|
1362
|
+
readLengthA,
|
|
1363
|
+
readLengthB,
|
|
1364
|
+
excludeForNorm,
|
|
1365
|
+
chromSizes,
|
|
1366
|
+
samArgs.samThreads,
|
|
1367
|
+
stepSize,
|
|
1368
|
+
scaleDown,
|
|
1369
|
+
normMethod=countingArgs.normMethod,
|
|
1370
|
+
)
|
|
1371
|
+
for bamFileA, bamFileB, effectiveGenomeSizeA, effectiveGenomeSizeB, readLengthA, readLengthB in zip(
|
|
1372
|
+
bamFiles,
|
|
1373
|
+
bamFilesControl,
|
|
1374
|
+
effectiveGenomeSizes,
|
|
1375
|
+
effectiveGenomeSizesControl,
|
|
1376
|
+
fragmentLengthsTreatment,
|
|
1377
|
+
fragmentLengthsControl,
|
|
1378
|
+
)
|
|
1379
|
+
]
|
|
1380
|
+
|
|
1381
|
+
treatScaleFactors = []
|
|
1382
|
+
controlScaleFactors = []
|
|
1383
|
+
for scaleFactorA, scaleFactorB in pairScalingFactors:
|
|
1384
|
+
treatScaleFactors.append(scaleFactorA)
|
|
1385
|
+
controlScaleFactors.append(scaleFactorB)
|
|
1386
|
+
|
|
1387
|
+
else:
|
|
1388
|
+
treatScaleFactors = scaleFactors
|
|
1389
|
+
controlScaleFactors = scaleFactorsControl
|
|
1390
|
+
|
|
1391
|
+
if scaleFactors is None and not controlsPresent:
|
|
1392
|
+
if countingArgs.normMethod.upper() == "RPKM":
|
|
1393
|
+
scaleFactors = [
|
|
1394
|
+
detrorm.getScaleFactorPerMillion(
|
|
1395
|
+
bamFile,
|
|
1396
|
+
excludeForNorm,
|
|
1397
|
+
stepSize,
|
|
1398
|
+
)
|
|
1399
|
+
for bamFile in bamFiles
|
|
1400
|
+
]
|
|
1401
|
+
else:
|
|
1402
|
+
scaleFactors = [
|
|
1403
|
+
detrorm.getScaleFactor1x(
|
|
1404
|
+
bamFile,
|
|
1405
|
+
effectiveGenomeSize,
|
|
1406
|
+
readLength,
|
|
1407
|
+
excludeForNorm,
|
|
1408
|
+
genomeArgs.chromSizesFile,
|
|
1409
|
+
samArgs.samThreads,
|
|
1410
|
+
)
|
|
1411
|
+
for bamFile, effectiveGenomeSize, readLength in zip(
|
|
1412
|
+
bamFiles,
|
|
1413
|
+
effectiveGenomeSizes,
|
|
1414
|
+
fragmentLengthsTreatment,
|
|
1415
|
+
)
|
|
1416
|
+
]
|
|
1417
|
+
chromSizesDict = misc_util.getChromSizesDict(
|
|
1418
|
+
genomeArgs.chromSizesFile,
|
|
1419
|
+
excludeChroms=genomeArgs.excludeChroms,
|
|
1420
|
+
)
|
|
1421
|
+
chromosomes = genomeArgs.chromosomes
|
|
1422
|
+
|
|
1423
|
+
for c_, chromosome in enumerate(chromosomes):
|
|
1424
|
+
chromosomeStart, chromosomeEnd = core.getChromRangesJoint(
|
|
1425
|
+
bamFiles,
|
|
1426
|
+
chromosome,
|
|
1427
|
+
chromSizesDict[chromosome],
|
|
1428
|
+
samArgs.samThreads,
|
|
1429
|
+
samArgs.samFlagExclude,
|
|
1430
|
+
)
|
|
1431
|
+
chromosomeStart = max(
|
|
1432
|
+
0, (chromosomeStart - (chromosomeStart % stepSize))
|
|
1433
|
+
)
|
|
1434
|
+
chromosomeEnd = max(
|
|
1435
|
+
0, (chromosomeEnd - (chromosomeEnd % stepSize))
|
|
1436
|
+
)
|
|
1437
|
+
numIntervals = (
|
|
1438
|
+
((chromosomeEnd - chromosomeStart) + stepSize) - 1
|
|
1439
|
+
) // stepSize
|
|
1440
|
+
intervals = np.arange(
|
|
1441
|
+
chromosomeStart, chromosomeEnd, stepSize
|
|
1442
|
+
)
|
|
1443
|
+
|
|
1444
|
+
if c_ == 0 and deltaF_ < 0:
|
|
1445
|
+
logger.info(
|
|
1446
|
+
f"`processParams.deltaF < 0` --> calling core.autoDeltaF()..."
|
|
1447
|
+
)
|
|
1448
|
+
deltaF_ = core.autoDeltaF(
|
|
1449
|
+
bamFiles,
|
|
1450
|
+
stepSize,
|
|
1451
|
+
fragmentLengths=fragmentLengthsTreatment,
|
|
1452
|
+
)
|
|
1453
|
+
|
|
1454
|
+
chromMat: np.ndarray = np.empty(
|
|
1455
|
+
(numSamples, numIntervals), dtype=np.float32
|
|
1456
|
+
)
|
|
1457
|
+
if controlsPresent:
|
|
1458
|
+
j_: int = 0
|
|
1459
|
+
for bamA, bamB in zip(bamFiles, bamFilesControl):
|
|
1460
|
+
logger.info(
|
|
1461
|
+
f"Counting (trt,ctrl) for {chromosome}: ({bamA}, {bamB})"
|
|
1462
|
+
)
|
|
1463
|
+
pairMatrix: np.ndarray = core.readBamSegments(
|
|
1464
|
+
[bamA, bamB],
|
|
1465
|
+
chromosome,
|
|
1466
|
+
chromosomeStart,
|
|
1467
|
+
chromosomeEnd,
|
|
1468
|
+
stepSize,
|
|
1469
|
+
[
|
|
1470
|
+
readLengthsBamFiles[j_],
|
|
1471
|
+
readLengthsControlBamFiles[j_],
|
|
1472
|
+
],
|
|
1473
|
+
[treatScaleFactors[j_], controlScaleFactors[j_]],
|
|
1474
|
+
samArgs.oneReadPerBin,
|
|
1475
|
+
samArgs.samThreads,
|
|
1476
|
+
samArgs.samFlagExclude,
|
|
1477
|
+
offsetStr=samArgs.offsetStr,
|
|
1478
|
+
maxInsertSize=samArgs.maxInsertSize,
|
|
1479
|
+
pairedEndMode=samArgs.pairedEndMode,
|
|
1480
|
+
inferFragmentLength=samArgs.inferFragmentLength,
|
|
1481
|
+
applyAsinh=countingArgs.applyAsinh,
|
|
1482
|
+
applyLog=countingArgs.applyLog,
|
|
1483
|
+
applySqrt=countingArgs.applySqrt,
|
|
1484
|
+
countEndsOnly=samArgs.countEndsOnly,
|
|
1485
|
+
minMappingQuality=samArgs.minMappingQuality,
|
|
1486
|
+
minTemplateLength=samArgs.minTemplateLength,
|
|
1487
|
+
trimLeftTail=countingArgs.trimLeftTail,
|
|
1488
|
+
fragmentLengths=[
|
|
1489
|
+
fragmentLengthsTreatment[j_],
|
|
1490
|
+
fragmentLengthsControl[j_],
|
|
1491
|
+
],
|
|
1492
|
+
)
|
|
1493
|
+
chromMat[j_, :] = pairMatrix[0, :] - pairMatrix[1, :]
|
|
1494
|
+
j_ += 1
|
|
1495
|
+
else:
|
|
1496
|
+
chromMat = core.readBamSegments(
|
|
1497
|
+
bamFiles,
|
|
1498
|
+
chromosome,
|
|
1499
|
+
chromosomeStart,
|
|
1500
|
+
chromosomeEnd,
|
|
1501
|
+
stepSize,
|
|
1502
|
+
readLengthsBamFiles,
|
|
1503
|
+
scaleFactors,
|
|
1504
|
+
samArgs.oneReadPerBin,
|
|
1505
|
+
samArgs.samThreads,
|
|
1506
|
+
samArgs.samFlagExclude,
|
|
1507
|
+
offsetStr=samArgs.offsetStr,
|
|
1508
|
+
maxInsertSize=samArgs.maxInsertSize,
|
|
1509
|
+
pairedEndMode=samArgs.pairedEndMode,
|
|
1510
|
+
inferFragmentLength=samArgs.inferFragmentLength,
|
|
1511
|
+
applyAsinh=countingArgs.applyAsinh,
|
|
1512
|
+
applyLog=countingArgs.applyLog,
|
|
1513
|
+
applySqrt=countingArgs.applySqrt,
|
|
1514
|
+
countEndsOnly=samArgs.countEndsOnly,
|
|
1515
|
+
minMappingQuality=samArgs.minMappingQuality,
|
|
1516
|
+
minTemplateLength=samArgs.minTemplateLength,
|
|
1517
|
+
trimLeftTail=countingArgs.trimLeftTail,
|
|
1518
|
+
fragmentLengths=fragmentLengthsTreatment,
|
|
1519
|
+
)
|
|
1520
|
+
sparseMap = None
|
|
1521
|
+
if genomeArgs.sparseBedFile and not observationArgs.useALV:
|
|
1522
|
+
if c_ == 0:
|
|
1523
|
+
logger.info(
|
|
1524
|
+
f"\n\t`useALV={observationArgs.useALV}`\n\t\t--> The local component of sample-specific observation uncertainty tracks will be estimated at each interval from the `numNearest={observationArgs.numNearest}` regions in `sparseBedFile={genomeArgs.sparseBedFile}`...\n"
|
|
1525
|
+
)
|
|
1526
|
+
sparseMap = core.getSparseMap(
|
|
1527
|
+
chromosome,
|
|
1528
|
+
intervals,
|
|
1529
|
+
numNearest,
|
|
1530
|
+
genomeArgs.sparseBedFile,
|
|
1531
|
+
)
|
|
1532
|
+
|
|
1533
|
+
# negative --> data-based
|
|
1534
|
+
if observationArgs.minR < 0.0 or observationArgs.maxR < 0.0:
|
|
1535
|
+
minR_ = 0.0
|
|
1536
|
+
maxR_ = 1e4
|
|
1537
|
+
if processArgs.minQ < 0.0 or processArgs.maxQ < 0.0:
|
|
1538
|
+
minQ_ = 0.0
|
|
1539
|
+
maxQ_ = 1e4
|
|
1540
|
+
|
|
1541
|
+
muncMat = np.empty_like(chromMat, dtype=np.float32)
|
|
1542
|
+
for j in range(numSamples):
|
|
1543
|
+
logger.info(
|
|
1544
|
+
f"Muncing {j + 1}/{numSamples} for {chromosome}..."
|
|
1545
|
+
)
|
|
1546
|
+
|
|
1547
|
+
chromMat[j, :] = detrorm.detrendTrack(
|
|
1548
|
+
chromMat[j, :],
|
|
1549
|
+
stepSize,
|
|
1550
|
+
detrendArgs.detrendWindowLengthBP,
|
|
1551
|
+
detrendArgs.useOrderStatFilter,
|
|
1552
|
+
detrendArgs.usePolyFilter,
|
|
1553
|
+
detrendArgs.detrendTrackPercentile,
|
|
1554
|
+
detrendArgs.detrendSavitzkyGolayDegree,
|
|
1555
|
+
)
|
|
1556
|
+
|
|
1557
|
+
muncMat[j, :] = core.getMuncTrack(
|
|
1558
|
+
chromosome,
|
|
1559
|
+
intervals,
|
|
1560
|
+
stepSize,
|
|
1561
|
+
chromMat[j, :],
|
|
1562
|
+
minR_,
|
|
1563
|
+
maxR_,
|
|
1564
|
+
observationArgs.useALV,
|
|
1565
|
+
observationArgs.useConstantNoiseLevel,
|
|
1566
|
+
observationArgs.noGlobal,
|
|
1567
|
+
observationArgs.localWeight,
|
|
1568
|
+
observationArgs.globalWeight,
|
|
1569
|
+
observationArgs.approximationWindowLengthBP,
|
|
1570
|
+
observationArgs.lowPassWindowLengthBP,
|
|
1571
|
+
observationArgs.returnCenter,
|
|
1572
|
+
sparseMap=sparseMap,
|
|
1573
|
+
lowPassFilterType=observationArgs.lowPassFilterType,
|
|
1574
|
+
shrinkOffset=observationArgs.shrinkOffset,
|
|
1575
|
+
)
|
|
1576
|
+
|
|
1577
|
+
if observationArgs.minR < 0.0 or observationArgs.maxR < 0.0:
|
|
1578
|
+
kappa = np.float32(observationArgs.kappaALV)
|
|
1579
|
+
minR_ = np.float32(
|
|
1580
|
+
np.quantile(muncMat[muncMat > muncEps], 0.10)
|
|
1581
|
+
)
|
|
1582
|
+
|
|
1583
|
+
colMax = np.maximum(muncMat.max(axis=0), minR_).astype(
|
|
1584
|
+
np.float32
|
|
1585
|
+
)
|
|
1586
|
+
colMin = np.maximum(
|
|
1587
|
+
muncMat.min(axis=0), (colMax / kappa)
|
|
1588
|
+
).astype(np.float32)
|
|
1589
|
+
|
|
1590
|
+
np.clip(muncMat, colMin, colMax, out=muncMat)
|
|
1591
|
+
muncMat += muncEps
|
|
1592
|
+
muncMat = muncMat.astype(np.float32, copy=False)
|
|
1593
|
+
minQ_ = processArgs.minQ
|
|
1594
|
+
maxQ_ = processArgs.maxQ
|
|
1595
|
+
|
|
1596
|
+
if processArgs.minQ < 0.0 or processArgs.maxQ < 0.0:
|
|
1597
|
+
if minR_ is None:
|
|
1598
|
+
minR_ = np.float32(
|
|
1599
|
+
np.quantile(muncMat[muncMat > muncEps], 0.10)
|
|
1600
|
+
)
|
|
1601
|
+
|
|
1602
|
+
autoMinQ = np.float32(
|
|
1603
|
+
(minR_ / numSamples) + offDiagQ_,
|
|
1604
|
+
)
|
|
1605
|
+
|
|
1606
|
+
if processArgs.minQ < 0.0:
|
|
1607
|
+
minQ_ = autoMinQ
|
|
1608
|
+
else:
|
|
1609
|
+
minQ_ = np.float32(processArgs.minQ)
|
|
1610
|
+
|
|
1611
|
+
if processArgs.maxQ < 0.0:
|
|
1612
|
+
maxQ_ = minQ_
|
|
1613
|
+
else:
|
|
1614
|
+
maxQ_ = np.float32(max(processArgs.maxQ, minQ_))
|
|
1615
|
+
else:
|
|
1616
|
+
maxQ_ = np.float32(max(maxQ_, minQ_))
|
|
1617
|
+
|
|
1618
|
+
logger.info(f">>>Running consenrich: {chromosome}<<<")
|
|
1619
|
+
x, P, y = core.runConsenrich(
|
|
1620
|
+
chromMat,
|
|
1621
|
+
muncMat,
|
|
1622
|
+
deltaF_,
|
|
1623
|
+
minQ_,
|
|
1624
|
+
maxQ_,
|
|
1625
|
+
offDiagQ_,
|
|
1626
|
+
processArgs.dStatAlpha,
|
|
1627
|
+
processArgs.dStatd,
|
|
1628
|
+
processArgs.dStatPC,
|
|
1629
|
+
processArgs.dStatUseMean,
|
|
1630
|
+
stateArgs.stateInit,
|
|
1631
|
+
stateArgs.stateCovarInit,
|
|
1632
|
+
stateArgs.boundState,
|
|
1633
|
+
stateArgs.stateLowerBound,
|
|
1634
|
+
stateArgs.stateUpperBound,
|
|
1635
|
+
samArgs.chunkSize,
|
|
1636
|
+
progressIter=25_000,
|
|
1637
|
+
)
|
|
1638
|
+
logger.info("Done.")
|
|
1639
|
+
|
|
1640
|
+
x_ = core.getPrimaryState(x)
|
|
1641
|
+
y_ = core.getPrecisionWeightedResidual(
|
|
1642
|
+
y,
|
|
1643
|
+
muncMat,
|
|
1644
|
+
stateCovarSmoothed=P
|
|
1645
|
+
if processArgs.scaleResidualsByP11 is not None
|
|
1646
|
+
and processArgs.scaleResidualsByP11
|
|
1647
|
+
else None,
|
|
1648
|
+
)
|
|
1649
|
+
|
|
1650
|
+
if plotArgs.plotStateEstimatesHistogram:
|
|
1651
|
+
core.plotStateEstimatesHistogram(
|
|
1652
|
+
chromosome,
|
|
1653
|
+
plotArgs.plotPrefix,
|
|
1654
|
+
x_,
|
|
1655
|
+
plotDirectory=plotArgs.plotDirectory,
|
|
1656
|
+
)
|
|
1657
|
+
|
|
1658
|
+
if plotArgs.plotResidualsHistogram:
|
|
1659
|
+
core.plotResidualsHistogram(
|
|
1660
|
+
chromosome,
|
|
1661
|
+
plotArgs.plotPrefix,
|
|
1662
|
+
y,
|
|
1663
|
+
plotDirectory=plotArgs.plotDirectory,
|
|
1664
|
+
)
|
|
1665
|
+
|
|
1666
|
+
if plotArgs.plotStateStdHistogram:
|
|
1667
|
+
core.plotStateStdHistogram(
|
|
1668
|
+
chromosome,
|
|
1669
|
+
plotArgs.plotPrefix,
|
|
1670
|
+
np.sqrt(P[:, 0, 0]),
|
|
1671
|
+
plotDirectory=plotArgs.plotDirectory,
|
|
1672
|
+
)
|
|
1673
|
+
|
|
1674
|
+
df = pd.DataFrame(
|
|
1675
|
+
{
|
|
1676
|
+
"Chromosome": chromosome,
|
|
1677
|
+
"Start": intervals,
|
|
1678
|
+
"End": intervals + stepSize,
|
|
1679
|
+
"State": x_,
|
|
1680
|
+
}
|
|
1681
|
+
)
|
|
1682
|
+
|
|
1683
|
+
if outputArgs.writeResiduals:
|
|
1684
|
+
df["Res"] = y_.astype(np.float32) # FFR: cast necessary?
|
|
1685
|
+
if outputArgs.writeMuncTrace:
|
|
1686
|
+
munc_std = np.sqrt(
|
|
1687
|
+
np.mean(muncMat.astype(np.float64), axis=0)
|
|
1688
|
+
).astype(np.float32)
|
|
1689
|
+
df["Munc"] = munc_std
|
|
1690
|
+
if outputArgs.writeStateStd:
|
|
1691
|
+
df["StateStd"] = np.sqrt(P[:, 0, 0]).astype(np.float32)
|
|
1692
|
+
cols_ = ["Chromosome", "Start", "End", "State"]
|
|
1693
|
+
if outputArgs.writeResiduals:
|
|
1694
|
+
cols_.append("Res")
|
|
1695
|
+
if outputArgs.writeMuncTrace:
|
|
1696
|
+
cols_.append("Munc")
|
|
1697
|
+
if outputArgs.writeStateStd:
|
|
1698
|
+
cols_.append("StateStd")
|
|
1699
|
+
df = df[cols_]
|
|
1700
|
+
suffixes = ["state"]
|
|
1701
|
+
if outputArgs.writeResiduals:
|
|
1702
|
+
suffixes.append("residuals")
|
|
1703
|
+
if outputArgs.writeMuncTrace:
|
|
1704
|
+
suffixes.append("muncTraces")
|
|
1705
|
+
if outputArgs.writeStateStd:
|
|
1706
|
+
suffixes.append("stdDevs")
|
|
1707
|
+
|
|
1708
|
+
if (c_ == 0 and len(chromosomes) > 1) or (
|
|
1709
|
+
len(chromosomes) == 1
|
|
1710
|
+
):
|
|
1711
|
+
for file_ in os.listdir("."):
|
|
1712
|
+
if file_.startswith(
|
|
1713
|
+
f"consenrichOutput_{experimentName}"
|
|
1714
|
+
) and (
|
|
1715
|
+
file_.endswith(".bedGraph")
|
|
1716
|
+
or file_.endswith(".narrowPeak")
|
|
1717
|
+
):
|
|
1718
|
+
logger.warning(f"Overwriting: {file_}")
|
|
1719
|
+
os.remove(file_)
|
|
1720
|
+
|
|
1721
|
+
for col, suffix in zip(cols_[3:], suffixes):
|
|
1722
|
+
logger.info(
|
|
1723
|
+
f"{chromosome}: writing/appending to: consenrichOutput_{experimentName}_{suffix}.bedGraph"
|
|
1724
|
+
)
|
|
1725
|
+
df[["Chromosome", "Start", "End", col]].to_csv(
|
|
1726
|
+
f"consenrichOutput_{experimentName}_{suffix}.bedGraph",
|
|
1727
|
+
sep="\t",
|
|
1728
|
+
header=False,
|
|
1729
|
+
index=False,
|
|
1730
|
+
mode="a",
|
|
1731
|
+
float_format="%.3f",
|
|
1732
|
+
lineterminator="\n",
|
|
1733
|
+
)
|
|
1734
|
+
|
|
1735
|
+
logger.info("Finished: output in human-readable format")
|
|
1736
|
+
|
|
1737
|
+
if outputArgs.convertToBigWig:
|
|
1738
|
+
convertBedGraphToBigWig(
|
|
1739
|
+
experimentName,
|
|
1740
|
+
genomeArgs.chromSizesFile,
|
|
1741
|
+
suffixes=suffixes,
|
|
1742
|
+
)
|
|
1743
|
+
|
|
1744
|
+
if matchingEnabled:
|
|
1745
|
+
try:
|
|
1746
|
+
weightsBedGraph: str | None = None
|
|
1747
|
+
logger.info("Running matching algorithm...")
|
|
1748
|
+
if matchingArgs.penalizeBy is not None:
|
|
1749
|
+
if matchingArgs.penalizeBy.lower() in [
|
|
1750
|
+
"stateuncertainty",
|
|
1751
|
+
"statestddev",
|
|
1752
|
+
"statestd",
|
|
1753
|
+
"p11",
|
|
1754
|
+
]:
|
|
1755
|
+
weightsBedGraph = f"consenrichOutput_{experimentName}_stdDevs.bedGraph"
|
|
1756
|
+
elif matchingArgs.penalizeBy.lower() in [
|
|
1757
|
+
"munc",
|
|
1758
|
+
"munctrace",
|
|
1759
|
+
"avgmunctrace",
|
|
1760
|
+
]:
|
|
1761
|
+
weightsBedGraph = f"consenrichOutput_{experimentName}_muncTraces.bedGraph"
|
|
1762
|
+
elif matchingArgs.penalizeBy.lower() == "none":
|
|
1763
|
+
weightsBedGraph = None
|
|
1764
|
+
else:
|
|
1765
|
+
weightsBedGraph = None
|
|
1766
|
+
|
|
1767
|
+
outName = matching.runMatchingAlgorithm(
|
|
1768
|
+
f"consenrichOutput_{experimentName}_state.bedGraph",
|
|
1769
|
+
matchingArgs.templateNames,
|
|
1770
|
+
matchingArgs.cascadeLevels,
|
|
1771
|
+
matchingArgs.iters,
|
|
1772
|
+
alpha=matchingArgs.alpha,
|
|
1773
|
+
minMatchLengthBP=minMatchLengthBP_,
|
|
1774
|
+
maxNumMatches=matchingArgs.maxNumMatches,
|
|
1775
|
+
minSignalAtMaxima=matchingArgs.minSignalAtMaxima,
|
|
1776
|
+
useScalingFunction=matchingArgs.useScalingFunction,
|
|
1777
|
+
mergeGapBP=matchingArgs.mergeGapBP,
|
|
1778
|
+
excludeRegionsBedFile=matchingArgs.excludeRegionsBedFile,
|
|
1779
|
+
randSeed=matchingArgs.randSeed,
|
|
1780
|
+
weightsBedGraph=weightsBedGraph,
|
|
1781
|
+
eps=matchingArgs.eps,
|
|
1782
|
+
isLogScale=countingArgs.applyLog
|
|
1783
|
+
or countingArgs.applyAsinh
|
|
1784
|
+
or countingArgs.applySqrt,
|
|
1785
|
+
autoLengthQuantile=matchingArgs.autoLengthQuantile,
|
|
1786
|
+
methodFDR=matchingArgs.methodFDR.lower()
|
|
1787
|
+
if matchingArgs.methodFDR is not None
|
|
1788
|
+
else None,
|
|
1789
|
+
merge=matchingArgs.merge,
|
|
1790
|
+
)
|
|
1791
|
+
|
|
1792
|
+
logger.info(f"Finished matching. Written to {outName}")
|
|
1793
|
+
except Exception as ex_:
|
|
1794
|
+
logger.warning(
|
|
1795
|
+
f"Matching algorithm raised an exception:\n\n\t{ex_}\n"
|
|
1796
|
+
f"Skipping matching step...try running post-hoc via `consenrich --match-bedGraph <bedGraphFile>`\n"
|
|
1797
|
+
f"\tSee ``consenrich -h`` for more details.\n"
|
|
1798
|
+
)
|
|
1799
|
+
|
|
1800
|
+
|
|
1801
|
+
if __name__ == "__main__":
|
|
1802
|
+
main()
|