consenrich 0.7.0b1__cp313-cp313-macosx_11_0_arm64.whl → 0.7.1b1__cp313-cp313-macosx_11_0_arm64.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of consenrich might be problematic. Click here for more details.

consenrich/consenrich.py CHANGED
@@ -76,9 +76,14 @@ def getReadLengths(
76
76
  :return: List of read lengths for each BAM file.
77
77
  """
78
78
  if not inputArgs.bamFiles:
79
- raise ValueError("No BAM files provided in the input arguments.")
79
+ raise ValueError(
80
+ "No BAM files provided in the input arguments."
81
+ )
80
82
 
81
- if not isinstance(inputArgs.bamFiles, list) or len(inputArgs.bamFiles) == 0:
83
+ if (
84
+ not isinstance(inputArgs.bamFiles, list)
85
+ or len(inputArgs.bamFiles) == 0
86
+ ):
82
87
  raise ValueError("bam files list is empty")
83
88
 
84
89
  return [
@@ -148,7 +153,9 @@ def getInputArgs(config_path: str) -> core.inputParams:
148
153
  bamFiles = _expandWildCards(bamFilesRaw)
149
154
  bamFilesControl = _expandWildCards(bamFilesControlRaw)
150
155
  if len(bamFiles) == 0:
151
- raise ValueError("No BAM files provided in the configuration.")
156
+ raise ValueError(
157
+ "No BAM files provided in the configuration."
158
+ )
152
159
  if (
153
160
  len(bamFilesControl) > 0
154
161
  and len(bamFilesControl) != len(bamFiles)
@@ -164,7 +171,11 @@ def getInputArgs(config_path: str) -> core.inputParams:
164
171
  )
165
172
  bamFilesControl = bamFilesControl * len(bamFiles)
166
173
 
167
- if not bamFiles or not isinstance(bamFiles, list) or len(bamFiles) == 0:
174
+ if (
175
+ not bamFiles
176
+ or not isinstance(bamFiles, list)
177
+ or len(bamFiles) == 0
178
+ ):
168
179
  raise ValueError("No BAM files found")
169
180
 
170
181
  for i, bamFile in enumerate(bamFiles):
@@ -176,19 +187,21 @@ def getInputArgs(config_path: str) -> core.inputParams:
176
187
 
177
188
  # if we've made it here, we can check pairedEnd
178
189
  pairedEndList = misc_util.bamsArePairedEnd(bamFiles)
179
- _isPairedEnd: Optional[bool] = config.get("inputParams.pairedEnd", None)
190
+ _isPairedEnd: Optional[bool] = config.get(
191
+ "inputParams.pairedEnd", None
192
+ )
180
193
  if _isPairedEnd is None:
181
194
  # only set auto if not provided in config
182
195
  _isPairedEnd = all(pairedEndList)
183
196
  if _isPairedEnd:
184
- logger.info(
185
- "Paired-end BAM files detected"
186
- )
197
+ logger.info("Paired-end BAM files detected")
187
198
  else:
188
- logger.info(
189
- "One or more single-end BAM files detected"
190
- )
191
- return core.inputParams(bamFiles=bamFiles, bamFilesControl=bamFilesControl, pairedEnd=_isPairedEnd)
199
+ logger.info("One or more single-end BAM files detected")
200
+ return core.inputParams(
201
+ bamFiles=bamFiles,
202
+ bamFilesControl=bamFilesControl,
203
+ pairedEnd=_isPairedEnd,
204
+ )
192
205
 
193
206
 
194
207
  def getGenomeArgs(config_path: str) -> core.genomeParams:
@@ -200,12 +213,22 @@ def getGenomeArgs(config_path: str) -> core.genomeParams:
200
213
  blacklistFile: Optional[str] = None
201
214
  sparseBedFile: Optional[str] = None
202
215
  chromosomes: Optional[List[str]] = None
203
- excludeChroms: List[str] = config.get("genomeParams.excludeChroms", [])
204
- excludeForNorm: List[str] = config.get("genomeParams.excludeForNorm", [])
216
+ excludeChroms: List[str] = config.get(
217
+ "genomeParams.excludeChroms", []
218
+ )
219
+ excludeForNorm: List[str] = config.get(
220
+ "genomeParams.excludeForNorm", []
221
+ )
205
222
  if genome:
206
- chromSizesFile = constants.getGenomeResourceFile(genome, "sizes")
207
- blacklistFile = constants.getGenomeResourceFile(genome, "blacklist")
208
- sparseBedFile = constants.getGenomeResourceFile(genome, "sparse")
223
+ chromSizesFile = constants.getGenomeResourceFile(
224
+ genome, "sizes"
225
+ )
226
+ blacklistFile = constants.getGenomeResourceFile(
227
+ genome, "blacklist"
228
+ )
229
+ sparseBedFile = constants.getGenomeResourceFile(
230
+ genome, "sparse"
231
+ )
209
232
  if config.get("genomeParams.chromSizesFile", None):
210
233
  chromSizesFile = config["genomeParams.chromSizesFile"]
211
234
  if config.get("genomeParams.blacklistFile", None):
@@ -232,10 +255,14 @@ def getGenomeArgs(config_path: str) -> core.genomeParams:
232
255
  raise ValueError(
233
256
  "No chromosomes provided in the configuration and no chromosome sizes file specified."
234
257
  )
235
- chromosomes = [chrom.strip() for chrom in chromosomes if chrom.strip()]
258
+ chromosomes = [
259
+ chrom.strip() for chrom in chromosomes if chrom.strip()
260
+ ]
236
261
  if excludeChroms:
237
262
  chromosomes = [
238
- chrom for chrom in chromosomes if chrom not in excludeChroms
263
+ chrom
264
+ for chrom in chromosomes
265
+ if chrom not in excludeChroms
239
266
  ]
240
267
  if not chromosomes:
241
268
  raise ValueError(
@@ -259,7 +286,9 @@ def getCountingArgs(config_path: str) -> core.countingParams:
259
286
  scaleDown = config.get("countingParams.scaleDown", True)
260
287
  scaleFactors = config.get("countingParams.scaleFactors", None)
261
288
  numReads = config.get("countingParams.numReads", 100)
262
- scaleFactorsControl = config.get("countingParams.scaleFactorsControl", None)
289
+ scaleFactorsControl = config.get(
290
+ "countingParams.scaleFactorsControl", None
291
+ )
263
292
  applyAsinh = config.get("countingParams.applyAsinh", False)
264
293
  applyLog = config.get("countingParams.applyLog", False)
265
294
  if applyAsinh and applyLog:
@@ -271,19 +300,25 @@ def getCountingArgs(config_path: str) -> core.countingParams:
271
300
  rescaleToTreatmentCoverage = config.get(
272
301
  "countingParams.rescaleToTreatmentCoverage", True
273
302
  )
274
- if scaleFactors is not None and not isinstance(scaleFactors, list):
303
+ if scaleFactors is not None and not isinstance(
304
+ scaleFactors, list
305
+ ):
275
306
  raise ValueError("`scaleFactors` should be a list of floats.")
276
307
  if scaleFactorsControl is not None and not isinstance(
277
308
  scaleFactorsControl, list
278
309
  ):
279
- raise ValueError("`scaleFactorsControl` should be a list of floats.")
310
+ raise ValueError(
311
+ "`scaleFactorsControl` should be a list of floats."
312
+ )
280
313
  if (
281
314
  scaleFactors is not None
282
315
  and scaleFactorsControl is not None
283
316
  and len(scaleFactors) != len(scaleFactorsControl)
284
317
  ):
285
318
  if len(scaleFactorsControl) == 1:
286
- scaleFactorsControl = scaleFactorsControl * len(scaleFactors)
319
+ scaleFactorsControl = scaleFactorsControl * len(
320
+ scaleFactors
321
+ )
287
322
  else:
288
323
  raise ValueError(
289
324
  "control and treatment scale factors: must be equal length or 1 control"
@@ -308,12 +343,16 @@ def readConfig(config_path: str) -> Dict[str, Any]:
308
343
  genomeParams = getGenomeArgs(config_path)
309
344
  countingParams = getCountingArgs(config_path)
310
345
  minR_default = _getMinR(config, len(inputParams.bamFiles))
311
- minQ_default = (minR_default / (len(inputParams.bamFiles))) + 0.10 # protect condition number
346
+ minQ_default = (
347
+ minR_default / (len(inputParams.bamFiles))
348
+ ) + 0.10 # protect condition number
312
349
  matchingExcludeRegionsBedFile_default: Optional[str] = (
313
350
  genomeParams.blacklistFile
314
351
  )
315
352
  return {
316
- "experimentName": config.get("experimentName", "consenrichExperiment"),
353
+ "experimentName": config.get(
354
+ "experimentName", "consenrichExperiment"
355
+ ),
317
356
  "genomeArgs": genomeParams,
318
357
  "inputArgs": inputParams,
319
358
  "countingArgs": countingParams,
@@ -338,8 +377,12 @@ def readConfig(config_path: str) -> Dict[str, Any]:
338
377
  ),
339
378
  noGlobal=config.get("observationParams.noGlobal", False),
340
379
  numNearest=config.get("observationParams.numNearest", 25),
341
- localWeight=config.get("observationParams.localWeight", 0.333),
342
- globalWeight=config.get("observationParams.globalWeight", 0.667),
380
+ localWeight=config.get(
381
+ "observationParams.localWeight", 0.333
382
+ ),
383
+ globalWeight=config.get(
384
+ "observationParams.globalWeight", 0.667
385
+ ),
343
386
  approximationWindowLengthBP=config.get(
344
387
  "observationParams.approximationWindowLengthBP", 10000
345
388
  ),
@@ -349,26 +392,50 @@ def readConfig(config_path: str) -> Dict[str, Any]:
349
392
  lowPassFilterType=config.get(
350
393
  "observationParams.lowPassFilterType", "median"
351
394
  ),
352
- returnCenter=config.get("observationParams.returnCenter", True),
395
+ returnCenter=config.get(
396
+ "observationParams.returnCenter", True
397
+ ),
353
398
  ),
354
399
  "stateArgs": core.stateParams(
355
400
  stateInit=config.get("stateParams.stateInit", 0.0),
356
- stateCovarInit=config.get("stateParams.stateCovarInit", 100.0),
401
+ stateCovarInit=config.get(
402
+ "stateParams.stateCovarInit", 100.0
403
+ ),
357
404
  boundState=config.get("stateParams.boundState", True),
358
- stateLowerBound=config.get("stateParams.stateLowerBound", 0.0),
359
- stateUpperBound=config.get("stateParams.stateUpperBound", 10000.0),
405
+ stateLowerBound=config.get(
406
+ "stateParams.stateLowerBound", 0.0
407
+ ),
408
+ stateUpperBound=config.get(
409
+ "stateParams.stateUpperBound", 10000.0
410
+ ),
360
411
  ),
361
412
  "samArgs": core.samParams(
362
413
  samThreads=config.get("samParams.samThreads", 1),
363
- samFlagExclude=config.get("samParams.samFlagExclude", 3844),
414
+ samFlagExclude=config.get(
415
+ "samParams.samFlagExclude", 3844
416
+ ),
364
417
  oneReadPerBin=config.get("samParams.oneReadPerBin", 0),
365
418
  chunkSize=config.get("samParams.chunkSize", 1000000),
366
419
  offsetStr=config.get("samParams.offsetStr", "0,0"),
367
420
  extendBP=config.get("samParams.extendBP", []),
368
421
  maxInsertSize=config.get("samParams.maxInsertSize", 1000),
369
- pairedEndMode=config.get("samParams.pairedEndMode", 1 if inputParams.pairedEnd is not None and int(inputParams.pairedEnd) > 0 else 0),
370
- inferFragmentLength=config.get("samParams.inferFragmentLength", 1 if inputParams.pairedEnd is not None and int(inputParams.pairedEnd) == 0 else 0),
371
- countEndsOnly=config.get("samParams.countEndsOnly", False),
422
+ pairedEndMode=config.get(
423
+ "samParams.pairedEndMode",
424
+ 1
425
+ if inputParams.pairedEnd is not None
426
+ and int(inputParams.pairedEnd) > 0
427
+ else 0,
428
+ ),
429
+ inferFragmentLength=config.get(
430
+ "samParams.inferFragmentLength",
431
+ 1
432
+ if inputParams.pairedEnd is not None
433
+ and int(inputParams.pairedEnd) == 0
434
+ else 0,
435
+ ),
436
+ countEndsOnly=config.get(
437
+ "samParams.countEndsOnly", False
438
+ ),
372
439
  ),
373
440
  "detrendArgs": core.detrendParams(
374
441
  detrendWindowLengthBP=config.get(
@@ -377,7 +444,9 @@ def readConfig(config_path: str) -> Dict[str, Any]:
377
444
  detrendTrackPercentile=config.get(
378
445
  "detrendParams.detrendTrackPercentile", 75.0
379
446
  ),
380
- usePolyFilter=config.get("detrendParams.usePolyFilter", False),
447
+ usePolyFilter=config.get(
448
+ "detrendParams.usePolyFilter", False
449
+ ),
381
450
  detrendSavitzkyGolayDegree=config.get(
382
451
  "detrendParams.detrendSavitzkyGolayDegree", 2
383
452
  ),
@@ -386,12 +455,20 @@ def readConfig(config_path: str) -> Dict[str, Any]:
386
455
  ),
387
456
  ),
388
457
  "matchingArgs": core.matchingParams(
389
- templateNames=config.get("matchingParams.templateNames", []),
390
- cascadeLevels=config.get("matchingParams.cascadeLevels", [2]),
458
+ templateNames=config.get(
459
+ "matchingParams.templateNames", []
460
+ ),
461
+ cascadeLevels=config.get(
462
+ "matchingParams.cascadeLevels", [2]
463
+ ),
391
464
  iters=config.get("matchingParams.iters", 25_000),
392
465
  alpha=config.get("matchingParams.alpha", 0.05),
393
- minMatchLengthBP=config.get("matchingParams.minMatchLengthBP", 250),
394
- maxNumMatches=config.get("matchingParams.maxNumMatches", 100_000),
466
+ minMatchLengthBP=config.get(
467
+ "matchingParams.minMatchLengthBP", 250
468
+ ),
469
+ maxNumMatches=config.get(
470
+ "matchingParams.maxNumMatches", 100_000
471
+ ),
395
472
  minSignalAtMaxima=config.get(
396
473
  "matchingParams.minSignalAtMaxima", "q:0.75"
397
474
  ),
@@ -418,7 +495,9 @@ def convertBedGraphToBigWig(experimentName, chromSizesFile):
418
495
  "OR install via conda (conda install -c bioconda ucsc-bedgraphtobigwig)."
419
496
  )
420
497
 
421
- logger.info("Attempting to generate bigWig files from bedGraph format...")
498
+ logger.info(
499
+ "Attempting to generate bigWig files from bedGraph format..."
500
+ )
422
501
  try:
423
502
  path_ = shutil.which("bedGraphToBigWig")
424
503
  except Exception as e:
@@ -429,7 +508,9 @@ def convertBedGraphToBigWig(experimentName, chromSizesFile):
429
508
  return
430
509
  logger.info(f"Using bedGraphToBigWig from {path_}")
431
510
  for suffix in suffixes:
432
- bedgraph = f"consenrichOutput_{experimentName}_{suffix}.bedGraph"
511
+ bedgraph = (
512
+ f"consenrichOutput_{experimentName}_{suffix}.bedGraph"
513
+ )
433
514
  if not os.path.exists(bedgraph):
434
515
  logger.warning(
435
516
  f"bedGraph file {bedgraph} does not exist. Skipping bigWig conversion."
@@ -452,7 +533,9 @@ def convertBedGraphToBigWig(experimentName, chromSizesFile):
452
533
  )
453
534
  continue
454
535
  if os.path.exists(bigwig) and os.path.getsize(bigwig) > 100:
455
- logger.info(f"Finished: converted {bedgraph} to {bigwig}.")
536
+ logger.info(
537
+ f"Finished: converted {bedgraph} to {bigwig}."
538
+ )
456
539
 
457
540
 
458
541
  def main():
@@ -476,10 +559,16 @@ def main():
476
559
  "--match-template",
477
560
  type=str,
478
561
  default="haar",
479
- choices=[x for x in pywt.wavelist(kind="discrete") if "bio" not in x],
562
+ choices=[
563
+ x
564
+ for x in pywt.wavelist(kind="discrete")
565
+ if "bio" not in x
566
+ ],
480
567
  dest="matchTemplate",
481
568
  )
482
- parser.add_argument("--match-level", type=int, default=2, dest="matchLevel")
569
+ parser.add_argument(
570
+ "--match-level", type=int, default=2, dest="matchLevel"
571
+ )
483
572
  parser.add_argument(
484
573
  "--match-alpha", type=float, default=0.05, dest="matchAlpha"
485
574
  )
@@ -508,16 +597,24 @@ def main():
508
597
  "--match-no-merge", action="store_true", dest="matchNoMerge"
509
598
  )
510
599
  parser.add_argument(
511
- "--match-merge-gap", type=int, default=None, dest="matchMergeGapBP"
600
+ "--match-merge-gap",
601
+ type=int,
602
+ default=None,
603
+ dest="matchMergeGapBP",
512
604
  )
513
605
  parser.add_argument(
514
- "--match-use-wavelet", action="store_true", dest="matchUseWavelet"
606
+ "--match-use-wavelet",
607
+ action="store_true",
608
+ dest="matchUseWavelet",
515
609
  )
516
610
  parser.add_argument(
517
611
  "--match-seed", type=int, default=42, dest="matchRandSeed"
518
612
  )
519
613
  parser.add_argument(
520
- "--match-exclude-bed", type=str, default=None, dest="matchExcludeBed"
614
+ "--match-exclude-bed",
615
+ type=str,
616
+ default=None,
617
+ dest="matchExcludeBed",
521
618
  )
522
619
  parser.add_argument(
523
620
  "--verbose", action="store_true", help="If set, logs config"
@@ -601,7 +698,8 @@ def main():
601
698
  config_truncated = {
602
699
  k: v
603
700
  for k, v in config.items()
604
- if k not in ["inputArgs", "genomeArgs", "countingArgs"]
701
+ if k
702
+ not in ["inputArgs", "genomeArgs", "countingArgs"]
605
703
  }
606
704
  config_truncated["experimentName"] = experimentName
607
705
  config_truncated["inputArgs"] = inputArgs
@@ -619,7 +717,9 @@ def main():
619
717
  controlsPresent = checkControlsPresent(inputArgs)
620
718
  if args.verbose:
621
719
  logger.info(f"controlsPresent: {controlsPresent}")
622
- readLengthsBamFiles = getReadLengths(inputArgs, countingArgs, samArgs)
720
+ readLengthsBamFiles = getReadLengths(
721
+ inputArgs, countingArgs, samArgs
722
+ )
623
723
  effectiveGenomeSizes = getEffectiveGenomeSizes(
624
724
  genomeArgs, readLengthsBamFiles
625
725
  )
@@ -641,11 +741,16 @@ def main():
641
741
  for bamFile in bamFilesControl
642
742
  ]
643
743
  effectiveGenomeSizesControl = [
644
- constants.getEffectiveGenomeSize(genomeArgs.genomeName, readLength)
744
+ constants.getEffectiveGenomeSize(
745
+ genomeArgs.genomeName, readLength
746
+ )
645
747
  for readLength in readLengthsControlBamFiles
646
748
  ]
647
749
 
648
- if scaleFactors is not None and scaleFactorsControl is not None:
750
+ if (
751
+ scaleFactors is not None
752
+ and scaleFactorsControl is not None
753
+ ):
649
754
  treatScaleFactors = scaleFactors
650
755
  controlScaleFactors = scaleFactorsControl
651
756
  # still make sure this is accessible
@@ -662,7 +767,9 @@ def main():
662
767
  samArgs.samThreads,
663
768
  )
664
769
  for bamFile, effectiveGenomeSize, readLength in zip(
665
- bamFiles, effectiveGenomeSizes, readLengthsBamFiles
770
+ bamFiles,
771
+ effectiveGenomeSizes,
772
+ readLengthsBamFiles,
666
773
  )
667
774
  ]
668
775
  except Exception:
@@ -716,7 +823,8 @@ def main():
716
823
  )
717
824
  ]
718
825
  chromSizesDict = misc_util.getChromSizesDict(
719
- genomeArgs.chromSizesFile, excludeChroms=genomeArgs.excludeChroms
826
+ genomeArgs.chromSizesFile,
827
+ excludeChroms=genomeArgs.excludeChroms,
720
828
  )
721
829
  chromosomes = genomeArgs.chromosomes
722
830
 
@@ -731,11 +839,15 @@ def main():
731
839
  chromosomeStart = max(
732
840
  0, (chromosomeStart - (chromosomeStart % stepSize))
733
841
  )
734
- chromosomeEnd = max(0, (chromosomeEnd - (chromosomeEnd % stepSize)))
842
+ chromosomeEnd = max(
843
+ 0, (chromosomeEnd - (chromosomeEnd % stepSize))
844
+ )
735
845
  numIntervals = (
736
846
  ((chromosomeEnd - chromosomeStart) + stepSize) - 1
737
847
  ) // stepSize
738
- intervals = np.arange(chromosomeStart, chromosomeEnd, stepSize)
848
+ intervals = np.arange(
849
+ chromosomeStart, chromosomeEnd, stepSize
850
+ )
739
851
  chromMat: np.ndarray = np.empty(
740
852
  (numSamples, numIntervals), dtype=np.float32
741
853
  )
@@ -752,7 +864,10 @@ def main():
752
864
  chromosomeStart,
753
865
  chromosomeEnd,
754
866
  stepSize,
755
- [readLengthsBamFiles[j_], readLengthsControlBamFiles[j_]],
867
+ [
868
+ readLengthsBamFiles[j_],
869
+ readLengthsControlBamFiles[j_],
870
+ ],
756
871
  [treatScaleFactors[j_], controlScaleFactors[j_]],
757
872
  samArgs.oneReadPerBin,
758
873
  samArgs.samThreads,
@@ -764,10 +879,12 @@ def main():
764
879
  inferFragmentLength=samArgs.inferFragmentLength,
765
880
  applyAsinh=countingArgs.applyAsinh,
766
881
  applyLog=countingArgs.applyLog,
767
- countEndsOnly=samArgs.countEndsOnly
882
+ countEndsOnly=samArgs.countEndsOnly,
768
883
  )
769
884
  if countingArgs.rescaleToTreatmentCoverage:
770
- finalSF = max(1.0, initialTreatmentScaleFactors[j_])
885
+ finalSF = max(
886
+ 1.0, initialTreatmentScaleFactors[j_]
887
+ )
771
888
  chromMat[j_, :] = finalSF * (
772
889
  pairMatrix[0, :] - pairMatrix[1, :]
773
890
  )
@@ -791,18 +908,25 @@ def main():
791
908
  inferFragmentLength=samArgs.inferFragmentLength,
792
909
  applyAsinh=countingArgs.applyAsinh,
793
910
  applyLog=countingArgs.applyLog,
794
- countEndsOnly=samArgs.countEndsOnly
911
+ countEndsOnly=samArgs.countEndsOnly,
795
912
  )
796
913
  sparseMap = None
797
914
  if genomeArgs.sparseBedFile and not observationArgs.useALV:
798
- logger.info(f"Building sparse mapping for {chromosome}...")
915
+ logger.info(
916
+ f"Building sparse mapping for {chromosome}..."
917
+ )
799
918
  sparseMap = core.getSparseMap(
800
- chromosome, intervals, numNearest, genomeArgs.sparseBedFile
919
+ chromosome,
920
+ intervals,
921
+ numNearest,
922
+ genomeArgs.sparseBedFile,
801
923
  )
802
924
 
803
925
  muncMat = np.empty_like(chromMat, dtype=np.float32)
804
926
  for j in range(numSamples):
805
- logger.info(f"Muncing {j + 1}/{numSamples} for {chromosome}...")
927
+ logger.info(
928
+ f"Muncing {j + 1}/{numSamples} for {chromosome}..."
929
+ )
806
930
  muncMat[j, :] = core.getMuncTrack(
807
931
  chromosome,
808
932
  intervals,
@@ -873,8 +997,11 @@ def main():
873
997
  )
874
998
  if c_ == 0 and len(chromosomes) > 1:
875
999
  for file_ in os.listdir("."):
876
- if file_.startswith(f"consenrichOutput_{experimentName}") and (
877
- file_.endswith(".bedGraph") or file_.endswith(".narrowPeak")
1000
+ if file_.startswith(
1001
+ f"consenrichOutput_{experimentName}"
1002
+ ) and (
1003
+ file_.endswith(".bedGraph")
1004
+ or file_.endswith(".narrowPeak")
878
1005
  ):
879
1006
  logger.warning(f"Overwriting: {file_}")
880
1007
  os.remove(file_)
@@ -927,13 +1054,22 @@ def main():
927
1054
  convertBedGraphToBigWig(experimentName, genomeArgs.chromSizesFile)
928
1055
  if matchingEnabled and matchingArgs.merge:
929
1056
  try:
1057
+ mergeGapBP_ = matchingArgs.mergeGapBP
1058
+ if mergeGapBP_ is None:
1059
+ mergeGapBP_ = (
1060
+ int(matchingArgs.minMatchLengthBP/2) + 1
1061
+ if matchingArgs.minMatchLengthBP is not None
1062
+ else 75
1063
+ )
930
1064
  matching.mergeMatches(
931
1065
  f"consenrichOutput_{experimentName}_matches.narrowPeak",
932
- mergeGapBP=matchingArgs.mergeGapBP,
1066
+ mergeGapBP=mergeGapBP_,
933
1067
  )
934
1068
 
935
1069
  except Exception as e:
936
- logger.warning(f"Failed to merge matches...SKIPPING:\n{e}\n\n")
1070
+ logger.warning(
1071
+ f"Failed to merge matches...SKIPPING:\n{e}\n\n"
1072
+ )
937
1073
  logger.info("Done.")
938
1074
 
939
1075
 
consenrich/core.py CHANGED
@@ -326,16 +326,16 @@ class matchingParams(NamedTuple):
326
326
  an empirical null to test significance. See :func:`cconsenrich.csampleBlockStats`.
327
327
  :type iters: int
328
328
  :param alpha: Primary significance threshold on detected matches. Specifically, the
329
- :math:`1 - \alpha` quantile of an empirical null distribution. The empirical null
330
- distribution is built from cross-correlation values over randomly sampled blocks.
329
+ minimum corr. empirical p-value approximated from randomly sampled blocks in the
330
+ response sequence.
331
331
  :type alpha: float
332
332
  :param minMatchLengthBP: Within a window of `minMatchLengthBP` length (bp), relative maxima in
333
333
  the signal-template convolution must be greater in value than others to qualify as matches.
334
334
  :type minMatchLengthBP: int
335
335
  :param minSignalAtMaxima: Secondary significance threshold coupled with `alpha`. Requires the *signal value*
336
- at relative maxima in the response sequence to be greater than this threshold. Comparisons are made in log-scale.
337
- If a `float` value is provided, the minimum signal value must be greater than this (absolute) value. *Set to a
338
- negative value to disable the threshold*.
336
+ at relative maxima in the response sequence to be greater than this threshold. Comparisons are made in log-scale
337
+ to temper genome-wide dynamic range. If a `float` value is provided, the minimum signal value must be greater
338
+ than this (absolute) value. *Set to a negative value to disable the threshold*.
339
339
  If a `str` value is provided, looks for 'q:quantileValue', e.g., 'q:0.90'. The
340
340
  threshold is then set to the corresponding quantile of the non-zero signal estimates.
341
341
  :type minSignalAtMaxima: Optional[str | float]