consenrich 0.7.0b1__cp312-cp312-macosx_11_0_arm64.whl → 0.7.1b2__cp312-cp312-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of consenrich might be problematic. Click here for more details.

consenrich/consenrich.py CHANGED
@@ -76,9 +76,14 @@ def getReadLengths(
76
76
  :return: List of read lengths for each BAM file.
77
77
  """
78
78
  if not inputArgs.bamFiles:
79
- raise ValueError("No BAM files provided in the input arguments.")
79
+ raise ValueError(
80
+ "No BAM files provided in the input arguments."
81
+ )
80
82
 
81
- if not isinstance(inputArgs.bamFiles, list) or len(inputArgs.bamFiles) == 0:
83
+ if (
84
+ not isinstance(inputArgs.bamFiles, list)
85
+ or len(inputArgs.bamFiles) == 0
86
+ ):
82
87
  raise ValueError("bam files list is empty")
83
88
 
84
89
  return [
@@ -148,7 +153,9 @@ def getInputArgs(config_path: str) -> core.inputParams:
148
153
  bamFiles = _expandWildCards(bamFilesRaw)
149
154
  bamFilesControl = _expandWildCards(bamFilesControlRaw)
150
155
  if len(bamFiles) == 0:
151
- raise ValueError("No BAM files provided in the configuration.")
156
+ raise ValueError(
157
+ "No BAM files provided in the configuration."
158
+ )
152
159
  if (
153
160
  len(bamFilesControl) > 0
154
161
  and len(bamFilesControl) != len(bamFiles)
@@ -164,7 +171,11 @@ def getInputArgs(config_path: str) -> core.inputParams:
164
171
  )
165
172
  bamFilesControl = bamFilesControl * len(bamFiles)
166
173
 
167
- if not bamFiles or not isinstance(bamFiles, list) or len(bamFiles) == 0:
174
+ if (
175
+ not bamFiles
176
+ or not isinstance(bamFiles, list)
177
+ or len(bamFiles) == 0
178
+ ):
168
179
  raise ValueError("No BAM files found")
169
180
 
170
181
  for i, bamFile in enumerate(bamFiles):
@@ -176,19 +187,21 @@ def getInputArgs(config_path: str) -> core.inputParams:
176
187
 
177
188
  # if we've made it here, we can check pairedEnd
178
189
  pairedEndList = misc_util.bamsArePairedEnd(bamFiles)
179
- _isPairedEnd: Optional[bool] = config.get("inputParams.pairedEnd", None)
190
+ _isPairedEnd: Optional[bool] = config.get(
191
+ "inputParams.pairedEnd", None
192
+ )
180
193
  if _isPairedEnd is None:
181
194
  # only set auto if not provided in config
182
195
  _isPairedEnd = all(pairedEndList)
183
196
  if _isPairedEnd:
184
- logger.info(
185
- "Paired-end BAM files detected"
186
- )
197
+ logger.info("Paired-end BAM files detected")
187
198
  else:
188
- logger.info(
189
- "One or more single-end BAM files detected"
190
- )
191
- return core.inputParams(bamFiles=bamFiles, bamFilesControl=bamFilesControl, pairedEnd=_isPairedEnd)
199
+ logger.info("One or more single-end BAM files detected")
200
+ return core.inputParams(
201
+ bamFiles=bamFiles,
202
+ bamFilesControl=bamFilesControl,
203
+ pairedEnd=_isPairedEnd,
204
+ )
192
205
 
193
206
 
194
207
  def getGenomeArgs(config_path: str) -> core.genomeParams:
@@ -200,12 +213,22 @@ def getGenomeArgs(config_path: str) -> core.genomeParams:
200
213
  blacklistFile: Optional[str] = None
201
214
  sparseBedFile: Optional[str] = None
202
215
  chromosomes: Optional[List[str]] = None
203
- excludeChroms: List[str] = config.get("genomeParams.excludeChroms", [])
204
- excludeForNorm: List[str] = config.get("genomeParams.excludeForNorm", [])
216
+ excludeChroms: List[str] = config.get(
217
+ "genomeParams.excludeChroms", []
218
+ )
219
+ excludeForNorm: List[str] = config.get(
220
+ "genomeParams.excludeForNorm", []
221
+ )
205
222
  if genome:
206
- chromSizesFile = constants.getGenomeResourceFile(genome, "sizes")
207
- blacklistFile = constants.getGenomeResourceFile(genome, "blacklist")
208
- sparseBedFile = constants.getGenomeResourceFile(genome, "sparse")
223
+ chromSizesFile = constants.getGenomeResourceFile(
224
+ genome, "sizes"
225
+ )
226
+ blacklistFile = constants.getGenomeResourceFile(
227
+ genome, "blacklist"
228
+ )
229
+ sparseBedFile = constants.getGenomeResourceFile(
230
+ genome, "sparse"
231
+ )
209
232
  if config.get("genomeParams.chromSizesFile", None):
210
233
  chromSizesFile = config["genomeParams.chromSizesFile"]
211
234
  if config.get("genomeParams.blacklistFile", None):
@@ -232,10 +255,14 @@ def getGenomeArgs(config_path: str) -> core.genomeParams:
232
255
  raise ValueError(
233
256
  "No chromosomes provided in the configuration and no chromosome sizes file specified."
234
257
  )
235
- chromosomes = [chrom.strip() for chrom in chromosomes if chrom.strip()]
258
+ chromosomes = [
259
+ chrom.strip() for chrom in chromosomes if chrom.strip()
260
+ ]
236
261
  if excludeChroms:
237
262
  chromosomes = [
238
- chrom for chrom in chromosomes if chrom not in excludeChroms
263
+ chrom
264
+ for chrom in chromosomes
265
+ if chrom not in excludeChroms
239
266
  ]
240
267
  if not chromosomes:
241
268
  raise ValueError(
@@ -259,7 +286,9 @@ def getCountingArgs(config_path: str) -> core.countingParams:
259
286
  scaleDown = config.get("countingParams.scaleDown", True)
260
287
  scaleFactors = config.get("countingParams.scaleFactors", None)
261
288
  numReads = config.get("countingParams.numReads", 100)
262
- scaleFactorsControl = config.get("countingParams.scaleFactorsControl", None)
289
+ scaleFactorsControl = config.get(
290
+ "countingParams.scaleFactorsControl", None
291
+ )
263
292
  applyAsinh = config.get("countingParams.applyAsinh", False)
264
293
  applyLog = config.get("countingParams.applyLog", False)
265
294
  if applyAsinh and applyLog:
@@ -271,19 +300,25 @@ def getCountingArgs(config_path: str) -> core.countingParams:
271
300
  rescaleToTreatmentCoverage = config.get(
272
301
  "countingParams.rescaleToTreatmentCoverage", True
273
302
  )
274
- if scaleFactors is not None and not isinstance(scaleFactors, list):
303
+ if scaleFactors is not None and not isinstance(
304
+ scaleFactors, list
305
+ ):
275
306
  raise ValueError("`scaleFactors` should be a list of floats.")
276
307
  if scaleFactorsControl is not None and not isinstance(
277
308
  scaleFactorsControl, list
278
309
  ):
279
- raise ValueError("`scaleFactorsControl` should be a list of floats.")
310
+ raise ValueError(
311
+ "`scaleFactorsControl` should be a list of floats."
312
+ )
280
313
  if (
281
314
  scaleFactors is not None
282
315
  and scaleFactorsControl is not None
283
316
  and len(scaleFactors) != len(scaleFactorsControl)
284
317
  ):
285
318
  if len(scaleFactorsControl) == 1:
286
- scaleFactorsControl = scaleFactorsControl * len(scaleFactors)
319
+ scaleFactorsControl = scaleFactorsControl * len(
320
+ scaleFactors
321
+ )
287
322
  else:
288
323
  raise ValueError(
289
324
  "control and treatment scale factors: must be equal length or 1 control"
@@ -308,12 +343,46 @@ def readConfig(config_path: str) -> Dict[str, Any]:
308
343
  genomeParams = getGenomeArgs(config_path)
309
344
  countingParams = getCountingArgs(config_path)
310
345
  minR_default = _getMinR(config, len(inputParams.bamFiles))
311
- minQ_default = (minR_default / (len(inputParams.bamFiles))) + 0.10 # protect condition number
346
+ minQ_default = (
347
+ minR_default / (len(inputParams.bamFiles))
348
+ ) + 0.10 # protect condition number
349
+
312
350
  matchingExcludeRegionsBedFile_default: Optional[str] = (
313
351
  genomeParams.blacklistFile
314
352
  )
353
+
354
+ # apply less aggressive *default* detrending/background removal
355
+ # ...IF input controls are present. In either case, respect
356
+ # ...user-specified params
357
+ detrendWindowLengthBP_: int = -1
358
+ detrendSavitzkyGolayDegree_: int = -1
359
+
360
+ if (
361
+ inputParams.bamFilesControl is not None
362
+ and len(inputParams.bamFilesControl) > 0
363
+ ):
364
+ detrendWindowLengthBP_ = config.get(
365
+ "detrendParams.detrendWindowLengthBP",
366
+ 25_000,
367
+ )
368
+ detrendSavitzkyGolayDegree_ = config.get(
369
+ "detrendParams.detrendSavitzkyGolayDegree",
370
+ 1,
371
+ )
372
+ else:
373
+ detrendWindowLengthBP_ = config.get(
374
+ "detrendParams.detrendWindowLengthBP",
375
+ 10_000,
376
+ )
377
+ detrendSavitzkyGolayDegree_ = config.get(
378
+ "detrendParams.detrendSavitzkyGolayDegree",
379
+ 2,
380
+ )
381
+
315
382
  return {
316
- "experimentName": config.get("experimentName", "consenrichExperiment"),
383
+ "experimentName": config.get(
384
+ "experimentName", "consenrichExperiment"
385
+ ),
317
386
  "genomeArgs": genomeParams,
318
387
  "inputArgs": inputParams,
319
388
  "countingArgs": countingParams,
@@ -338,60 +407,112 @@ def readConfig(config_path: str) -> Dict[str, Any]:
338
407
  ),
339
408
  noGlobal=config.get("observationParams.noGlobal", False),
340
409
  numNearest=config.get("observationParams.numNearest", 25),
341
- localWeight=config.get("observationParams.localWeight", 0.333),
342
- globalWeight=config.get("observationParams.globalWeight", 0.667),
410
+ localWeight=config.get(
411
+ "observationParams.localWeight",
412
+ 0.333,
413
+ ),
414
+ globalWeight=config.get(
415
+ "observationParams.globalWeight",
416
+ 0.667,
417
+ ),
343
418
  approximationWindowLengthBP=config.get(
344
- "observationParams.approximationWindowLengthBP", 10000
419
+ "observationParams.approximationWindowLengthBP",
420
+ 10000,
345
421
  ),
346
422
  lowPassWindowLengthBP=config.get(
347
- "observationParams.lowPassWindowLengthBP", 20000
423
+ "observationParams.lowPassWindowLengthBP",
424
+ 20000,
348
425
  ),
349
426
  lowPassFilterType=config.get(
350
- "observationParams.lowPassFilterType", "median"
427
+ "observationParams.lowPassFilterType",
428
+ "median",
429
+ ),
430
+ returnCenter=config.get(
431
+ "observationParams.returnCenter",
432
+ True,
351
433
  ),
352
- returnCenter=config.get("observationParams.returnCenter", True),
353
434
  ),
354
435
  "stateArgs": core.stateParams(
355
436
  stateInit=config.get("stateParams.stateInit", 0.0),
356
- stateCovarInit=config.get("stateParams.stateCovarInit", 100.0),
437
+ stateCovarInit=config.get(
438
+ "stateParams.stateCovarInit",
439
+ 100.0,
440
+ ),
357
441
  boundState=config.get("stateParams.boundState", True),
358
- stateLowerBound=config.get("stateParams.stateLowerBound", 0.0),
359
- stateUpperBound=config.get("stateParams.stateUpperBound", 10000.0),
442
+ stateLowerBound=config.get(
443
+ "stateParams.stateLowerBound",
444
+ 0.0,
445
+ ),
446
+ stateUpperBound=config.get(
447
+ "stateParams.stateUpperBound",
448
+ 10000.0,
449
+ ),
360
450
  ),
361
451
  "samArgs": core.samParams(
362
452
  samThreads=config.get("samParams.samThreads", 1),
363
- samFlagExclude=config.get("samParams.samFlagExclude", 3844),
453
+ samFlagExclude=config.get(
454
+ "samParams.samFlagExclude", 3844
455
+ ),
364
456
  oneReadPerBin=config.get("samParams.oneReadPerBin", 0),
365
457
  chunkSize=config.get("samParams.chunkSize", 1000000),
366
458
  offsetStr=config.get("samParams.offsetStr", "0,0"),
367
459
  extendBP=config.get("samParams.extendBP", []),
368
460
  maxInsertSize=config.get("samParams.maxInsertSize", 1000),
369
- pairedEndMode=config.get("samParams.pairedEndMode", 1 if inputParams.pairedEnd is not None and int(inputParams.pairedEnd) > 0 else 0),
370
- inferFragmentLength=config.get("samParams.inferFragmentLength", 1 if inputParams.pairedEnd is not None and int(inputParams.pairedEnd) == 0 else 0),
371
- countEndsOnly=config.get("samParams.countEndsOnly", False),
461
+ pairedEndMode=config.get(
462
+ "samParams.pairedEndMode",
463
+ 1
464
+ if inputParams.pairedEnd is not None
465
+ and int(inputParams.pairedEnd) > 0
466
+ else 0,
467
+ ),
468
+ inferFragmentLength=config.get(
469
+ "samParams.inferFragmentLength",
470
+ 1
471
+ if inputParams.pairedEnd is not None
472
+ and int(inputParams.pairedEnd) == 0
473
+ else 0,
474
+ ),
475
+ countEndsOnly=config.get(
476
+ "samParams.countEndsOnly",
477
+ False,
478
+ ),
372
479
  ),
373
480
  "detrendArgs": core.detrendParams(
374
- detrendWindowLengthBP=config.get(
375
- "detrendParams.detrendWindowLengthBP", 10000
376
- ),
481
+ detrendWindowLengthBP=detrendWindowLengthBP_,
377
482
  detrendTrackPercentile=config.get(
378
- "detrendParams.detrendTrackPercentile", 75.0
483
+ "detrendParams.detrendTrackPercentile",
484
+ 75,
485
+ ),
486
+ usePolyFilter=config.get(
487
+ "detrendParams.usePolyFilter",
488
+ False,
379
489
  ),
380
- usePolyFilter=config.get("detrendParams.usePolyFilter", False),
381
490
  detrendSavitzkyGolayDegree=config.get(
382
- "detrendParams.detrendSavitzkyGolayDegree", 2
491
+ "detrendParams.detrendSavitzkyGolayDegree",
492
+ detrendSavitzkyGolayDegree_,
383
493
  ),
384
494
  useOrderStatFilter=config.get(
385
- "detrendParams.useOrderStatFilter", True
495
+ "detrendParams.useOrderStatFilter",
496
+ True,
386
497
  ),
387
498
  ),
388
499
  "matchingArgs": core.matchingParams(
389
- templateNames=config.get("matchingParams.templateNames", []),
390
- cascadeLevels=config.get("matchingParams.cascadeLevels", [2]),
500
+ templateNames=config.get(
501
+ "matchingParams.templateNames",
502
+ [],
503
+ ),
504
+ cascadeLevels=config.get(
505
+ "matchingParams.cascadeLevels",
506
+ [],
507
+ ),
391
508
  iters=config.get("matchingParams.iters", 25_000),
392
509
  alpha=config.get("matchingParams.alpha", 0.05),
393
- minMatchLengthBP=config.get("matchingParams.minMatchLengthBP", 250),
394
- maxNumMatches=config.get("matchingParams.maxNumMatches", 100_000),
510
+ minMatchLengthBP=config.get(
511
+ "matchingParams.minMatchLengthBP", 250
512
+ ),
513
+ maxNumMatches=config.get(
514
+ "matchingParams.maxNumMatches", 100_000
515
+ ),
395
516
  minSignalAtMaxima=config.get(
396
517
  "matchingParams.minSignalAtMaxima", "q:0.75"
397
518
  ),
@@ -418,7 +539,9 @@ def convertBedGraphToBigWig(experimentName, chromSizesFile):
418
539
  "OR install via conda (conda install -c bioconda ucsc-bedgraphtobigwig)."
419
540
  )
420
541
 
421
- logger.info("Attempting to generate bigWig files from bedGraph format...")
542
+ logger.info(
543
+ "Attempting to generate bigWig files from bedGraph format..."
544
+ )
422
545
  try:
423
546
  path_ = shutil.which("bedGraphToBigWig")
424
547
  except Exception as e:
@@ -429,7 +552,9 @@ def convertBedGraphToBigWig(experimentName, chromSizesFile):
429
552
  return
430
553
  logger.info(f"Using bedGraphToBigWig from {path_}")
431
554
  for suffix in suffixes:
432
- bedgraph = f"consenrichOutput_{experimentName}_{suffix}.bedGraph"
555
+ bedgraph = (
556
+ f"consenrichOutput_{experimentName}_{suffix}.bedGraph"
557
+ )
433
558
  if not os.path.exists(bedgraph):
434
559
  logger.warning(
435
560
  f"bedGraph file {bedgraph} does not exist. Skipping bigWig conversion."
@@ -452,7 +577,9 @@ def convertBedGraphToBigWig(experimentName, chromSizesFile):
452
577
  )
453
578
  continue
454
579
  if os.path.exists(bigwig) and os.path.getsize(bigwig) > 100:
455
- logger.info(f"Finished: converted {bedgraph} to {bigwig}.")
580
+ logger.info(
581
+ f"Finished: converted {bedgraph} to {bigwig}."
582
+ )
456
583
 
457
584
 
458
585
  def main():
@@ -476,10 +603,16 @@ def main():
476
603
  "--match-template",
477
604
  type=str,
478
605
  default="haar",
479
- choices=[x for x in pywt.wavelist(kind="discrete") if "bio" not in x],
606
+ choices=[
607
+ x
608
+ for x in pywt.wavelist(kind="discrete")
609
+ if "bio" not in x
610
+ ],
480
611
  dest="matchTemplate",
481
612
  )
482
- parser.add_argument("--match-level", type=int, default=2, dest="matchLevel")
613
+ parser.add_argument(
614
+ "--match-level", type=int, default=2, dest="matchLevel"
615
+ )
483
616
  parser.add_argument(
484
617
  "--match-alpha", type=float, default=0.05, dest="matchAlpha"
485
618
  )
@@ -508,16 +641,24 @@ def main():
508
641
  "--match-no-merge", action="store_true", dest="matchNoMerge"
509
642
  )
510
643
  parser.add_argument(
511
- "--match-merge-gap", type=int, default=None, dest="matchMergeGapBP"
644
+ "--match-merge-gap",
645
+ type=int,
646
+ default=None,
647
+ dest="matchMergeGapBP",
512
648
  )
513
649
  parser.add_argument(
514
- "--match-use-wavelet", action="store_true", dest="matchUseWavelet"
650
+ "--match-use-wavelet",
651
+ action="store_true",
652
+ dest="matchUseWavelet",
515
653
  )
516
654
  parser.add_argument(
517
655
  "--match-seed", type=int, default=42, dest="matchRandSeed"
518
656
  )
519
657
  parser.add_argument(
520
- "--match-exclude-bed", type=str, default=None, dest="matchExcludeBed"
658
+ "--match-exclude-bed",
659
+ type=str,
660
+ default=None,
661
+ dest="matchExcludeBed",
521
662
  )
522
663
  parser.add_argument(
523
664
  "--verbose", action="store_true", help="If set, logs config"
@@ -595,13 +736,17 @@ def main():
595
736
  scaleDown = countingArgs.scaleDown
596
737
  extendBP_ = core.resolveExtendBP(samArgs.extendBP, bamFiles)
597
738
  initialTreatmentScaleFactors = []
739
+ minMatchLengthBP_: Optional[int] = matchingArgs.minMatchLengthBP
740
+ mergeGapBP_: Optional[int] = matchingArgs.mergeGapBP
741
+
598
742
  if args.verbose:
599
743
  try:
600
744
  logger.info("Configuration:\n")
601
745
  config_truncated = {
602
746
  k: v
603
747
  for k, v in config.items()
604
- if k not in ["inputArgs", "genomeArgs", "countingArgs"]
748
+ if k
749
+ not in ["inputArgs", "genomeArgs", "countingArgs"]
605
750
  }
606
751
  config_truncated["experimentName"] = experimentName
607
752
  config_truncated["inputArgs"] = inputArgs
@@ -619,7 +764,9 @@ def main():
619
764
  controlsPresent = checkControlsPresent(inputArgs)
620
765
  if args.verbose:
621
766
  logger.info(f"controlsPresent: {controlsPresent}")
622
- readLengthsBamFiles = getReadLengths(inputArgs, countingArgs, samArgs)
767
+ readLengthsBamFiles = getReadLengths(
768
+ inputArgs, countingArgs, samArgs
769
+ )
623
770
  effectiveGenomeSizes = getEffectiveGenomeSizes(
624
771
  genomeArgs, readLengthsBamFiles
625
772
  )
@@ -641,11 +788,16 @@ def main():
641
788
  for bamFile in bamFilesControl
642
789
  ]
643
790
  effectiveGenomeSizesControl = [
644
- constants.getEffectiveGenomeSize(genomeArgs.genomeName, readLength)
791
+ constants.getEffectiveGenomeSize(
792
+ genomeArgs.genomeName, readLength
793
+ )
645
794
  for readLength in readLengthsControlBamFiles
646
795
  ]
647
796
 
648
- if scaleFactors is not None and scaleFactorsControl is not None:
797
+ if (
798
+ scaleFactors is not None
799
+ and scaleFactorsControl is not None
800
+ ):
649
801
  treatScaleFactors = scaleFactors
650
802
  controlScaleFactors = scaleFactorsControl
651
803
  # still make sure this is accessible
@@ -662,7 +814,9 @@ def main():
662
814
  samArgs.samThreads,
663
815
  )
664
816
  for bamFile, effectiveGenomeSize, readLength in zip(
665
- bamFiles, effectiveGenomeSizes, readLengthsBamFiles
817
+ bamFiles,
818
+ effectiveGenomeSizes,
819
+ readLengthsBamFiles,
666
820
  )
667
821
  ]
668
822
  except Exception:
@@ -716,7 +870,8 @@ def main():
716
870
  )
717
871
  ]
718
872
  chromSizesDict = misc_util.getChromSizesDict(
719
- genomeArgs.chromSizesFile, excludeChroms=genomeArgs.excludeChroms
873
+ genomeArgs.chromSizesFile,
874
+ excludeChroms=genomeArgs.excludeChroms,
720
875
  )
721
876
  chromosomes = genomeArgs.chromosomes
722
877
 
@@ -731,11 +886,15 @@ def main():
731
886
  chromosomeStart = max(
732
887
  0, (chromosomeStart - (chromosomeStart % stepSize))
733
888
  )
734
- chromosomeEnd = max(0, (chromosomeEnd - (chromosomeEnd % stepSize)))
889
+ chromosomeEnd = max(
890
+ 0, (chromosomeEnd - (chromosomeEnd % stepSize))
891
+ )
735
892
  numIntervals = (
736
893
  ((chromosomeEnd - chromosomeStart) + stepSize) - 1
737
894
  ) // stepSize
738
- intervals = np.arange(chromosomeStart, chromosomeEnd, stepSize)
895
+ intervals = np.arange(
896
+ chromosomeStart, chromosomeEnd, stepSize
897
+ )
739
898
  chromMat: np.ndarray = np.empty(
740
899
  (numSamples, numIntervals), dtype=np.float32
741
900
  )
@@ -752,7 +911,10 @@ def main():
752
911
  chromosomeStart,
753
912
  chromosomeEnd,
754
913
  stepSize,
755
- [readLengthsBamFiles[j_], readLengthsControlBamFiles[j_]],
914
+ [
915
+ readLengthsBamFiles[j_],
916
+ readLengthsControlBamFiles[j_],
917
+ ],
756
918
  [treatScaleFactors[j_], controlScaleFactors[j_]],
757
919
  samArgs.oneReadPerBin,
758
920
  samArgs.samThreads,
@@ -764,10 +926,12 @@ def main():
764
926
  inferFragmentLength=samArgs.inferFragmentLength,
765
927
  applyAsinh=countingArgs.applyAsinh,
766
928
  applyLog=countingArgs.applyLog,
767
- countEndsOnly=samArgs.countEndsOnly
929
+ countEndsOnly=samArgs.countEndsOnly,
768
930
  )
769
931
  if countingArgs.rescaleToTreatmentCoverage:
770
- finalSF = max(1.0, initialTreatmentScaleFactors[j_])
932
+ finalSF = max(
933
+ 1.0, initialTreatmentScaleFactors[j_]
934
+ )
771
935
  chromMat[j_, :] = finalSF * (
772
936
  pairMatrix[0, :] - pairMatrix[1, :]
773
937
  )
@@ -791,18 +955,25 @@ def main():
791
955
  inferFragmentLength=samArgs.inferFragmentLength,
792
956
  applyAsinh=countingArgs.applyAsinh,
793
957
  applyLog=countingArgs.applyLog,
794
- countEndsOnly=samArgs.countEndsOnly
958
+ countEndsOnly=samArgs.countEndsOnly,
795
959
  )
796
960
  sparseMap = None
797
961
  if genomeArgs.sparseBedFile and not observationArgs.useALV:
798
- logger.info(f"Building sparse mapping for {chromosome}...")
962
+ logger.info(
963
+ f"Building sparse mapping for {chromosome}..."
964
+ )
799
965
  sparseMap = core.getSparseMap(
800
- chromosome, intervals, numNearest, genomeArgs.sparseBedFile
966
+ chromosome,
967
+ intervals,
968
+ numNearest,
969
+ genomeArgs.sparseBedFile,
801
970
  )
802
971
 
803
972
  muncMat = np.empty_like(chromMat, dtype=np.float32)
804
973
  for j in range(numSamples):
805
- logger.info(f"Muncing {j + 1}/{numSamples} for {chromosome}...")
974
+ logger.info(
975
+ f"Muncing {j + 1}/{numSamples} for {chromosome}..."
976
+ )
806
977
  muncMat[j, :] = core.getMuncTrack(
807
978
  chromosome,
808
979
  intervals,
@@ -873,8 +1044,11 @@ def main():
873
1044
  )
874
1045
  if c_ == 0 and len(chromosomes) > 1:
875
1046
  for file_ in os.listdir("."):
876
- if file_.startswith(f"consenrichOutput_{experimentName}") and (
877
- file_.endswith(".bedGraph") or file_.endswith(".narrowPeak")
1047
+ if file_.startswith(
1048
+ f"consenrichOutput_{experimentName}"
1049
+ ) and (
1050
+ file_.endswith(".bedGraph")
1051
+ or file_.endswith(".narrowPeak")
878
1052
  ):
879
1053
  logger.warning(f"Overwriting: {file_}")
880
1054
  os.remove(file_)
@@ -894,6 +1068,18 @@ def main():
894
1068
  )
895
1069
  try:
896
1070
  if matchingEnabled:
1071
+ if (
1072
+ minMatchLengthBP_ is None
1073
+ or minMatchLengthBP_ <= 0
1074
+ ):
1075
+ minMatchLengthBP_ = (
1076
+ matching.autoMinLengthIntervals(x_)
1077
+ * (intervals[1] - intervals[0])
1078
+ )
1079
+
1080
+ if mergeGapBP_ is None:
1081
+ mergeGapBP_ = int(minMatchLengthBP_ / 2) + 1
1082
+
897
1083
  matchingDF = matching.matchWavelet(
898
1084
  chromosome,
899
1085
  intervals,
@@ -902,7 +1088,7 @@ def main():
902
1088
  matchingArgs.cascadeLevels,
903
1089
  matchingArgs.iters,
904
1090
  matchingArgs.alpha,
905
- matchingArgs.minMatchLengthBP,
1091
+ minMatchLengthBP_,
906
1092
  matchingArgs.maxNumMatches,
907
1093
  matchingArgs.minSignalAtMaxima,
908
1094
  useScalingFunction=matchingArgs.useScalingFunction,
@@ -927,13 +1113,23 @@ def main():
927
1113
  convertBedGraphToBigWig(experimentName, genomeArgs.chromSizesFile)
928
1114
  if matchingEnabled and matchingArgs.merge:
929
1115
  try:
1116
+ mergeGapBP_ = matchingArgs.mergeGapBP
1117
+ if mergeGapBP_ is None or mergeGapBP_ <= 0:
1118
+ mergeGapBP_ = (
1119
+ int(minMatchLengthBP_ / 2) + 1
1120
+ if minMatchLengthBP_ is not None
1121
+ and minMatchLengthBP_ >= 0
1122
+ else 75
1123
+ )
930
1124
  matching.mergeMatches(
931
1125
  f"consenrichOutput_{experimentName}_matches.narrowPeak",
932
- mergeGapBP=matchingArgs.mergeGapBP,
1126
+ mergeGapBP=mergeGapBP_,
933
1127
  )
934
1128
 
935
1129
  except Exception as e:
936
- logger.warning(f"Failed to merge matches...SKIPPING:\n{e}\n\n")
1130
+ logger.warning(
1131
+ f"Failed to merge matches...SKIPPING:\n{e}\n\n"
1132
+ )
937
1133
  logger.info("Done.")
938
1134
 
939
1135
 
consenrich/core.py CHANGED
@@ -317,25 +317,27 @@ class matchingParams(NamedTuple):
317
317
 
318
318
  See :ref:`matching` for an overview of the approach.
319
319
 
320
- :param templateNames: A list of str values -- wavelet bases used for matching, e.g., `[haar, db2, sym4]`
320
+ :param templateNames: A list of str values -- each entry references a mother wavelet (or its corresponding scaling function), e.g., `[haar, db2]`
321
321
  :type templateNames: List[str]
322
- :param cascadeLevels: A list of int values -- the number of cascade iterations used for approximating
323
- the scaling/wavelet functions.
322
+ :param cascadeLevels: Number of cascade iterations used to approximate each template (wavelet or scaling function).
323
+ Must have the same length as `templateNames`, with each entry aligned to the
324
+ corresponding template. For example, given templateNames `[haar, db2]`, the value `[2, 2]` uses 2 cascade levels for both templates.
324
325
  :type cascadeLevels: List[int]
325
326
  :param iters: Number of random blocks to sample in the response sequence while building
326
327
  an empirical null to test significance. See :func:`cconsenrich.csampleBlockStats`.
327
328
  :type iters: int
328
329
  :param alpha: Primary significance threshold on detected matches. Specifically, the
329
- :math:`1 - \alpha` quantile of an empirical null distribution. The empirical null
330
- distribution is built from cross-correlation values over randomly sampled blocks.
330
+ minimum corr. empirical p-value approximated from randomly sampled blocks in the
331
+ response sequence.
331
332
  :type alpha: float
332
333
  :param minMatchLengthBP: Within a window of `minMatchLengthBP` length (bp), relative maxima in
333
334
  the signal-template convolution must be greater in value than others to qualify as matches.
334
- :type minMatchLengthBP: int
335
+ If set to a value less than 1, the minimum length is determined via :func:`consenrich.matching.autoMinLengthIntervals`.
336
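+ If omitted from the configuration file, defaults to 250 bp. When run through the command-line entry point, an explicit `None` is resolved automatically in the same way as a value less than 1.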
335
337
  :param minSignalAtMaxima: Secondary significance threshold coupled with `alpha`. Requires the *signal value*
336
- at relative maxima in the response sequence to be greater than this threshold. Comparisons are made in log-scale.
337
- If a `float` value is provided, the minimum signal value must be greater than this (absolute) value. *Set to a
338
- negative value to disable the threshold*.
338
+ at relative maxima in the response sequence to be greater than this threshold. Comparisons are made in log-scale
339
+ to temper genome-wide dynamic range. If a `float` value is provided, the minimum signal value must be greater
340
+ than this (absolute) value. *Set to a negative value to disable the threshold*.
339
341
  If a `str` value is provided, looks for 'q:quantileValue', e.g., 'q:0.90'. The
340
342
  threshold is then set to the corresponding quantile of the non-zero signal estimates.
341
343
  :type minSignalAtMaxima: Optional[str | float]