consenrich-0.7.1b1-cp314-cp314-macosx_11_0_arm64.whl → consenrich-0.7.1b2-cp314-cp314-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of consenrich has been flagged as potentially problematic; see the registry's advisory page for this release for more details.

consenrich/consenrich.py CHANGED
@@ -346,9 +346,39 @@ def readConfig(config_path: str) -> Dict[str, Any]:
346
346
  minQ_default = (
347
347
  minR_default / (len(inputParams.bamFiles))
348
348
  ) + 0.10 # protect condition number
349
+
349
350
  matchingExcludeRegionsBedFile_default: Optional[str] = (
350
351
  genomeParams.blacklistFile
351
352
  )
353
+
354
+ # apply less aggressive *default* detrending/background removal
355
+ # ...IF input controls are present. In either case, respect
356
+ # ...user-specified params
357
+ detrendWindowLengthBP_: int = -1
358
+ detrendSavitzkyGolayDegree_: int = -1
359
+
360
+ if (
361
+ inputParams.bamFilesControl is not None
362
+ and len(inputParams.bamFilesControl) > 0
363
+ ):
364
+ detrendWindowLengthBP_ = config.get(
365
+ "detrendParams.detrendWindowLengthBP",
366
+ 25_000,
367
+ )
368
+ detrendSavitzkyGolayDegree_ = config.get(
369
+ "detrendParams.detrendSavitzkyGolayDegree",
370
+ 1,
371
+ )
372
+ else:
373
+ detrendWindowLengthBP_ = config.get(
374
+ "detrendParams.detrendWindowLengthBP",
375
+ 10_000,
376
+ )
377
+ detrendSavitzkyGolayDegree_ = config.get(
378
+ "detrendParams.detrendSavitzkyGolayDegree",
379
+ 2,
380
+ )
381
+
352
382
  return {
353
383
  "experimentName": config.get(
354
384
  "experimentName", "consenrichExperiment"
@@ -378,35 +408,44 @@ def readConfig(config_path: str) -> Dict[str, Any]:
378
408
  noGlobal=config.get("observationParams.noGlobal", False),
379
409
  numNearest=config.get("observationParams.numNearest", 25),
380
410
  localWeight=config.get(
381
- "observationParams.localWeight", 0.333
411
+ "observationParams.localWeight",
412
+ 0.333,
382
413
  ),
383
414
  globalWeight=config.get(
384
- "observationParams.globalWeight", 0.667
415
+ "observationParams.globalWeight",
416
+ 0.667,
385
417
  ),
386
418
  approximationWindowLengthBP=config.get(
387
- "observationParams.approximationWindowLengthBP", 10000
419
+ "observationParams.approximationWindowLengthBP",
420
+ 10000,
388
421
  ),
389
422
  lowPassWindowLengthBP=config.get(
390
- "observationParams.lowPassWindowLengthBP", 20000
423
+ "observationParams.lowPassWindowLengthBP",
424
+ 20000,
391
425
  ),
392
426
  lowPassFilterType=config.get(
393
- "observationParams.lowPassFilterType", "median"
427
+ "observationParams.lowPassFilterType",
428
+ "median",
394
429
  ),
395
430
  returnCenter=config.get(
396
- "observationParams.returnCenter", True
431
+ "observationParams.returnCenter",
432
+ True,
397
433
  ),
398
434
  ),
399
435
  "stateArgs": core.stateParams(
400
436
  stateInit=config.get("stateParams.stateInit", 0.0),
401
437
  stateCovarInit=config.get(
402
- "stateParams.stateCovarInit", 100.0
438
+ "stateParams.stateCovarInit",
439
+ 100.0,
403
440
  ),
404
441
  boundState=config.get("stateParams.boundState", True),
405
442
  stateLowerBound=config.get(
406
- "stateParams.stateLowerBound", 0.0
443
+ "stateParams.stateLowerBound",
444
+ 0.0,
407
445
  ),
408
446
  stateUpperBound=config.get(
409
- "stateParams.stateUpperBound", 10000.0
447
+ "stateParams.stateUpperBound",
448
+ 10000.0,
410
449
  ),
411
450
  ),
412
451
  "samArgs": core.samParams(
@@ -434,32 +473,37 @@ def readConfig(config_path: str) -> Dict[str, Any]:
434
473
  else 0,
435
474
  ),
436
475
  countEndsOnly=config.get(
437
- "samParams.countEndsOnly", False
476
+ "samParams.countEndsOnly",
477
+ False,
438
478
  ),
439
479
  ),
440
480
  "detrendArgs": core.detrendParams(
441
- detrendWindowLengthBP=config.get(
442
- "detrendParams.detrendWindowLengthBP", 10000
443
- ),
481
+ detrendWindowLengthBP=detrendWindowLengthBP_,
444
482
  detrendTrackPercentile=config.get(
445
- "detrendParams.detrendTrackPercentile", 75.0
483
+ "detrendParams.detrendTrackPercentile",
484
+ 75,
446
485
  ),
447
486
  usePolyFilter=config.get(
448
- "detrendParams.usePolyFilter", False
487
+ "detrendParams.usePolyFilter",
488
+ False,
449
489
  ),
450
490
  detrendSavitzkyGolayDegree=config.get(
451
- "detrendParams.detrendSavitzkyGolayDegree", 2
491
+ "detrendParams.detrendSavitzkyGolayDegree",
492
+ detrendSavitzkyGolayDegree_,
452
493
  ),
453
494
  useOrderStatFilter=config.get(
454
- "detrendParams.useOrderStatFilter", True
495
+ "detrendParams.useOrderStatFilter",
496
+ True,
455
497
  ),
456
498
  ),
457
499
  "matchingArgs": core.matchingParams(
458
500
  templateNames=config.get(
459
- "matchingParams.templateNames", []
501
+ "matchingParams.templateNames",
502
+ [],
460
503
  ),
461
504
  cascadeLevels=config.get(
462
- "matchingParams.cascadeLevels", [2]
505
+ "matchingParams.cascadeLevels",
506
+ [],
463
507
  ),
464
508
  iters=config.get("matchingParams.iters", 25_000),
465
509
  alpha=config.get("matchingParams.alpha", 0.05),
@@ -692,6 +736,9 @@ def main():
692
736
  scaleDown = countingArgs.scaleDown
693
737
  extendBP_ = core.resolveExtendBP(samArgs.extendBP, bamFiles)
694
738
  initialTreatmentScaleFactors = []
739
+ minMatchLengthBP_: Optional[int] = matchingArgs.minMatchLengthBP
740
+ mergeGapBP_: Optional[int] = matchingArgs.mergeGapBP
741
+
695
742
  if args.verbose:
696
743
  try:
697
744
  logger.info("Configuration:\n")
@@ -1021,6 +1068,18 @@ def main():
1021
1068
  )
1022
1069
  try:
1023
1070
  if matchingEnabled:
1071
+ if (
1072
+ minMatchLengthBP_ is None
1073
+ or minMatchLengthBP_ <= 0
1074
+ ):
1075
+ minMatchLengthBP_ = (
1076
+ matching.autoMinLengthIntervals(x_)
1077
+ * (intervals[1] - intervals[0])
1078
+ )
1079
+
1080
+ if mergeGapBP_ is None:
1081
+ mergeGapBP_ = int(minMatchLengthBP_ / 2) + 1
1082
+
1024
1083
  matchingDF = matching.matchWavelet(
1025
1084
  chromosome,
1026
1085
  intervals,
@@ -1029,7 +1088,7 @@ def main():
1029
1088
  matchingArgs.cascadeLevels,
1030
1089
  matchingArgs.iters,
1031
1090
  matchingArgs.alpha,
1032
- matchingArgs.minMatchLengthBP,
1091
+ minMatchLengthBP_,
1033
1092
  matchingArgs.maxNumMatches,
1034
1093
  matchingArgs.minSignalAtMaxima,
1035
1094
  useScalingFunction=matchingArgs.useScalingFunction,
@@ -1055,10 +1114,11 @@ def main():
1055
1114
  if matchingEnabled and matchingArgs.merge:
1056
1115
  try:
1057
1116
  mergeGapBP_ = matchingArgs.mergeGapBP
1058
- if mergeGapBP_ is None:
1117
+ if mergeGapBP_ is None or mergeGapBP_ <= 0:
1059
1118
  mergeGapBP_ = (
1060
- int(matchingArgs.minMatchLengthBP/2) + 1
1061
- if matchingArgs.minMatchLengthBP is not None
1119
+ int(minMatchLengthBP_ / 2) + 1
1120
+ if minMatchLengthBP_ is not None
1121
+ and minMatchLengthBP_ >= 0
1062
1122
  else 75
1063
1123
  )
1064
1124
  matching.mergeMatches(
consenrich/core.py CHANGED
@@ -317,10 +317,11 @@ class matchingParams(NamedTuple):
317
317
 
318
318
  See :ref:`matching` for an overview of the approach.
319
319
 
320
- :param templateNames: A list of str values -- wavelet bases used for matching, e.g., `[haar, db2, sym4]`
320
+ :param templateNames: A list of str values -- each entry references a mother wavelet (or its corresponding scaling function). e.g., `[haar, db2]`
321
321
  :type templateNames: List[str]
322
- :param cascadeLevels: A list of int values -- the number of cascade iterations used for approximating
323
- the scaling/wavelet functions.
322
+ :param cascadeLevels: Number of cascade iterations used to approximate each template (wavelet or scaling function).
323
+ Must have the same length as `templateNames`, with each entry aligned to the
324
+ corresponding template. e.g., given templateNames `[haar, db2]`, then `[2,2]` would use 2 cascade levels for both templates.
324
325
  :type cascadeLevels: List[int]
325
326
  :param iters: Number of random blocks to sample in the response sequence while building
326
327
  an empirical null to test significance. See :func:`cconsenrich.csampleBlockStats`.
@@ -331,7 +332,8 @@ class matchingParams(NamedTuple):
331
332
  :type alpha: float
332
333
  :param minMatchLengthBP: Within a window of `minMatchLengthBP` length (bp), relative maxima in
333
334
  the signal-template convolution must be greater in value than others to qualify as matches.
334
- :type minMatchLengthBP: int
335
+ If set to a value less than 1, the minimum length is determined via :func:`consenrich.matching.autoMinLengthIntervals`.
336
+ If set to `None`, defaults to 250 bp.
335
337
  :param minSignalAtMaxima: Secondary significance threshold coupled with `alpha`. Requires the *signal value*
336
338
  at relative maxima in the response sequence to be greater than this threshold. Comparisons are made in log-scale
337
339
  to temper genome-wide dynamic range. If a `float` value is provided, the minimum signal value must be greater
consenrich/matching.py CHANGED
@@ -24,6 +24,45 @@ logging.basicConfig(
24
24
  logger = logging.getLogger(__name__)
25
25
 
26
26
 
27
+ def autoMinLengthIntervals(
28
+ values: np.ndarray, initLen: int = 3
29
+ ) -> int:
30
+ r"""Determines a minimum matching length (in interval units) based on the input signal values.
31
+
32
+ Returns the mean length of non-zero contiguous segments in a log-scaled/centered version of `values`
33
+
34
+ :param values: A 1D array of signal-like values.
35
+ :type values: np.ndarray
36
+ :param initLen: Initial minimum length (in intervals). Defaults to 3.
37
+ :type initLen: int
38
+ :return: Estimated minimum matching length (in intervals)
39
+ :rtype: int
40
+
41
+ """
42
+ trValues = np.asinh(values) - signal.medfilt(
43
+ np.asinh(values),
44
+ kernel_size=
45
+ max(
46
+ (2 * initLen) + 1,
47
+ 2 * (int(len(values) * 0.005)) + 1,
48
+ )
49
+ )
50
+ nz = trValues[trValues > 0]
51
+ if len(nz) == 0:
52
+ return initLen
53
+ thr = np.quantile(nz, 0.90, method="interpolated_inverted_cdf")
54
+ mask = nz >= thr
55
+ if not np.any(mask):
56
+ return initLen
57
+ idx = np.flatnonzero(np.diff(np.r_[False, mask, False]))
58
+ runs = idx.reshape(-1, 2)
59
+ widths = runs[:, 1] - runs[:, 0]
60
+ widths = widths[widths >= initLen]
61
+ if len(widths) == 0:
62
+ return initLen
63
+ return int(np.mean(widths))
64
+
65
+
27
66
  def scalarClip(value: float, low: float, high: float) -> float:
28
67
  return low if value < low else high if value > high else value
29
68
 
@@ -124,7 +163,7 @@ def matchExistingBedGraph(
124
163
  for chrom_ in sorted(bedGraphDF["chromosome"].unique()):
125
164
  df_ = bedGraphDF[bedGraphDF["chromosome"] == chrom_]
126
165
  if len(df_) < 5:
127
- logger.info(f"Skipping {chrom_}: fewer than 5 rows.")
166
+ logger.info(f"Skipping {chrom_}: less than 5 intervals.")
128
167
  continue
129
168
 
130
169
  try:
@@ -234,17 +273,18 @@ def matchWavelet(
234
273
  excludeRegionsBedFile: Optional[str] = None,
235
274
  weights: Optional[npt.NDArray[np.float64]] = None,
236
275
  ) -> pd.DataFrame:
237
- r"""Detect structured peaks by cross-correlating Consenrich tracks with wavelet- or scaling-function templates.
276
+ r"""Detect structured peaks in Consenrich tracks by matching wavelet- or scaling-function–based templates.
238
277
 
239
278
  :param chromosome: Chromosome name for the input intervals and values.
240
279
  :type chromosome: str
241
280
  :param values: A 1D array of signal-like values. In this documentation, we refer to values derived from Consenrich,
242
281
  but other continuous-valued tracks at evenly spaced genomic intervals may be suitable, too.
243
282
  :type values: npt.NDArray[np.float64]
244
- :param templateNames: A list of str values -- wavelet bases used for matching, e.g., `[haar, db2, sym4]`
283
+ :param templateNames: A list of str values -- each entry references a mother wavelet (or its corresponding scaling function). e.g., `[haar, db2]`
245
284
  :type templateNames: List[str]
246
- :param cascadeLevels: A list of int values -- the number of cascade iterations used for approximating
247
- the scaling/wavelet functions.
285
+ :param cascadeLevels: Number of cascade iterations used to approximate each template (wavelet or scaling function).
286
+ Must have the same length as `templateNames`, with each entry aligned to the
287
+ corresponding template. e.g., given templateNames `[haar, db2]`, then `[2,2]` would use 2 cascade levels for both templates.
248
288
  :type cascadeLevels: List[int]
249
289
  :param iters: Number of random blocks to sample in the response sequence while building
250
290
  an empirical null to test significance. See :func:`cconsenrich.csampleBlockStats`.
@@ -255,7 +295,9 @@ def matchWavelet(
255
295
  :type alpha: float
256
296
  :param minMatchLengthBP: Within a window of `minMatchLengthBP` length (bp), relative maxima in
257
297
  the signal-template convolution must be greater in value than others to qualify as matches.
258
- :type minMatchLengthBP: int
298
+ If set to a value less than 1, the minimum length is determined via :func:`consenrich.matching.autoMinLengthIntervals`.
299
+ If set to `None`, defaults to 250 bp.
300
+ :type minMatchLengthBP: Optional[int]
259
301
  :param minSignalAtMaxima: Secondary significance threshold coupled with `alpha`. Requires the *signal value*
260
302
  at relative maxima in the response sequence to be greater than this threshold. Comparisons are made in log-scale
261
303
  to temper genome-wide dynamic range. If a `float` value is provided, the minimum signal value must be greater
@@ -274,19 +316,47 @@ def matchWavelet(
274
316
  :return: A pandas DataFrame with detected matches
275
317
  :rtype: pd.DataFrame
276
318
  """
319
+
320
+ rng = np.random.default_rng(int(randSeed))
277
321
  if len(intervals) < 5:
278
322
  raise ValueError("`intervals` must be at least length 5")
323
+
279
324
  if len(values) != len(intervals):
280
325
  raise ValueError(
281
326
  "`values` must have the same length as `intervals`"
282
327
  )
328
+
329
+ if len(templateNames) != len(cascadeLevels):
330
+ raise ValueError(
331
+ "\n\t`templateNames` and `cascadeLevels` must have the same length."
332
+ "\n\tSet products are not supported, i.e., each template needs an explicitly defined cascade level."
333
+ "\t\ne.g., for `templateNames = [haar, db2]`, use `cascadeLevels = [2, 2]`, not `[2]`.\n"
334
+ )
335
+
283
336
  intervalLengthBp = intervals[1] - intervals[0]
337
+
338
+ if minMatchLengthBP is not None and minMatchLengthBP < 1:
339
+ minMatchLengthBP = (
340
+ autoMinLengthIntervals(values) * int(intervalLengthBp)
341
+ )
342
+ elif minMatchLengthBP is None:
343
+ minMatchLengthBP = 250
344
+
345
+ logger.info(
346
+ f"\n\tUsing minMatchLengthBP: {minMatchLengthBP}"
347
+ )
348
+
284
349
  if not np.all(np.abs(np.diff(intervals)) == intervalLengthBp):
285
350
  raise ValueError("`intervals` must be evenly spaced.")
286
- rng = np.random.default_rng(int(randSeed))
287
- cascadeLevels = sorted(list(set(cascadeLevels)))
288
- if weights is not None and len(weights) == len(values):
289
- values = values * weights
351
+
352
+ if weights is not None:
353
+ if len(weights) != len(values):
354
+ logger.warning(
355
+ f"`weights` length {len(weights)} does not match `values` length {len(values)}. Ignoring..."
356
+ )
357
+ else:
358
+ values = values * weights
359
+
290
360
  asinhValues = np.asinh(values, dtype=np.float32)
291
361
  asinhNonZeroValues = asinhValues[asinhValues > 0]
292
362
  iters = max(int(iters), 1000)
@@ -383,147 +453,142 @@ def matchWavelet(
383
453
  high = np.quantile(vals, 0.999)
384
454
  return vals[(vals > low) & (vals < high)]
385
455
 
386
- for cascadeLevel in cascadeLevels:
387
- for templateName in templateNames:
388
- if templateName not in pw.wavelist(kind="discrete"):
389
- logger.warning(
390
- f"Skipping unknown wavelet template: {templateName}"
391
- )
392
- continue
393
-
394
- wav = pw.Wavelet(str(templateName))
395
- scalingFunc, waveletFunc, _ = wav.wavefun(
396
- level=int(cascadeLevel)
397
- )
398
- template = np.array(
399
- scalingFunc if useScalingFunction else waveletFunc,
400
- dtype=np.float64,
456
+ for templateName, cascadeLevel in zip(
457
+ templateNames, cascadeLevels
458
+ ):
459
+ if templateName not in pw.wavelist(kind="discrete"):
460
+ logger.warning(
461
+ f"Skipping unknown wavelet template: {templateName}"
401
462
  )
402
- template /= np.linalg.norm(template)
463
+ continue
403
464
 
404
- logger.info(
405
- f"\n\tMatching template: {templateName}"
406
- f"\n\tcascade level: {cascadeLevel}"
407
- f"\n\ttemplate length: {len(template)}"
408
- )
465
+ wav = pw.Wavelet(str(templateName))
466
+ scalingFunc, waveletFunc, _ = wav.wavefun(
467
+ level=int(cascadeLevel)
468
+ )
469
+ template = np.array(
470
+ scalingFunc if useScalingFunction else waveletFunc,
471
+ dtype=np.float64,
472
+ )
473
+ template /= np.linalg.norm(template)
409
474
 
410
- # efficient FFT-based cross-correlation
411
- # (OA may be better for smaller templates, TODO add a check)
412
- response = signal.fftconvolve(
413
- values, template[::-1], mode="same"
414
- )
415
- thisMinMatchBp = minMatchLengthBP
416
- if thisMinMatchBp is None or thisMinMatchBp < 1:
417
- thisMinMatchBp = len(template) * intervalLengthBp
418
- if thisMinMatchBp % intervalLengthBp != 0:
419
- thisMinMatchBp += intervalLengthBp - (
420
- thisMinMatchBp % intervalLengthBp
421
- )
422
- relWindowBins = int(
423
- ((thisMinMatchBp / intervalLengthBp) / 2) + 1
475
+ logger.info(
476
+ f"\n\tMatching template: {templateName}"
477
+ f"\n\tcascade level: {cascadeLevel}"
478
+ f"\n\ttemplate length: {len(template)}"
479
+ )
480
+
481
+ # efficient FFT-based cross-correlation
482
+ # (OA may be better for smaller templates, TODO add a check)
483
+ response = signal.fftconvolve(
484
+ values, template[::-1], mode="same"
485
+ )
486
+ thisMinMatchBp = minMatchLengthBP
487
+ if thisMinMatchBp is None or thisMinMatchBp < 1:
488
+ thisMinMatchBp = len(template) * intervalLengthBp
489
+ if thisMinMatchBp % intervalLengthBp != 0:
490
+ thisMinMatchBp += intervalLengthBp - (
491
+ thisMinMatchBp % intervalLengthBp
424
492
  )
425
- relWindowBins = max(relWindowBins, 1)
426
- asinhThreshold = parseMinSignalThreshold(
427
- minSignalAtMaxima
493
+ relWindowBins = int(
494
+ ((thisMinMatchBp / intervalLengthBp) / 2) + 1
495
+ )
496
+ relWindowBins = max(relWindowBins, 1)
497
+ asinhThreshold = parseMinSignalThreshold(minSignalAtMaxima)
498
+ for nullMask, testMask, tag in [
499
+ (halfLeftMask, halfRightMask, "R"),
500
+ (halfRightMask, halfLeftMask, "L"),
501
+ ]:
502
+ blockMaxima = sampleBlockMaxima(
503
+ response,
504
+ nullMask,
505
+ relWindowBins,
506
+ nsamp=max(iters, 1000),
507
+ seed=rng.integers(1, 10_000),
428
508
  )
429
- for nullMask, testMask, tag in [
430
- (halfLeftMask, halfRightMask, "R"),
431
- (halfRightMask, halfLeftMask, "L"),
432
- ]:
509
+ if len(blockMaxima) < 25:
510
+ pooledMask = ~excludeMaskGlobal.astype(bool)
433
511
  blockMaxima = sampleBlockMaxima(
434
512
  response,
435
- nullMask,
513
+ pooledMask,
436
514
  relWindowBins,
437
515
  nsamp=max(iters, 1000),
438
516
  seed=rng.integers(1, 10_000),
439
517
  )
440
- if len(blockMaxima) < 25:
441
- pooledMask = ~excludeMaskGlobal.astype(bool)
442
- blockMaxima = sampleBlockMaxima(
443
- response,
444
- pooledMask,
445
- relWindowBins,
446
- nsamp=max(iters, 1000),
447
- seed=rng.integers(1, 10_000),
448
- )
449
- ecdfSf = stats.ecdf(blockMaxima).sf
450
- candidateIdx = relativeMaxima(response, relWindowBins)
451
-
452
- candidateMask = (
453
- (candidateIdx >= relWindowBins)
454
- & (candidateIdx < len(response) - relWindowBins)
455
- & (testMask[candidateIdx])
456
- & (excludeMaskGlobal[candidateIdx] == 0)
457
- & (asinhValues[candidateIdx] > asinhThreshold)
458
- )
518
+ ecdfSf = stats.ecdf(blockMaxima).sf
519
+ candidateIdx = relativeMaxima(response, relWindowBins)
520
+
521
+ candidateMask = (
522
+ (candidateIdx >= relWindowBins)
523
+ & (candidateIdx < len(response) - relWindowBins)
524
+ & (testMask[candidateIdx])
525
+ & (excludeMaskGlobal[candidateIdx] == 0)
526
+ & (asinhValues[candidateIdx] > asinhThreshold)
527
+ )
459
528
 
460
- candidateIdx = candidateIdx[candidateMask]
461
- if len(candidateIdx) == 0:
462
- continue
463
- if (
464
- maxNumMatches is not None
465
- and len(candidateIdx) > maxNumMatches
466
- ):
467
- candidateIdx = candidateIdx[
468
- np.argsort(asinhValues[candidateIdx])[
469
- -maxNumMatches:
470
- ]
529
+ candidateIdx = candidateIdx[candidateMask]
530
+ if len(candidateIdx) == 0:
531
+ continue
532
+ if (
533
+ maxNumMatches is not None
534
+ and len(candidateIdx) > maxNumMatches
535
+ ):
536
+ candidateIdx = candidateIdx[
537
+ np.argsort(asinhValues[candidateIdx])[
538
+ -maxNumMatches:
471
539
  ]
472
- pEmp = np.clip(
473
- ecdfSf.evaluate(response[candidateIdx]),
474
- 1.0e-10,
475
- 1.0,
476
- )
477
- startsIdx = np.maximum(
478
- candidateIdx - relWindowBins, 0
540
+ ]
541
+ pEmp = np.clip(
542
+ ecdfSf.evaluate(response[candidateIdx]),
543
+ 1.0e-10,
544
+ 1.0,
545
+ )
546
+ startsIdx = np.maximum(candidateIdx - relWindowBins, 0)
547
+ endsIdx = np.minimum(
548
+ len(values) - 1, candidateIdx + relWindowBins
549
+ )
550
+ pointSourcesIdx = []
551
+ for s, e in zip(startsIdx, endsIdx):
552
+ pointSourcesIdx.append(
553
+ np.argmax(values[s : e + 1]) + s
479
554
  )
480
- endsIdx = np.minimum(
481
- len(values) - 1, candidateIdx + relWindowBins
555
+ pointSourcesIdx = np.array(pointSourcesIdx)
556
+ starts = intervals[startsIdx]
557
+ ends = intervals[endsIdx]
558
+ pointSourcesAbs = (intervals[pointSourcesIdx]) + max(
559
+ 1, intervalLengthBp // 2
560
+ )
561
+ if recenterAtPointSource:
562
+ starts = pointSourcesAbs - (
563
+ relWindowBins * intervalLengthBp
482
564
  )
483
- pointSourcesIdx = []
484
- for s, e in zip(startsIdx, endsIdx):
485
- pointSourcesIdx.append(
486
- np.argmax(values[s : e + 1]) + s
487
- )
488
- pointSourcesIdx = np.array(pointSourcesIdx)
489
- starts = intervals[startsIdx]
490
- ends = intervals[endsIdx]
491
- pointSourcesAbs = (intervals[pointSourcesIdx]) + max(
492
- 1, intervalLengthBp // 2
565
+ ends = pointSourcesAbs + (
566
+ relWindowBins * intervalLengthBp
493
567
  )
494
- if recenterAtPointSource:
495
- starts = pointSourcesAbs - (
496
- relWindowBins * intervalLengthBp
497
- )
498
- ends = pointSourcesAbs + (
499
- relWindowBins * intervalLengthBp
500
- )
501
- pointSourcesRel = (
502
- intervals[pointSourcesIdx] - starts
503
- ) + max(1, intervalLengthBp // 2)
504
- sqScores = (1 + response[candidateIdx]) ** 2
505
- minR, maxR = (
506
- float(np.min(sqScores)),
507
- float(np.max(sqScores)),
568
+ pointSourcesRel = (
569
+ intervals[pointSourcesIdx] - starts
570
+ ) + max(1, intervalLengthBp // 2)
571
+ sqScores = (1 + response[candidateIdx]) ** 2
572
+ minR, maxR = (
573
+ float(np.min(sqScores)),
574
+ float(np.max(sqScores)),
575
+ )
576
+ rangeR = max(maxR - minR, 1.0)
577
+ scores = (250 + 750 * (sqScores - minR) / rangeR).astype(int)
578
+ for i, idxVal in enumerate(candidateIdx):
579
+ allRows.append(
580
+ {
581
+ "chromosome": chromosome,
582
+ "start": int(starts[i]),
583
+ "end": int(ends[i]),
584
+ "name": f"{templateName}_{cascadeLevel}_{idxVal}_{tag}",
585
+ "score": int(scores[i]),
586
+ "strand": ".",
587
+ "signal": float(response[idxVal]),
588
+ "p_raw": float(pEmp[i]),
589
+ "pointSource": int(pointSourcesRel[i]),
590
+ }
508
591
  )
509
- rangeR = max(maxR - minR, 1.0)
510
- scores = (
511
- 250 + 750 * (sqScores - minR) / rangeR
512
- ).astype(int)
513
- for i, idxVal in enumerate(candidateIdx):
514
- allRows.append(
515
- {
516
- "chromosome": chromosome,
517
- "start": int(starts[i]),
518
- "end": int(ends[i]),
519
- "name": f"{templateName}_{cascadeLevel}_{idxVal}_{tag}",
520
- "score": int(scores[i]),
521
- "strand": ".",
522
- "signal": float(response[idxVal]),
523
- "p_raw": float(pEmp[i]),
524
- "pointSource": int(pointSourcesRel[i]),
525
- }
526
- )
527
592
 
528
593
  if not allRows:
529
594
  logger.warning(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: consenrich
3
- Version: 0.7.1b1
3
+ Version: 0.7.1b2
4
4
  Summary: Genome-wide estimation of signals hidden in noisy multi-sample HTS datasets
5
5
  Author-email: "Nolan H. Hamilton" <nolan.hamilton@unc.edu>
6
6
  Requires-Python: >=3.11