coolbox 0.3.7__py3-none-any.whl → 0.3.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of coolbox might be problematic. Click here for more details.
- coolbox/__init__.py +1 -1
- coolbox/cli.py +0 -2
- coolbox/core/browser/base.py +5 -2
- coolbox/core/coverage/__init__.py +1 -1
- coolbox/core/coverage/highlights.py +4 -4
- coolbox/core/frame/frame.py +16 -6
- coolbox/core/track/__init__.py +2 -1
- coolbox/core/track/arcs/plot.py +6 -2
- coolbox/core/track/bed/__init__.py +0 -1
- coolbox/core/track/bed/base.py +93 -85
- coolbox/core/track/bed/bed.py +37 -16
- coolbox/core/track/bed/fetch.py +1 -1
- coolbox/core/track/bed/plot.py +71 -221
- coolbox/core/track/gtf.py +11 -9
- coolbox/core/track/hicmat/base.py +12 -9
- coolbox/core/track/hicmat/cool.py +6 -5
- coolbox/core/track/hicmat/dothic.py +4 -3
- coolbox/core/track/hicmat/hicmat.py +8 -9
- coolbox/core/track/hicmat/plot.py +12 -6
- coolbox/core/track/hist/__init__.py +10 -3
- coolbox/core/track/hist/bigwig.py +0 -16
- coolbox/core/track/hist/plot.py +13 -5
- coolbox/core/track/ideogram.py +19 -10
- coolbox/core/track/pseudo.py +6 -2
- coolbox/core/track/tad.py +237 -0
- coolbox/utilities/bed.py +1 -1
- coolbox/utilities/hic/straw.py +532 -329
- coolbox/utilities/hic/wrap.py +55 -24
- {coolbox-0.3.7.dist-info → coolbox-0.3.9.dist-info}/METADATA +20 -11
- {coolbox-0.3.7.dist-info → coolbox-0.3.9.dist-info}/RECORD +34 -34
- {coolbox-0.3.7.dist-info → coolbox-0.3.9.dist-info}/WHEEL +1 -1
- coolbox/core/track/bed/tad.py +0 -18
- {coolbox-0.3.7.data → coolbox-0.3.9.data}/scripts/coolbox +0 -0
- {coolbox-0.3.7.dist-info → coolbox-0.3.9.dist-info}/LICENSE +0 -0
- {coolbox-0.3.7.dist-info → coolbox-0.3.9.dist-info}/top_level.txt +0 -0
coolbox/core/track/bed/plot.py
CHANGED
|
@@ -13,60 +13,67 @@ from coolbox.utilities.genome import GenomeRange
|
|
|
13
13
|
log = get_logger(__name__)
|
|
14
14
|
|
|
15
15
|
|
|
16
|
-
class
|
|
17
|
-
def init_for_plot(self):
|
|
18
|
-
from matplotlib import font_manager
|
|
16
|
+
class PlotGenes(object):
|
|
19
17
|
|
|
18
|
+
def __init__(self, *args, **kwargs):
|
|
19
|
+
self.init_colormap()
|
|
20
|
+
from matplotlib import font_manager
|
|
20
21
|
properties = self.properties
|
|
21
22
|
self.len_w = None # this is the length of the letter 'w' given the font size
|
|
22
23
|
self.counter = None
|
|
23
24
|
self.small_relative = None
|
|
24
25
|
self.is_draw_labels = properties['labels'] == 'on'
|
|
25
26
|
self.fp = font_manager.FontProperties(size=properties['fontsize'])
|
|
26
|
-
# set the distance between rows
|
|
27
27
|
self.row_scale = properties['interval_height'] * 2.3
|
|
28
|
-
|
|
29
|
-
self.
|
|
30
|
-
if not matplotlib.colors.is_color_like(self.properties['color']) and self.properties['color'] != 'bed_rgb':
|
|
31
|
-
# check if the color is a valid colormap name
|
|
32
|
-
if self.properties['color'] not in matplotlib.cm.datad:
|
|
33
|
-
log.warning("*WARNING* color: '{}' for Track {} is not valid. Color has "
|
|
34
|
-
"been set to {}".format(self.properties['color'], self.properties['name'],
|
|
35
|
-
self.COLOR))
|
|
36
|
-
self.properties['color'] = self.COLOR
|
|
37
|
-
else:
|
|
38
|
-
self.colormap = self.properties['color']
|
|
28
|
+
self.cache_gr = None
|
|
29
|
+
self.cache_res = None
|
|
39
30
|
|
|
40
|
-
def
|
|
31
|
+
def fetch_plot_data(self, gr: GenomeRange, **kwargs) -> pd.DataFrame:
|
|
32
|
+
if gr == self.cache_gr:
|
|
33
|
+
return self.cache_res
|
|
34
|
+
else:
|
|
35
|
+
self.cache_gr = gr
|
|
36
|
+
self.cache_res = self.fetch_data(gr, **kwargs)
|
|
37
|
+
return self.cache_res
|
|
38
|
+
|
|
39
|
+
def get_track_height(self, frame_width, current_range):
|
|
40
|
+
props = self.properties
|
|
41
|
+
if (props.get('height', 'auto') == 'auto') and\
|
|
42
|
+
('row_height' in props) and\
|
|
43
|
+
(props.get('display', 'stacked') == 'stacked'):
|
|
44
|
+
ov_genes = self.fetch_plot_data(current_range)
|
|
45
|
+
self.plot_genes(None, current_range, ov_genes, dry_run=True, fig_width=frame_width)
|
|
46
|
+
return max(props['row_height'] * self.current_row_num, props['row_height'])
|
|
47
|
+
else:
|
|
48
|
+
try:
|
|
49
|
+
height = float(self.properties['height'])
|
|
50
|
+
except:
|
|
51
|
+
height = 1.0
|
|
52
|
+
return height
|
|
53
|
+
|
|
54
|
+
def __set_plot_params(self, gr: GenomeRange, ov_genes: pd.DataFrame):
|
|
41
55
|
properties = self.properties
|
|
42
56
|
# bed_type
|
|
43
57
|
self.properties['bed_type'] = properties['bed_type'] or self.infer_bed_type(ov_genes)
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
if self.colormap is not None:
|
|
54
|
-
norm = matplotlib.colors.Normalize(vmin=min_score, vmax=max_score)
|
|
55
|
-
cmap = matplotlib.cm.get_cmap(properties['color'])
|
|
56
|
-
self.colormap = matplotlib.cm.ScalarMappable(norm=norm, cmap=cmap)
|
|
57
|
-
if properties['color'] == 'bed_rgb' and properties['bed_type'] not in ['bed12', 'bed9']:
|
|
58
|
-
log.warning("*WARNING* Color set to 'bed_rgb', but bed file does not have the rgb field. The color has "
|
|
59
|
-
"been set to {}".format(self.COLOR))
|
|
60
|
-
self.properties['color'] = self.COLOR
|
|
61
|
-
self.colormap = None
|
|
58
|
+
self.set_colormap(ov_genes)
|
|
59
|
+
# turn labels off when too many intervals are visible.
|
|
60
|
+
if properties['labels'] == 'auto':
|
|
61
|
+
if len(ov_genes) > 60:
|
|
62
|
+
self.is_draw_labels = False
|
|
63
|
+
else:
|
|
64
|
+
self.is_draw_labels = True
|
|
65
|
+
self.small_relative = 0.004 * (gr.end - gr.start)
|
|
66
|
+
self.counter = 0
|
|
62
67
|
|
|
68
|
+
def plot_genes(self, ax, gr: GenomeRange, ov_genes: pd.DataFrame, dry_run = False, fig_width = None):
|
|
69
|
+
properties = self.properties
|
|
70
|
+
self.__set_plot_params(gr, ov_genes)
|
|
63
71
|
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
self.is_draw_labels = False
|
|
72
|
+
assert (not dry_run) or (fig_width is not None)
|
|
73
|
+
if dry_run:
|
|
74
|
+
self.__get_length_w(fig_width, gr.start, gr.end)
|
|
75
|
+
else:
|
|
76
|
+
self.__get_length_w(ax.get_figure().get_figwidth(), gr.start, gr.end)
|
|
70
77
|
|
|
71
78
|
num_rows = properties['num_rows']
|
|
72
79
|
max_num_row_local = 1
|
|
@@ -145,43 +152,45 @@ class PlotBed(object):
|
|
|
145
152
|
max_num_row_local = free_row
|
|
146
153
|
if ypos > max_ypos:
|
|
147
154
|
max_ypos = ypos
|
|
148
|
-
|
|
149
|
-
if
|
|
150
|
-
if properties['
|
|
151
|
-
|
|
155
|
+
|
|
156
|
+
if not dry_run:
|
|
157
|
+
if properties['bed_type'] == 'bed12':
|
|
158
|
+
if properties['gene_style'] == 'flybase':
|
|
159
|
+
self.draw_gene_with_introns_flybase_style(ax, bed, ypos, rgb, edgecolor)
|
|
160
|
+
else:
|
|
161
|
+
self.draw_gene_with_introns(ax, bed, ypos, rgb, edgecolor)
|
|
152
162
|
else:
|
|
153
|
-
self.
|
|
154
|
-
else:
|
|
155
|
-
self.draw_gene_simple(ax, bed, ypos, rgb, edgecolor)
|
|
163
|
+
self.draw_gene_simple(ax, bed, ypos, rgb, edgecolor)
|
|
156
164
|
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
165
|
+
if self.is_draw_labels and bed.start > gr.start and bed.end < gr.end:
|
|
166
|
+
ax.text(bed.end + self.small_relative,
|
|
167
|
+
ypos + (float(properties['interval_height']) / 2),
|
|
168
|
+
bed.name,
|
|
169
|
+
horizontalalignment='left',
|
|
170
|
+
verticalalignment='center',
|
|
171
|
+
fontproperties=self.fp)
|
|
164
172
|
|
|
165
173
|
if self.counter == 0:
|
|
166
|
-
log.
|
|
174
|
+
log.debug(f"*Warning* No intervals were found for file {properties['file']} "
|
|
167
175
|
f"in Track \'{properties['name']}\' for the interval plotted ({gr}).\n")
|
|
168
176
|
|
|
169
177
|
ymax = 0
|
|
170
178
|
if num_rows:
|
|
171
179
|
ymin = float(num_rows) * self.row_scale
|
|
180
|
+
self.current_row_num = num_rows
|
|
172
181
|
else:
|
|
173
182
|
ymin = max_ypos + properties['interval_height']
|
|
183
|
+
self.current_row_num = len(row_last_position)
|
|
174
184
|
|
|
175
185
|
log.debug("ylim {},{}".format(ymin, ymax))
|
|
176
186
|
# the axis is inverted (thus, ymax < ymin)
|
|
177
|
-
|
|
187
|
+
if not dry_run:
|
|
188
|
+
ax.set_ylim(ymin, ymax)
|
|
178
189
|
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
elif properties['display'] == 'collapsed':
|
|
182
|
-
ax.set_ylim(-5, 105)
|
|
190
|
+
if properties['display'] == 'collapsed':
|
|
191
|
+
ax.set_ylim(-5, 105)
|
|
183
192
|
|
|
184
|
-
|
|
193
|
+
ax.set_xlim(gr.start, gr.end)
|
|
185
194
|
|
|
186
195
|
def draw_gene_with_introns(self, ax, bed, ypos, rgb, edgecolor):
|
|
187
196
|
"""
|
|
@@ -369,7 +378,7 @@ class PlotBed(object):
|
|
|
369
378
|
|
|
370
379
|
return vertices
|
|
371
380
|
|
|
372
|
-
def
|
|
381
|
+
def __get_length_w(self, fig_width, region_start, region_end):
|
|
373
382
|
"""
|
|
374
383
|
to improve the visualization of the genes it is good to have an estimation of the label
|
|
375
384
|
length. In the following code I try to get the length of a 'W' in base pairs.
|
|
@@ -410,162 +419,3 @@ class PlotBed(object):
|
|
|
410
419
|
else:
|
|
411
420
|
return free_row * self.row_scale
|
|
412
421
|
|
|
413
|
-
def get_rgb_and_edge_color(self, bed):
|
|
414
|
-
# TODO need simplification
|
|
415
|
-
rgb = self.properties['color']
|
|
416
|
-
edgecolor = self.properties['border_color']
|
|
417
|
-
|
|
418
|
-
if self.colormap:
|
|
419
|
-
# translate value field (in the example above is 0 or 0.2686...) into a color
|
|
420
|
-
rgb = self.colormap.to_rgba(bed.score)
|
|
421
|
-
|
|
422
|
-
# for tad coverage
|
|
423
|
-
if self.properties['style'] == 'tad' and self.properties['border_only'] == 'yes':
|
|
424
|
-
rgb = 'none'
|
|
425
|
-
elif self.properties['color'] == 'bed_rgb':
|
|
426
|
-
# if rgb is set in the bed line, this overrides the previously
|
|
427
|
-
# defined colormap
|
|
428
|
-
if self.properties['bed_type'] in ['bed9', 'bed12'] and len(bed.rgb) == 3:
|
|
429
|
-
try:
|
|
430
|
-
rgb = [float(x) / 255 for x in bed.rgb]
|
|
431
|
-
if 'border_color' in self.properties:
|
|
432
|
-
edgecolor = self.properties['border_color']
|
|
433
|
-
else:
|
|
434
|
-
edgecolor = self.properties['color']
|
|
435
|
-
except IndexError:
|
|
436
|
-
rgb = self.COLOR
|
|
437
|
-
else:
|
|
438
|
-
rgb = self.COLOR
|
|
439
|
-
return rgb, edgecolor
|
|
440
|
-
|
|
441
|
-
@staticmethod
|
|
442
|
-
def infer_bed_type(df: pd.DataFrame) -> Union[str, None]:
|
|
443
|
-
# bed_type of dataframe are store in dataframe's __dict__ in FetchBed.fetch_intervals
|
|
444
|
-
if 'bed_type' in df.__dict__:
|
|
445
|
-
bed_type = df.bed_type
|
|
446
|
-
else:
|
|
447
|
-
bed_types = {
|
|
448
|
-
12: 'bed12',
|
|
449
|
-
9: 'bed9',
|
|
450
|
-
6: 'bed6',
|
|
451
|
-
3: 'bed3'
|
|
452
|
-
}
|
|
453
|
-
num_col = len(df.columns)
|
|
454
|
-
bed_type = bed_types[num_col] if num_col in bed_types else 'bed3'
|
|
455
|
-
if bed_type == 'bed3' and num_col < 3:
|
|
456
|
-
raise ValueError(f"Invalid dataframe for bed3 with columns: {df.columns}")
|
|
457
|
-
return bed_type
|
|
458
|
-
|
|
459
|
-
def plot_tads(self, ax, gr: GenomeRange, tads: pd.DataFrame):
|
|
460
|
-
"""
|
|
461
|
-
Plots the boundaries as triangles in the given ax.
|
|
462
|
-
"""
|
|
463
|
-
from coolbox.core.track.hicmat import HicMatBase
|
|
464
|
-
# coverage only
|
|
465
|
-
assert 'track' in self.__dict__ and isinstance(self.track, HicMatBase), \
|
|
466
|
-
f"The parent track should be instance of {HicMatBase}"
|
|
467
|
-
|
|
468
|
-
hicmat_tri_style = (HicMatBase.STYLE_WINDOW, HicMatBase.STYLE_TRIANGULAR)
|
|
469
|
-
hicmat_ma_style = (HicMatBase.STYLE_MATRIX,)
|
|
470
|
-
|
|
471
|
-
hictrack = self.track
|
|
472
|
-
hicmat_style = hictrack.properties['style']
|
|
473
|
-
|
|
474
|
-
# TODO Should we add plotting in BigWig, BedGraph, ABCCompartment, Arcs support?(The original codes supports)
|
|
475
|
-
for region in tads.itertuples():
|
|
476
|
-
if hicmat_style in hicmat_tri_style:
|
|
477
|
-
depth = (gr.end - gr.start) / 2
|
|
478
|
-
ymax = (gr.end - gr.start)
|
|
479
|
-
self.plot_triangular(ax, gr, region, ymax, depth)
|
|
480
|
-
elif hicmat_style in hicmat_ma_style:
|
|
481
|
-
self.plot_box(ax, gr, region)
|
|
482
|
-
else:
|
|
483
|
-
raise ValueError(f"unsupported hicmat style {hicmat_style}")
|
|
484
|
-
|
|
485
|
-
if len(tads) == 0:
|
|
486
|
-
log.warning("No regions found for Coverage {}.".format(self.properties['name']))
|
|
487
|
-
|
|
488
|
-
def plot_triangular(self, ax, gr, region, ymax, depth):
|
|
489
|
-
"""
|
|
490
|
-
/\
|
|
491
|
-
/ \
|
|
492
|
-
/ \
|
|
493
|
-
_____________________
|
|
494
|
-
x1 x2 x3
|
|
495
|
-
"""
|
|
496
|
-
|
|
497
|
-
from matplotlib.patches import Polygon
|
|
498
|
-
x1 = region.start
|
|
499
|
-
x2 = x1 + float(region.end - region.start) / 2
|
|
500
|
-
x3 = region.end
|
|
501
|
-
y1 = 0
|
|
502
|
-
y2 = (region.end - region.start)
|
|
503
|
-
|
|
504
|
-
y = (y2 / ymax) * depth
|
|
505
|
-
|
|
506
|
-
rgb, edgecolor = self.get_rgb_and_edge_color(region)
|
|
507
|
-
|
|
508
|
-
triangle = Polygon(np.array([[x1, y1], [x2, y], [x3, y1]]), closed=True,
|
|
509
|
-
facecolor=rgb, edgecolor=edgecolor,
|
|
510
|
-
alpha=self.properties['alpha'],
|
|
511
|
-
linestyle=self.properties['border_style'],
|
|
512
|
-
linewidth=self.properties['border_width'])
|
|
513
|
-
ax.add_artist(triangle)
|
|
514
|
-
self.plot_score(ax, gr, region, 'triangular', ymax, depth)
|
|
515
|
-
|
|
516
|
-
def plot_box(self, ax, gr, region):
|
|
517
|
-
from matplotlib.patches import Rectangle
|
|
518
|
-
|
|
519
|
-
x1 = region.start
|
|
520
|
-
x2 = region.end
|
|
521
|
-
x = y = x1
|
|
522
|
-
w = h = (x2 - x1)
|
|
523
|
-
|
|
524
|
-
rgb, edgecolor = self.get_rgb_and_edge_color(region)
|
|
525
|
-
|
|
526
|
-
fill = self.properties['border_only'] == 'no'
|
|
527
|
-
|
|
528
|
-
rec = Rectangle((x, y), w, h,
|
|
529
|
-
fill=fill,
|
|
530
|
-
facecolor=rgb,
|
|
531
|
-
edgecolor=edgecolor,
|
|
532
|
-
alpha=self.properties['alpha'],
|
|
533
|
-
linestyle=self.properties['border_style'],
|
|
534
|
-
linewidth=self.properties['border_width'])
|
|
535
|
-
ax.add_patch(rec)
|
|
536
|
-
self.plot_score(ax, gr, region, 'box')
|
|
537
|
-
|
|
538
|
-
def plot_score(self, ax, gr, region, style, ymax=None, depth=None):
|
|
539
|
-
properties = self.properties
|
|
540
|
-
|
|
541
|
-
if properties['show_score'] != 'yes':
|
|
542
|
-
return
|
|
543
|
-
bed = region
|
|
544
|
-
score = bed.score
|
|
545
|
-
if not isinstance(score, (float, int)):
|
|
546
|
-
# score is not number not plot
|
|
547
|
-
return
|
|
548
|
-
region_length = region.end - region.start
|
|
549
|
-
if region_length / gr.length < 0.05:
|
|
550
|
-
# region too small not plot score
|
|
551
|
-
return
|
|
552
|
-
font_size = properties['score_font_size']
|
|
553
|
-
if font_size == 'auto':
|
|
554
|
-
# inference the font size
|
|
555
|
-
from math import log2
|
|
556
|
-
base_size = 18
|
|
557
|
-
s_ = (region_length / gr.length) * 10
|
|
558
|
-
s_ = int(log2(s_))
|
|
559
|
-
font_size = base_size + s_
|
|
560
|
-
ratio = properties['score_height_ratio']
|
|
561
|
-
color = properties['score_font_color']
|
|
562
|
-
if style == 'box':
|
|
563
|
-
x1 = region.start
|
|
564
|
-
x2 = region.end
|
|
565
|
-
w = x2 - x1
|
|
566
|
-
x = x2 - w * ratio
|
|
567
|
-
y = x1 + w * ratio
|
|
568
|
-
else: # triangular
|
|
569
|
-
x = region.begin + region_length * 0.4
|
|
570
|
-
y = (region_length / ymax) * depth * ratio
|
|
571
|
-
ax.text(x, y, "{0:.3f}".format(score), fontsize=font_size, color=color)
|
coolbox/core/track/gtf.py
CHANGED
|
@@ -33,7 +33,7 @@ class GTF(Track):
|
|
|
33
33
|
color : {str, 'random'}, optional
|
|
34
34
|
When the color is random, color for each gene will be randomly selected.
|
|
35
35
|
|
|
36
|
-
|
|
36
|
+
name_attr : {'auto', 'gene_name', 'gene_id', str}, optional
|
|
37
37
|
Which attribute to use to show the feature's name.
|
|
38
38
|
Defaults to 'auto' (try 'gene_name' -> 'gene_id' -> 'position_string').
|
|
39
39
|
"""
|
|
@@ -50,7 +50,7 @@ class GTF(Track):
|
|
|
50
50
|
"color": "random",
|
|
51
51
|
"row_filter": 'feature == "gene"',
|
|
52
52
|
"length_ratio_thresh": 0.005,
|
|
53
|
-
"
|
|
53
|
+
"name_attr": "auto",
|
|
54
54
|
}
|
|
55
55
|
|
|
56
56
|
def __init__(self, file, **kwargs):
|
|
@@ -112,14 +112,16 @@ class GTF(Track):
|
|
|
112
112
|
if gene_name.hasnans:
|
|
113
113
|
gene_id = df['attribute'].str.extract(".*gene_id (.*?) ").iloc[:, 0].str.strip('\";')
|
|
114
114
|
gene_name.fillna(gene_id, inplace=True)
|
|
115
|
-
if gene_name.hasnans:
|
|
116
|
-
pos_str = df['seqname'].astype(str) + ":" +\
|
|
117
|
-
df['start'].astype(str) + "-" +\
|
|
118
|
-
df['end'].astype(str)
|
|
119
|
-
gene_name.fillna(pos_str, inplace=True)
|
|
120
|
-
df['feature_name'] = gene_name
|
|
121
115
|
else:
|
|
122
|
-
|
|
116
|
+
gene_name = df['attribute'].str.extract(f".*{name_attr} (.*?)(?:[ ;])").iloc[:, 0].str.strip('\";')
|
|
117
|
+
|
|
118
|
+
if gene_name.hasnans:
|
|
119
|
+
pos_str = df['seqname'].astype(str) + ":" +\
|
|
120
|
+
df['start'].astype(str) + "-" +\
|
|
121
|
+
df['end'].astype(str)
|
|
122
|
+
gene_name.fillna(pos_str, inplace=True)
|
|
123
|
+
|
|
124
|
+
df['feature_name'] = gene_name
|
|
123
125
|
return df
|
|
124
126
|
|
|
125
127
|
def plot(self, ax, gr: GenomeRange, **kwargs):
|
|
@@ -54,6 +54,10 @@ class HicMatBase(Track, PlotHiCMat, ProcessHicMat):
|
|
|
54
54
|
process_func : {callable, str, False}, optional
|
|
55
55
|
Process matrix with a user-defined function(receive a matrix, return a processed matrix). default False.
|
|
56
56
|
|
|
57
|
+
aspect_ratio : {'equal', 'auto'}, optional
|
|
58
|
+
When set to 'equal', it ensures that matrix pixels are actually squares. When set to 'auto', it allows
|
|
59
|
+
matrix pixels to be stretched to completely fill the subplot. Ignored when height parameter is
|
|
60
|
+
provided. default 'equal'.
|
|
57
61
|
|
|
58
62
|
"""
|
|
59
63
|
|
|
@@ -77,6 +81,7 @@ class HicMatBase(Track, PlotHiCMat, ProcessHicMat):
|
|
|
77
81
|
'height': 'hic_auto',
|
|
78
82
|
'cmap': "JuiceBoxLike",
|
|
79
83
|
"color_bar": "vertical",
|
|
84
|
+
"aspect_ratio": "equal",
|
|
80
85
|
"max_value": "auto",
|
|
81
86
|
"min_value": "auto",
|
|
82
87
|
"depth_ratio": DEPTH_FULL,
|
|
@@ -93,7 +98,7 @@ class HicMatBase(Track, PlotHiCMat, ProcessHicMat):
|
|
|
93
98
|
self.fetched_gr = None
|
|
94
99
|
self.fetched_gr2 = None
|
|
95
100
|
|
|
96
|
-
def fetch_data(self, gr: GenomeRange, **kwargs) -> np.ndarray:
|
|
101
|
+
def fetch_data(self, gr: GenomeRange, gr2=None, **kwargs) -> np.ndarray:
|
|
97
102
|
"""
|
|
98
103
|
Fetch the raw matrix that should be plotted. Normally it's a matrix with raw contacts
|
|
99
104
|
|
|
@@ -109,7 +114,7 @@ class HicMatBase(Track, PlotHiCMat, ProcessHicMat):
|
|
|
109
114
|
"""
|
|
110
115
|
raise NotImplementedError
|
|
111
116
|
|
|
112
|
-
def fetch_plot_data(self, gr: GenomeRange, **kwargs) -> np.ndarray:
|
|
117
|
+
def fetch_plot_data(self, gr: GenomeRange, gr2=None, **kwargs) -> np.ndarray:
|
|
113
118
|
"""
|
|
114
119
|
Fetch 2d contact matrix, the matrix may be processed in case
|
|
115
120
|
'transform', 'normalize', 'gaussian_sigma', 'process_func' exist in properties.
|
|
@@ -127,17 +132,15 @@ class HicMatBase(Track, PlotHiCMat, ProcessHicMat):
|
|
|
127
132
|
matrix : np.array
|
|
128
133
|
Processed hic matrix used for plotting.
|
|
129
134
|
"""
|
|
130
|
-
gr2 = kwargs.get('gr2')
|
|
131
135
|
if self.properties['style'] == self.STYLE_WINDOW and not kwargs.get("gr_updated", False):
|
|
132
136
|
gr, gr2 = self.fetch_window_genome_range(gr, gr2)
|
|
133
|
-
|
|
134
|
-
arr = self.fetch_data(gr, **kwargs)
|
|
137
|
+
arr = self.fetch_data(gr, gr2=gr2, **kwargs)
|
|
135
138
|
# store fetched_gr
|
|
136
139
|
self.fetched_gr = gr
|
|
137
140
|
self.fetched_gr2 = gr2
|
|
138
141
|
return self.process_matrix(arr)
|
|
139
142
|
|
|
140
|
-
def plot(self, ax, gr: GenomeRange, **kwargs):
|
|
143
|
+
def plot(self, ax, gr: GenomeRange, gr2=None, **kwargs):
|
|
141
144
|
"""
|
|
142
145
|
Plot matrix
|
|
143
146
|
|
|
@@ -148,10 +151,10 @@ class HicMatBase(Track, PlotHiCMat, ProcessHicMat):
|
|
|
148
151
|
"""
|
|
149
152
|
self.ax = ax
|
|
150
153
|
# fetch processed plot_data
|
|
151
|
-
self.matrix = self.fetch_plot_data(gr, **kwargs)
|
|
154
|
+
self.matrix = self.fetch_plot_data(gr, gr2=gr2, **kwargs)
|
|
152
155
|
# plot matrix
|
|
153
|
-
img = self.plot_matrix(gr,
|
|
154
|
-
self.adjust_figure(gr,
|
|
156
|
+
img = self.plot_matrix(gr, gr2)
|
|
157
|
+
self.adjust_figure(gr, gr2)
|
|
155
158
|
self.draw_colorbar(img)
|
|
156
159
|
self.plot_label()
|
|
157
160
|
|
|
@@ -34,18 +34,19 @@ class Cool(HicMatBase):
|
|
|
34
34
|
})
|
|
35
35
|
super().__init__(**properties)
|
|
36
36
|
|
|
37
|
-
def fetch_data(self, gr: GenomeRange, **kwargs) -> np.ndarray:
|
|
37
|
+
def fetch_data(self, gr: GenomeRange, gr2=None, **kwargs) -> np.ndarray:
|
|
38
38
|
from coolbox.utilities.hic.wrap import CoolerWrap
|
|
39
39
|
|
|
40
40
|
path = self.properties['file']
|
|
41
|
-
|
|
42
|
-
|
|
41
|
+
binsize = kwargs.get('resolution', self.properties.get('resolution', 'auto'))
|
|
42
|
+
wrap = CoolerWrap(path, balance=self.balance, binsize=binsize)
|
|
43
|
+
arr = wrap.fetch(gr, gr2)
|
|
43
44
|
|
|
44
45
|
self.fetched_binsize = wrap.fetched_binsize # expose fetched binsize
|
|
45
46
|
|
|
46
47
|
return self.fill_zero_nan(arr)
|
|
47
48
|
|
|
48
|
-
def fetch_pixels(self, gr: GenomeRange, **kwargs):
|
|
49
|
+
def fetch_pixels(self, gr: GenomeRange, gr2=None, **kwargs):
|
|
49
50
|
"""
|
|
50
51
|
Fetch the pixels table of upper triangle of the original contact matrix(not processed).
|
|
51
52
|
|
|
@@ -79,7 +80,7 @@ class Cool(HicMatBase):
|
|
|
79
80
|
balance = kwargs.get('balance', self.is_balance)
|
|
80
81
|
wrap = CoolerWrap(path, balance=balance, binsize=kwargs.get('resolution', 'auto'))
|
|
81
82
|
|
|
82
|
-
return wrap.fetch_pixels(gr,
|
|
83
|
+
return wrap.fetch_pixels(gr, gr2, join=kwargs.get('join', True))
|
|
83
84
|
|
|
84
85
|
def infer_binsize(self, gr: GenomeRange, **kwargs) -> int:
|
|
85
86
|
from coolbox.utilities.hic.wrap import CoolerWrap
|
|
@@ -33,13 +33,14 @@ class DotHiC(HicMatBase):
|
|
|
33
33
|
})
|
|
34
34
|
super().__init__(**properties)
|
|
35
35
|
|
|
36
|
-
def fetch_data(self,
|
|
36
|
+
def fetch_data(self, gr, gr2=None, **kwargs) -> np.ndarray:
|
|
37
37
|
from coolbox.utilities.hic.wrap import StrawWrap
|
|
38
38
|
|
|
39
39
|
path = self.properties['file']
|
|
40
|
-
|
|
40
|
+
binsize = kwargs.get('resolution', self.properties.get('resolution', 'auto'))
|
|
41
|
+
wrap = StrawWrap(path, normalization=self.balance, binsize=binsize)
|
|
41
42
|
|
|
42
|
-
arr = wrap.fetch(
|
|
43
|
+
arr = wrap.fetch(gr, gr2)
|
|
43
44
|
|
|
44
45
|
self.fetched_binsize = wrap.fetched_binsize # expose fetched binsize
|
|
45
46
|
|
|
@@ -6,20 +6,19 @@ from .cool import Cool
|
|
|
6
6
|
from .dothic import DotHiC
|
|
7
7
|
|
|
8
8
|
|
|
9
|
-
def HiCMat(
|
|
9
|
+
def HiCMat(file: Union[str, HicMatBase], *args, **kwargs) -> HicMatBase:
|
|
10
10
|
"""
|
|
11
11
|
Compose DotHiC or Cool track automatically based on type of file extension (.cool, .mcool, .hic)
|
|
12
12
|
"""
|
|
13
|
-
if isinstance(
|
|
14
|
-
return
|
|
15
|
-
elif not Path(
|
|
13
|
+
if isinstance(file, HicMatBase):
|
|
14
|
+
return file
|
|
15
|
+
elif not Path(file).is_file():
|
|
16
16
|
raise ValueError("The file path does not exist.")
|
|
17
17
|
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
return Cool(file_or_hicmat, *args, **kwargs)
|
|
18
|
+
if file.endswith(".hic"):
|
|
19
|
+
return DotHiC(file, *args, **kwargs)
|
|
20
|
+
elif file.endswith((".cool", ".mcool")):
|
|
21
|
+
return Cool(file, *args, **kwargs)
|
|
23
22
|
else:
|
|
24
23
|
raise NotImplementedError(f"File type of {p} not supported for HicMat. "
|
|
25
24
|
f"The file type should be one of .cool/.mcool/.hic")
|
|
@@ -58,6 +58,11 @@ class PlotHiCMat(object):
|
|
|
58
58
|
ax = self.ax
|
|
59
59
|
arr = self.matrix
|
|
60
60
|
c_min, c_max = self.matrix_val_range
|
|
61
|
+
|
|
62
|
+
aspect = self.properties['aspect_ratio']
|
|
63
|
+
if self.properties.get('height'):
|
|
64
|
+
aspect = 'auto'
|
|
65
|
+
|
|
61
66
|
if gr2 is None and self.style == self.STYLE_TRIANGULAR:
|
|
62
67
|
# triangular style
|
|
63
68
|
scale_r = 1 / math.sqrt(2)
|
|
@@ -71,7 +76,7 @@ class PlotHiCMat(object):
|
|
|
71
76
|
img = ax.matshow(arr, cmap=cmap,
|
|
72
77
|
transform=tr + ax.transData,
|
|
73
78
|
extent=(gr.start, gr.end, gr.start, gr.end),
|
|
74
|
-
aspect=
|
|
79
|
+
aspect=aspect)
|
|
75
80
|
elif gr2 is None and self.style == self.STYLE_WINDOW:
|
|
76
81
|
# window style
|
|
77
82
|
# exist in HicMatBase
|
|
@@ -88,14 +93,14 @@ class PlotHiCMat(object):
|
|
|
88
93
|
img = ax.matshow(arr, cmap=cmap,
|
|
89
94
|
transform=tr + ax.transData,
|
|
90
95
|
extent=(gr.start, gr.end, gr.start, gr.end),
|
|
91
|
-
aspect=
|
|
96
|
+
aspect=aspect)
|
|
92
97
|
else:
|
|
93
98
|
if gr2 is None:
|
|
94
99
|
gr2 = gr
|
|
95
100
|
# matrix style
|
|
96
101
|
img = ax.matshow(arr, cmap=cmap,
|
|
97
102
|
extent=(gr.start, gr.end, gr2.end, gr2.start),
|
|
98
|
-
aspect=
|
|
103
|
+
aspect=aspect)
|
|
99
104
|
|
|
100
105
|
if self.norm == 'log':
|
|
101
106
|
img.set_norm(colors.LogNorm(vmin=c_min, vmax=c_max))
|
|
@@ -170,7 +175,7 @@ class PlotHiCMat(object):
|
|
|
170
175
|
|
|
171
176
|
c_bar.ax.yaxis.set_ticks_position('left')
|
|
172
177
|
|
|
173
|
-
def get_track_height(self, frame_width):
|
|
178
|
+
def get_track_height(self, frame_width, *args):
|
|
174
179
|
"""
|
|
175
180
|
calculate track height dynamically.
|
|
176
181
|
"""
|
|
@@ -182,7 +187,7 @@ class PlotHiCMat(object):
|
|
|
182
187
|
else:
|
|
183
188
|
height = frame_width * 0.5
|
|
184
189
|
else:
|
|
185
|
-
height = frame_width
|
|
190
|
+
height = frame_width
|
|
186
191
|
|
|
187
192
|
if (
|
|
188
193
|
'depth_ratio' in self.properties
|
|
@@ -192,7 +197,8 @@ class PlotHiCMat(object):
|
|
|
192
197
|
height = height * self.properties['depth_ratio']
|
|
193
198
|
|
|
194
199
|
if 'color_bar' in self.properties and self.properties['color_bar'] != 'no':
|
|
195
|
-
|
|
200
|
+
if self.properties["aspect_ratio"] != 'equal':
|
|
201
|
+
height += 1.5
|
|
196
202
|
|
|
197
203
|
return height
|
|
198
204
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from .base import HistBase
|
|
2
2
|
from .bedgraph import BedGraph
|
|
3
|
-
from .bigwig import BigWig
|
|
3
|
+
from .bigwig import BigWig
|
|
4
4
|
from .hicfeature import DiScore, InsuScore, Virtual4C
|
|
5
5
|
from .bam import BAMCov
|
|
6
6
|
from .snp import SNP
|
|
@@ -12,11 +12,18 @@ def Hist(file, *args, **kwargs) -> HistBase:
|
|
|
12
12
|
"""
|
|
13
13
|
if file.endswith((".bw", ".bigwig", ".bigWig")):
|
|
14
14
|
return BigWig(file, *args, **kwargs)
|
|
15
|
-
elif file.endswith(('.bedgraph', ".bg", ".bedgraph.bgz", ".bg.bgz")):
|
|
15
|
+
elif file.endswith(('.bedgraph', ".bg", ".bedgraph.bgz", ".bg.bgz", ".bedGraph", "bedGraph.bgz")):
|
|
16
16
|
return BedGraph(file, *args, **kwargs)
|
|
17
17
|
elif file.endswith((".snp", ".vcf")):
|
|
18
18
|
return SNP(file, *args, **kwargs)
|
|
19
|
-
elif file.endswith(".bam", ".sam"):
|
|
19
|
+
elif file.endswith((".bam", ".sam")):
|
|
20
20
|
return BAMCov(file, *args, **kwargs)
|
|
21
21
|
else:
|
|
22
22
|
raise NotImplementedError("Hist only support .bigwig or .bedgraph file now.")
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def ABCompartment(file, *args, **kwargs):
|
|
26
|
+
kwargs['threshold'] = 0
|
|
27
|
+
kwargs['color'] = '#0000ff'
|
|
28
|
+
kwargs['threshold_color'] = '#ff0000'
|
|
29
|
+
return Hist(file, *args, **kwargs)
|
|
@@ -104,19 +104,3 @@ class BigWig(HistBase):
|
|
|
104
104
|
"{}\n\nPlease check that the chromosome name is part of the bigwig file "
|
|
105
105
|
"and that the region is valid".format(str(genome_range), self.properties['file']))
|
|
106
106
|
|
|
107
|
-
|
|
108
|
-
class ABCompartment(BigWig):
|
|
109
|
-
"""
|
|
110
|
-
A/B Compartment BigWig track.
|
|
111
|
-
"""
|
|
112
|
-
|
|
113
|
-
DEFAULT_PROPERTIES = {
|
|
114
|
-
"color": "#66ccff",
|
|
115
|
-
"threshold_color": "#ff9c9c",
|
|
116
|
-
"threshold": 0,
|
|
117
|
-
}
|
|
118
|
-
|
|
119
|
-
def __init__(self, file, **kwargs):
|
|
120
|
-
properties = ABCompartment.DEFAULT_PROPERTIES.copy()
|
|
121
|
-
properties.update(kwargs)
|
|
122
|
-
super().__init__(file, **properties)
|