coolbox 0.3.7__py3-none-any.whl → 0.3.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of coolbox might be problematic. Click here for more details.
- coolbox/__init__.py +1 -1
- coolbox/cli.py +0 -2
- coolbox/core/browser/base.py +5 -2
- coolbox/core/coverage/__init__.py +1 -1
- coolbox/core/coverage/highlights.py +4 -4
- coolbox/core/frame/frame.py +16 -6
- coolbox/core/track/__init__.py +2 -1
- coolbox/core/track/arcs/plot.py +6 -2
- coolbox/core/track/bed/__init__.py +0 -1
- coolbox/core/track/bed/base.py +93 -85
- coolbox/core/track/bed/bed.py +37 -16
- coolbox/core/track/bed/fetch.py +1 -1
- coolbox/core/track/bed/plot.py +71 -221
- coolbox/core/track/gtf.py +11 -9
- coolbox/core/track/hicmat/base.py +12 -9
- coolbox/core/track/hicmat/cool.py +6 -5
- coolbox/core/track/hicmat/dothic.py +4 -3
- coolbox/core/track/hicmat/hicmat.py +8 -9
- coolbox/core/track/hicmat/plot.py +12 -6
- coolbox/core/track/hist/__init__.py +10 -3
- coolbox/core/track/hist/bigwig.py +0 -16
- coolbox/core/track/hist/plot.py +13 -5
- coolbox/core/track/ideogram.py +19 -10
- coolbox/core/track/pseudo.py +6 -2
- coolbox/core/track/tad.py +237 -0
- coolbox/utilities/bed.py +1 -1
- coolbox/utilities/hic/straw.py +532 -329
- coolbox/utilities/hic/wrap.py +55 -24
- {coolbox-0.3.7.dist-info → coolbox-0.3.9.dist-info}/METADATA +20 -11
- {coolbox-0.3.7.dist-info → coolbox-0.3.9.dist-info}/RECORD +34 -34
- {coolbox-0.3.7.dist-info → coolbox-0.3.9.dist-info}/WHEEL +1 -1
- coolbox/core/track/bed/tad.py +0 -18
- {coolbox-0.3.7.data → coolbox-0.3.9.data}/scripts/coolbox +0 -0
- {coolbox-0.3.7.dist-info → coolbox-0.3.9.dist-info}/LICENSE +0 -0
- {coolbox-0.3.7.dist-info → coolbox-0.3.9.dist-info}/top_level.txt +0 -0
coolbox/core/track/hist/plot.py
CHANGED
|
@@ -55,10 +55,13 @@ class PlotHist(object):
|
|
|
55
55
|
color = self.properties['color']
|
|
56
56
|
size = self.properties['size']
|
|
57
57
|
alpha = self.properties['alpha']
|
|
58
|
+
fmt = self.properties.get('fmt')
|
|
59
|
+
if fmt == '-':
|
|
60
|
+
fmt = '.'
|
|
58
61
|
mask = values > threshold
|
|
59
|
-
ax.scatter(indexes[mask], values[mask], s=size, alpha=alpha, c=self.properties.get('threshold_color', color))
|
|
62
|
+
ax.scatter(indexes[mask], values[mask], marker=fmt, s=size, alpha=alpha, c=self.properties.get('threshold_color', color))
|
|
60
63
|
mask = ~mask
|
|
61
|
-
ax.scatter(indexes[mask], values[mask], s=size, alpha=alpha, c=color)
|
|
64
|
+
ax.scatter(indexes[mask], values[mask], marker=fmt, s=size, alpha=alpha, c=color)
|
|
62
65
|
|
|
63
66
|
def plot_line(self, ax, indexes, values):
|
|
64
67
|
# reference for plotting with threshold: https://stackoverflow.com/a/30122991/10336496
|
|
@@ -71,10 +74,15 @@ class PlotHist(object):
|
|
|
71
74
|
alpha = self.properties.get("alpha", 1.0)
|
|
72
75
|
threshold = float(self.properties.get("threshold"))
|
|
73
76
|
threshold_color = self.properties.get("threshold_color")
|
|
74
|
-
ax.plot(indexes, values, fmt, linewidth=line_width, color=color, alpha=alpha)
|
|
75
77
|
if threshold and np.sum(values > threshold) > 0:
|
|
76
|
-
|
|
77
|
-
|
|
78
|
+
masked_g = values.copy()
|
|
79
|
+
masked_g[masked_g < threshold] = np.nan
|
|
80
|
+
ax.plot(indexes, masked_g, fmt, linewidth=line_width, color=threshold_color, alpha=alpha)
|
|
81
|
+
masked_l = values.copy()
|
|
82
|
+
masked_l[masked_l >= threshold] = np.nan
|
|
83
|
+
ax.plot(indexes, masked_l, fmt, linewidth=line_width, color=color, alpha=alpha)
|
|
84
|
+
else:
|
|
85
|
+
ax.plot(indexes, values, fmt, linewidth=line_width, color=color, alpha=alpha)
|
|
78
86
|
|
|
79
87
|
def plot_stairs(self, ax, gr, values, fill=True):
|
|
80
88
|
if len(values.shape) == 2:
|
coolbox/core/track/ideogram.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import pandas as pd
|
|
2
|
-
|
|
2
|
+
import matplotlib.pyplot as plt
|
|
3
3
|
|
|
4
4
|
from coolbox.utilities import (
|
|
5
5
|
get_logger, GenomeRange, file_to_intervaltree, hex2rgb,
|
|
@@ -24,7 +24,7 @@ class Ideogram(Track):
|
|
|
24
24
|
color_scheme : dict, optional
|
|
25
25
|
Color scheme of ideogram, default: Ideogram.DEFAULT_COLOR_SCHEME
|
|
26
26
|
|
|
27
|
-
|
|
27
|
+
show_band_name : bool, optional
|
|
28
28
|
Show band name or not. default True.
|
|
29
29
|
|
|
30
30
|
font_size : int, optional
|
|
@@ -55,7 +55,7 @@ class Ideogram(Track):
|
|
|
55
55
|
properties_dict = {
|
|
56
56
|
'file': file_,
|
|
57
57
|
'color_scheme': Ideogram.DEFAULT_COLOR_SCHEME,
|
|
58
|
-
'
|
|
58
|
+
'show_band_name': True,
|
|
59
59
|
'font_size': Ideogram.DEFAULT_FONT_SIZE,
|
|
60
60
|
'border_color': '#000000',
|
|
61
61
|
'border_width': Ideogram.DEFAULT_BORDER_WIDTH,
|
|
@@ -83,7 +83,7 @@ class Ideogram(Track):
|
|
|
83
83
|
start, end = itv.begin, itv.end
|
|
84
84
|
band_name, band_type = itv.data[:2]
|
|
85
85
|
rows.append([gr.chrom, start, end, band_name, band_type])
|
|
86
|
-
fields = ['chrom', 'start', 'end', '
|
|
86
|
+
fields = ['chrom', 'start', 'end', 'band_name', 'band_type']
|
|
87
87
|
return pd.DataFrame(rows, columns=fields)
|
|
88
88
|
|
|
89
89
|
def plot(self, ax, gr: GenomeRange, **kwargs):
|
|
@@ -101,19 +101,28 @@ class Ideogram(Track):
|
|
|
101
101
|
self.properties['show_band_name'] != 'no'
|
|
102
102
|
and gr.length < 80_000_000
|
|
103
103
|
):
|
|
104
|
-
self.plot_text(band_name, start, end, band_color)
|
|
105
|
-
coll =
|
|
106
|
-
|
|
107
|
-
|
|
104
|
+
self.plot_text(band_name, start, end, gr, band_color)
|
|
105
|
+
coll = plt.broken_barh(
|
|
106
|
+
xranges, (0, band_height), facecolors=colors,
|
|
107
|
+
linewidth=self.properties['border_width'],
|
|
108
|
+
edgecolor=self.properties['border_color'])
|
|
108
109
|
ax.add_collection(coll)
|
|
109
110
|
ax.set_ylim(-0.1, band_height + 0.1)
|
|
110
111
|
ax.set_xlim(gr.start, gr.end)
|
|
111
112
|
self.plot_label()
|
|
112
113
|
|
|
113
|
-
def plot_text(self, band_name, start, end, band_color):
|
|
114
|
+
def plot_text(self, band_name, start, end, gr, band_color):
|
|
114
115
|
band_height = self.properties['height']
|
|
115
116
|
x_pos = start + (end - start) * 0.15
|
|
117
|
+
if x_pos < gr.start:
|
|
118
|
+
x_pos = gr.start + 0.5 * (end - gr.start)
|
|
119
|
+
if (end - gr.start) < gr.length * 0.10:
|
|
120
|
+
return
|
|
121
|
+
elif x_pos > gr.end:
|
|
122
|
+
x_pos = start + 0.5 * (gr.end - start)
|
|
123
|
+
if (gr.end - start) < gr.length * 0.10:
|
|
124
|
+
return
|
|
116
125
|
y_pos = band_height / 2
|
|
117
126
|
rgb = hex2rgb(band_color) if isinstance(band_color, str) else band_color
|
|
118
|
-
color = '#e2e2e2' if sum(rgb) <
|
|
127
|
+
color = '#e2e2e2' if sum(rgb) < 200 else '#000000'
|
|
119
128
|
self.ax.text(x_pos, y_pos, band_name, fontsize=self.properties['font_size'], color=color)
|
coolbox/core/track/pseudo.py
CHANGED
|
@@ -128,11 +128,15 @@ class XAxis(Track):
|
|
|
128
128
|
|
|
129
129
|
ax.set_xlim(gr.start, gr.end)
|
|
130
130
|
ticks = ax.get_xticks()
|
|
131
|
-
|
|
131
|
+
|
|
132
|
+
if ticks[-1] - ticks[1] <= 1000:
|
|
133
|
+
labels = ["{:.0f}".format((x))
|
|
134
|
+
for x in ticks]
|
|
135
|
+
labels[-2] += " bp"
|
|
136
|
+
elif ticks[-1] - ticks[1] <= 1e5:
|
|
132
137
|
labels = ["{:,.0f}".format((x / 1e3))
|
|
133
138
|
for x in ticks]
|
|
134
139
|
labels[-2] += " Kb"
|
|
135
|
-
|
|
136
140
|
elif 1e5 < ticks[-1] - ticks[1] < 4e6:
|
|
137
141
|
labels = ["{:,.0f}".format((x / 1e3))
|
|
138
142
|
for x in ticks]
|
|
coolbox/core/track/tad.py
ADDED
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import pandas as pd
|
|
3
|
+
|
|
4
|
+
from coolbox.utilities import (
|
|
5
|
+
get_logger
|
|
6
|
+
)
|
|
7
|
+
from coolbox.utilities.genome import GenomeRange
|
|
8
|
+
from .bed.base import BedBase
|
|
9
|
+
from .bed.fetch import FetchBed
|
|
10
|
+
|
|
11
|
+
log = get_logger(__name__)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class PlotTAD(object):
|
|
15
|
+
def __init__(self):
|
|
16
|
+
self.init_colormap()
|
|
17
|
+
self.cache_gr = None
|
|
18
|
+
self.cache_res = None
|
|
19
|
+
|
|
20
|
+
def fetch_plot_data(self, gr: GenomeRange, **kwargs):
|
|
21
|
+
if gr == self.cache_gr:
|
|
22
|
+
return self.cache_res
|
|
23
|
+
else:
|
|
24
|
+
self.cache_gr = gr
|
|
25
|
+
self.cache_res = self.fetch_data(gr, **kwargs)
|
|
26
|
+
return self.cache_res
|
|
27
|
+
|
|
28
|
+
def get_depth_ratio(self, gr=None):
|
|
29
|
+
if 'depth_ratio' not in self.properties:
|
|
30
|
+
return 1.0
|
|
31
|
+
dr = self.properties['depth_ratio']
|
|
32
|
+
if dr == 'full':
|
|
33
|
+
return 1.0
|
|
34
|
+
if dr == 'auto':
|
|
35
|
+
assert gr is not None
|
|
36
|
+
min_dr = 0.1
|
|
37
|
+
tads = self.fetch_plot_data(gr)
|
|
38
|
+
tads = tads[(tads['start'] >= gr.start) & (tads['end'] <= gr.end)]
|
|
39
|
+
if tads.shape[0] > 0:
|
|
40
|
+
dr = (tads['end'] - tads['start']).max() / (gr.end - gr.start)
|
|
41
|
+
return max(dr, min_dr)
|
|
42
|
+
else:
|
|
43
|
+
return min_dr
|
|
44
|
+
return dr
|
|
45
|
+
|
|
46
|
+
def get_track_height(self, frame_width, gr):
|
|
47
|
+
height = frame_width * 0.5
|
|
48
|
+
height *= self.get_depth_ratio(gr)
|
|
49
|
+
return height
|
|
50
|
+
|
|
51
|
+
def plot_tads(self, ax, gr: GenomeRange, tads: pd.DataFrame):
|
|
52
|
+
"""
|
|
53
|
+
Plots the boundaries as triangles in the given ax.
|
|
54
|
+
"""
|
|
55
|
+
self.set_colormap(tads)
|
|
56
|
+
depth = (gr.end - gr.start) / 2
|
|
57
|
+
ymax = (gr.end - gr.start)
|
|
58
|
+
if 'track' in self.__dict__:
|
|
59
|
+
from coolbox.core.track.hicmat import HicMatBase
|
|
60
|
+
assert isinstance(self.track, HicMatBase), f"The parent track should be instance of {HicMatBase}"
|
|
61
|
+
|
|
62
|
+
hicmat_tri_style = (HicMatBase.STYLE_WINDOW, HicMatBase.STYLE_TRIANGULAR)
|
|
63
|
+
hicmat_ma_style = (HicMatBase.STYLE_MATRIX,)
|
|
64
|
+
|
|
65
|
+
hictrack = self.track
|
|
66
|
+
hicmat_style = hictrack.properties['style']
|
|
67
|
+
|
|
68
|
+
# TODO Should we add plotting in BigWig, BedGraph, ABCCompartment, Arcs support?(The original codes supports)
|
|
69
|
+
for region in tads.itertuples():
|
|
70
|
+
if hicmat_style in hicmat_tri_style:
|
|
71
|
+
self.plot_triangular(ax, gr, region, ymax, depth)
|
|
72
|
+
elif hicmat_style in hicmat_ma_style:
|
|
73
|
+
self.plot_box(ax, gr, region)
|
|
74
|
+
else:
|
|
75
|
+
raise ValueError(f"unsupported hicmat style {hicmat_style}")
|
|
76
|
+
else:
|
|
77
|
+
for region in tads.itertuples():
|
|
78
|
+
self.plot_triangular(ax, gr, region, ymax, depth)
|
|
79
|
+
dr = self.get_depth_ratio(gr)
|
|
80
|
+
if self.properties['orientation'] == 'inverted':
|
|
81
|
+
ax.set_ylim(depth * dr, 0)
|
|
82
|
+
else:
|
|
83
|
+
ax.set_ylim(0, depth * dr)
|
|
84
|
+
ax.set_xlim(gr.start, gr.end)
|
|
85
|
+
|
|
86
|
+
if len(tads) == 0:
|
|
87
|
+
log.debug("No regions found for Coverage {}.".format(self.properties['name']))
|
|
88
|
+
|
|
89
|
+
def plot_triangular(self, ax, gr, region, ymax, depth):
|
|
90
|
+
"""
|
|
91
|
+
/\
|
|
92
|
+
/ \
|
|
93
|
+
/ \
|
|
94
|
+
_____________________
|
|
95
|
+
x1 x2 x3
|
|
96
|
+
"""
|
|
97
|
+
|
|
98
|
+
from matplotlib.patches import Polygon
|
|
99
|
+
x1 = region.start
|
|
100
|
+
x2 = x1 + float(region.end - region.start) / 2
|
|
101
|
+
x3 = region.end
|
|
102
|
+
y1 = 0
|
|
103
|
+
y2 = (region.end - region.start)
|
|
104
|
+
|
|
105
|
+
y = (y2 / ymax) * depth
|
|
106
|
+
|
|
107
|
+
rgb, edgecolor = self.get_rgb_and_edge_color(region)
|
|
108
|
+
|
|
109
|
+
triangle = Polygon(np.array([[x1, y1], [x2, y], [x3, y1]]), closed=True,
|
|
110
|
+
facecolor=rgb, edgecolor=edgecolor,
|
|
111
|
+
alpha=self.properties['alpha'],
|
|
112
|
+
linestyle=self.properties['border_style'],
|
|
113
|
+
linewidth=self.properties['border_width'])
|
|
114
|
+
ax.add_artist(triangle)
|
|
115
|
+
self.plot_score(ax, gr, region, 'triangular', ymax, depth)
|
|
116
|
+
|
|
117
|
+
def plot_box(self, ax, gr, region):
|
|
118
|
+
from matplotlib.patches import Rectangle
|
|
119
|
+
|
|
120
|
+
x1 = region.start
|
|
121
|
+
x2 = region.end
|
|
122
|
+
x = y = x1
|
|
123
|
+
w = h = (x2 - x1)
|
|
124
|
+
|
|
125
|
+
rgb, edgecolor = self.get_rgb_and_edge_color(region)
|
|
126
|
+
|
|
127
|
+
fill = self.properties['border_only'] == 'no'
|
|
128
|
+
|
|
129
|
+
rec = Rectangle((x, y), w, h,
|
|
130
|
+
fill=fill,
|
|
131
|
+
facecolor=rgb,
|
|
132
|
+
edgecolor=edgecolor,
|
|
133
|
+
alpha=self.properties['alpha'],
|
|
134
|
+
linestyle=self.properties['border_style'],
|
|
135
|
+
linewidth=self.properties['border_width'])
|
|
136
|
+
ax.add_patch(rec)
|
|
137
|
+
self.plot_score(ax, gr, region, 'box')
|
|
138
|
+
|
|
139
|
+
def plot_score(self, ax, gr, region, style, ymax=None, depth=None):
|
|
140
|
+
properties = self.properties
|
|
141
|
+
|
|
142
|
+
if properties['show_score'] != 'yes':
|
|
143
|
+
return
|
|
144
|
+
bed = region
|
|
145
|
+
score = bed.score
|
|
146
|
+
if not isinstance(score, (float, int)):
|
|
147
|
+
# score is not number not plot
|
|
148
|
+
return
|
|
149
|
+
region_length = region.end - region.start
|
|
150
|
+
if region_length / gr.length < 0.05:
|
|
151
|
+
# region too small not plot score
|
|
152
|
+
return
|
|
153
|
+
font_size = properties['score_font_size']
|
|
154
|
+
if font_size == 'auto':
|
|
155
|
+
# inference the font size
|
|
156
|
+
from math import log2
|
|
157
|
+
base_size = 18
|
|
158
|
+
s_ = (region_length / gr.length) * 10
|
|
159
|
+
s_ = int(log2(s_))
|
|
160
|
+
font_size = base_size + s_
|
|
161
|
+
ratio = properties['score_height_ratio']
|
|
162
|
+
color = properties['score_font_color']
|
|
163
|
+
if style == 'box':
|
|
164
|
+
x1 = region.start
|
|
165
|
+
x2 = region.end
|
|
166
|
+
w = x2 - x1
|
|
167
|
+
x = x2 - w * ratio
|
|
168
|
+
y = x1 + w * ratio
|
|
169
|
+
else: # triangular
|
|
170
|
+
x = region.begin + region_length * 0.4
|
|
171
|
+
y = (region_length / ymax) * depth * ratio
|
|
172
|
+
ax.text(x, y, "{0:.3f}".format(score), fontsize=font_size, color=color)
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
class TAD(BedBase, PlotTAD, FetchBed):
|
|
176
|
+
"""
|
|
177
|
+
Tad tack from bed file
|
|
178
|
+
|
|
179
|
+
Parameters
|
|
180
|
+
----------
|
|
181
|
+
border_style: str, optional
|
|
182
|
+
Border style of tad. (Default: 'solid')
|
|
183
|
+
|
|
184
|
+
border_width: int, optional
|
|
185
|
+
Border width of tad. (Default: '2.0')
|
|
186
|
+
|
|
187
|
+
show_score : bool
|
|
188
|
+
Show bed score or not.
|
|
189
|
+
default False.
|
|
190
|
+
|
|
191
|
+
score_font_size : {'auto', int}
|
|
192
|
+
Score text font size.
|
|
193
|
+
default 'auto'
|
|
194
|
+
|
|
195
|
+
score_font_color : str
|
|
196
|
+
Score text color.
|
|
197
|
+
default '#000000'
|
|
198
|
+
|
|
199
|
+
score_height_ratio : float
|
|
200
|
+
(text tag height) / (TAD height). used for adjust the position of Score text.
|
|
201
|
+
default 0.5
|
|
202
|
+
|
|
203
|
+
border_only : bool
|
|
204
|
+
Only show border, default False
|
|
205
|
+
|
|
206
|
+
depth_ratio : {float, 'auto', 'full'}
|
|
207
|
+
Depth ratio of triangular, use 'full' for full depth, use 'auto' for calculate depth by current genome_range. default 'auto'.
|
|
208
|
+
|
|
209
|
+
orientation : {'normal', 'inverted'}
|
|
210
|
+
Invert y-axis or not, default 'normal'
|
|
211
|
+
|
|
212
|
+
"""
|
|
213
|
+
|
|
214
|
+
DEFAULT_PROPERTIES = {
|
|
215
|
+
'alpha': 0.3,
|
|
216
|
+
'border_style': "--",
|
|
217
|
+
'border_width': 2.0,
|
|
218
|
+
"show_score": False,
|
|
219
|
+
"score_font_size": 'auto',
|
|
220
|
+
"score_font_color": "#000000",
|
|
221
|
+
"score_height_ratio": 0.4,
|
|
222
|
+
"border_only": False,
|
|
223
|
+
"depth_ratio": 'auto',
|
|
224
|
+
"orientation": 'inverted',
|
|
225
|
+
}
|
|
226
|
+
|
|
227
|
+
def __init__(self, file, **kwargs):
|
|
228
|
+
properties = TAD.DEFAULT_PROPERTIES.copy()
|
|
229
|
+
properties.update(kwargs)
|
|
230
|
+
super().__init__(file, **properties)
|
|
231
|
+
PlotTAD.__init__(self)
|
|
232
|
+
|
|
233
|
+
def plot(self, ax, gr: GenomeRange, **kwargs):
|
|
234
|
+
self.ax = ax
|
|
235
|
+
ov_intervals: pd.DataFrame = self.fetch_plot_data(gr, **kwargs)
|
|
236
|
+
self.plot_tads(ax, gr, ov_intervals)
|
|
237
|
+
self.plot_label()
|
coolbox/utilities/bed.py
CHANGED
|
@@ -170,7 +170,7 @@ class ReadBed(object):
|
|
|
170
170
|
"""
|
|
171
171
|
fields = self.get_no_comment_line(iter=file_iter, count=False)
|
|
172
172
|
fields = to_string(fields)
|
|
173
|
-
line_values = fields.split()
|
|
173
|
+
line_values = fields.split("\t")
|
|
174
174
|
|
|
175
175
|
if len(line_values) == 3:
|
|
176
176
|
self.file_type = 'bed3'
|