coolbox 0.3.7__py3-none-any.whl → 0.3.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of coolbox might be problematic. Click here for more details.

Files changed (35) hide show
  1. coolbox/__init__.py +1 -1
  2. coolbox/cli.py +0 -2
  3. coolbox/core/browser/base.py +5 -2
  4. coolbox/core/coverage/__init__.py +1 -1
  5. coolbox/core/coverage/highlights.py +4 -4
  6. coolbox/core/frame/frame.py +16 -6
  7. coolbox/core/track/__init__.py +2 -1
  8. coolbox/core/track/arcs/plot.py +6 -2
  9. coolbox/core/track/bed/__init__.py +0 -1
  10. coolbox/core/track/bed/base.py +93 -85
  11. coolbox/core/track/bed/bed.py +37 -16
  12. coolbox/core/track/bed/fetch.py +1 -1
  13. coolbox/core/track/bed/plot.py +71 -221
  14. coolbox/core/track/gtf.py +11 -9
  15. coolbox/core/track/hicmat/base.py +12 -9
  16. coolbox/core/track/hicmat/cool.py +6 -5
  17. coolbox/core/track/hicmat/dothic.py +4 -3
  18. coolbox/core/track/hicmat/hicmat.py +8 -9
  19. coolbox/core/track/hicmat/plot.py +12 -6
  20. coolbox/core/track/hist/__init__.py +10 -3
  21. coolbox/core/track/hist/bigwig.py +0 -16
  22. coolbox/core/track/hist/plot.py +13 -5
  23. coolbox/core/track/ideogram.py +19 -10
  24. coolbox/core/track/pseudo.py +6 -2
  25. coolbox/core/track/tad.py +237 -0
  26. coolbox/utilities/bed.py +1 -1
  27. coolbox/utilities/hic/straw.py +532 -329
  28. coolbox/utilities/hic/wrap.py +55 -24
  29. {coolbox-0.3.7.dist-info → coolbox-0.3.9.dist-info}/METADATA +20 -11
  30. {coolbox-0.3.7.dist-info → coolbox-0.3.9.dist-info}/RECORD +34 -34
  31. {coolbox-0.3.7.dist-info → coolbox-0.3.9.dist-info}/WHEEL +1 -1
  32. coolbox/core/track/bed/tad.py +0 -18
  33. {coolbox-0.3.7.data → coolbox-0.3.9.data}/scripts/coolbox +0 -0
  34. {coolbox-0.3.7.dist-info → coolbox-0.3.9.dist-info}/LICENSE +0 -0
  35. {coolbox-0.3.7.dist-info → coolbox-0.3.9.dist-info}/top_level.txt +0 -0
coolbox/__init__.py CHANGED
@@ -1 +1 @@
1
- __version__ = '0.3.7'
1
+ __version__ = '0.3.9'
coolbox/cli.py CHANGED
@@ -250,7 +250,6 @@ class CLI(object):
250
250
  def print_source(self):
251
251
  """Print the browser composing code."""
252
252
  print(self.source())
253
- return self
254
253
 
255
254
  def gen_notebook(self, notebook_path, notes=True, figsave=True):
256
255
  """Generate a notebook containing the code to run the coolbox browser.
@@ -288,7 +287,6 @@ class CLI(object):
288
287
  )
289
288
  nb['cells'] = cells
290
289
  nbf.write(nb, notebook_path)
291
- return self
292
290
 
293
291
  def run_jupyter(self, jupyter_args="--ip=0.0.0.0"):
294
292
  """Create a notebook according to command line, then start a jupyter process.
@@ -43,8 +43,9 @@ class Browser(object):
43
43
  dpi : int, optional
44
44
  The dpi of frame's image.
45
45
 
46
- img_format : str, optional
47
- Frame image format, default svg.
46
+ img_format : {'svg', 'png'}
47
+ Image format for browser display, default svg.
48
+ NOTE: This argument is not used when saving an image.
48
49
  """
49
50
 
50
51
  self.dpi = dpi
@@ -251,6 +252,8 @@ class Browser(object):
251
252
  def save(self, path, dpi=None):
252
253
  """
253
254
  Save current frame's image to file.
255
+ The suffix of the path determines the saving format.
256
+ For example: `bsr.save("fig.png")` will save the file in 'png' format.
254
257
  """
255
258
  c_fig = self.frame.show()
256
259
  dpi = dpi or self.dpi
@@ -3,7 +3,7 @@ from .vlines import Vlines, VlinesFromFile
3
3
  from .hlines import HLines
4
4
 
5
5
  from .base import track_to_coverage
6
- from ..track.bed import TAD
6
+ from ..track.tad import TAD
7
7
  from ..track.hist import Hist, BigWig, BedGraph
8
8
  from ..track.arcs import Arcs, Pairs, BEDPE, HiCPeaks
9
9
 
@@ -87,9 +87,9 @@ class HighLightsFromFile(Coverage, _Highlights):
87
87
  "file": file_,
88
88
  "color": "bed_rgb",
89
89
  "alpha": 0.1,
90
- "border_line": True,
90
+ "border_line": False,
91
91
  "border_line_style": "dashed",
92
- "border_line_width": 0,
92
+ "border_line_width": 2,
93
93
  "border_line_color": "#000000",
94
94
  "border_line_alpha": 0.8,
95
95
  }
@@ -168,9 +168,9 @@ class HighLights(Coverage, _Highlights):
168
168
  "highlight_regions": highlight_regions,
169
169
  "color": HighLights.DEFAULT_COLOR,
170
170
  "alpha": 0.25,
171
- "border_line": True,
171
+ "border_line": False,
172
172
  "border_line_style": "dashed",
173
- "border_line_width": 0,
173
+ "border_line_width": 2,
174
174
  "border_line_color": "#000000",
175
175
  "border_line_alpha": 0.8,
176
176
  }
@@ -137,8 +137,18 @@ class Frame(FrameBase):
137
137
  heights = []
138
138
  for track in self.tracks.values():
139
139
  if hasattr(track, 'get_track_height'):
140
- frame_width = self.properties['width'] * self.properties['width_ratios'][1]
141
- height = track.get_track_height(frame_width)
140
+ # The actual width is given by multiplying:
141
+ # - the actual width in units cm/inch
142
+ # - the fraction of the figure not covered by margins
143
+ # - the fraction of the grid allotted to the middle column
144
+ margins = self.properties["margins"]
145
+ margins_fraction = margins["right"] - margins["left"]
146
+ frame_width = (
147
+ self.properties["width"]
148
+ * self.properties["width_ratios"][1]
149
+ * margins_fraction
150
+ )
151
+ height = track.get_track_height(frame_width, self.current_range)
142
152
  heights.append(height)
143
153
  elif 'height' in track.properties:
144
154
  heights.append(track.properties['height'])
@@ -146,7 +156,7 @@ class Frame(FrameBase):
146
156
  heights.append(default_height)
147
157
  return heights
148
158
 
149
- def plot(self, *args):
159
+ def plot(self, *args, close_fig=True):
150
160
  """
151
161
  Plot all tracks.
152
162
 
@@ -186,8 +196,7 @@ class Frame(FrameBase):
186
196
  grids = matplotlib.gridspec.GridSpec(
187
197
  len(tracks_height), 3,
188
198
  height_ratios=tracks_height,
189
- width_ratios=self.properties['width_ratios'],
190
- wspace=0.01)
199
+ width_ratios=self.properties['width_ratios'])
191
200
 
192
201
  axis_list = []
193
202
  for idx, track in enumerate(self.tracks.values()):
@@ -231,7 +240,8 @@ class Frame(FrameBase):
231
240
  bottom=margins['bottom'],
232
241
  top=margins['top'])
233
242
 
234
- plt.close()
243
+ if close_fig:
244
+ plt.close()
235
245
 
236
246
  return fig
237
247
 
@@ -3,7 +3,8 @@ from .bam import BAM
3
3
  from .gtf import GTF
4
4
  from .ideogram import Ideogram
5
5
  from .pseudo import Spacer, HLine, XAxis, ChromName
6
- from .bed import BedBase, BED, TAD # no all-in class/function
6
+ from .bed import BedBase, BED # no all-in class/function
7
+ from .tad import TAD
7
8
  from .hicmat import HicMatBase, Cool, DotHiC, HiCDiff, Selfish, HiCMat
8
9
  from .hist import HistBase, BedGraph, BigWig, ABCompartment, DiScore, InsuScore, Virtual4C, BAMCov, SNP, Hist
9
10
  from .arcs import ArcsBase, Pairs, BEDPE, HiCPeaks, Arcs
@@ -103,8 +103,12 @@ class PlotContacts(object):
103
103
  center = (start + end) / 2
104
104
  ax.plot([center], [diameter])
105
105
  arc = Arc(
106
- (center, 0), diameter,
107
- height, 0, 0, 180,
106
+ xy=(center, 0),
107
+ width=diameter,
108
+ height=height,
109
+ angle=0,
110
+ theta1=0,
111
+ theta2=180,
108
112
  color=color,
109
113
  alpha=alpha,
110
114
  lw=line_width,
@@ -1,3 +1,2 @@
1
1
  from .bed import BedBase
2
2
  from .bed import BED
3
- from .tad import TAD
@@ -1,28 +1,24 @@
1
+ from typing import Union
2
+
1
3
  import pandas as pd
4
+ import matplotlib
2
5
 
3
- from coolbox.utilities import (
4
- get_logger
5
- )
6
+ from coolbox.utilities import get_logger
7
+ from coolbox.utilities.bed import build_bed_index
6
8
  from coolbox.utilities.genome import GenomeRange
7
-
8
9
  from coolbox.core.track.base import Track
9
- from .plot import PlotBed
10
10
 
11
11
  log = get_logger(__name__)
12
12
 
13
13
 
14
- class BedBase(Track, PlotBed):
14
+ class BedBase(Track):
15
15
  """
16
16
  BED Base track.
17
17
 
18
18
  Parameters
19
19
  ----------
20
- style : {'gene', 'tad'}
21
-
22
- gene_style: {'flybase', 'normal'}
23
-
24
- display : {'stacked', 'interlaced', 'collapsed'}, optional
25
- Display mode. (Default: 'stacked')
20
+ file: str
21
+ The file path of `.bed` file.
26
22
 
27
23
  color : str, optional
28
24
  Track color, 'bed_rgb' for auto specify color according to bed record.
@@ -31,86 +27,32 @@ class BedBase(Track, PlotBed):
31
27
  border_color : str, optional
32
28
  Border_color of gene. (Default: 'black')
33
29
 
34
- fontsize : int, optional
35
- Font size. (Default: BED.DEFAULT_FONTSIZE)
36
-
37
- labels : {True, False, 'auto'}, optional
38
- Draw bed name or not. 'auto' for automate decision according to density.
39
- (Default: 'auto')
40
-
41
- interval_height : int, optional
42
- The height of the interval. (Default: 100)
43
-
44
- num_rows : int, optional
45
- Set the max interval rows. (Default: unlimited interval rows)
46
-
47
30
  max_value : float, optional
48
31
  Max score. (Default: inf)
49
32
 
50
33
  min_value : float, optional
51
34
  Min score. (Default: -inf)
52
35
 
53
- border_style: str, optional
54
- Border style of tad. (Default: 'solid')
55
-
56
- border_width: int, optional
57
- Border width of tad. (Default: '2.0')
58
-
59
- show_score : bool
60
- Show bed score or not.
61
- default False.
62
-
63
- score_font_size : {'auto', int}
64
- Score text font size.
65
- default 'auto'
66
-
67
- score_font_color : str
68
- Score text color.
69
- default '#000000'
70
-
71
- score_height_ratio : float
72
- (text tag height) / (TAD height). used for adjust the position of Score text.
73
- default 0.5
74
-
75
- border_only : bool
76
- Only show border, default False
77
-
78
36
  """
79
37
 
80
- STYLE_GENE = "gene"
81
- STYLE_TAD = "tad"
82
-
83
38
  COLOR = "#1f78b4"
84
39
 
85
40
  DEFAULT_PROPERTIES = {
86
- 'style': STYLE_GENE,
87
- # gene
88
- 'gene_style': 'flybase',
89
- 'display': 'stacked',
90
41
  'color': "bed_rgb",
91
42
  'border_color': "#1f78b4",
92
- 'fontsize': 12,
93
- 'interval_height': 100,
94
- 'num_rows': None,
95
- 'labels': 'off',
96
43
  'min_score': '-inf',
97
44
  'max_score': 'inf',
98
45
  'bed_type': None,
99
- # tad
100
- 'border_style': "--",
101
- 'border_width': 2.0,
102
- "show_score": False,
103
- "score_font_size": 'auto',
104
- "score_font_color": "#000000",
105
- "score_height_ratio": 0.4,
106
- "border_only": False,
107
46
  }
108
47
 
109
- def __init__(self, **kwargs):
48
+ def __init__(self, file, **kwargs):
110
49
  properties = BedBase.DEFAULT_PROPERTIES.copy()
111
- properties.update(kwargs)
50
+ properties.update({
51
+ 'file': file,
52
+ **kwargs
53
+ })
112
54
  super().__init__(properties)
113
- self.init_for_plot()
55
+ self.bgz_file = build_bed_index(file)
114
56
 
115
57
  def fetch_data(self, gr: GenomeRange, **kwargs) -> pd.DataFrame:
116
58
  """
@@ -128,17 +70,83 @@ class BedBase(Track, PlotBed):
128
70
  The table can be in bed6/bed9/bed12 format and the trailing columns can be omitted.
129
71
 
130
72
  """
131
- raise NotImplementedError
132
-
133
- def plot(self, ax, gr: GenomeRange, **kwargs):
134
- self.ax = ax
135
- ov_intervals: pd.DataFrame = self.fetch_plot_data(gr, **kwargs)
136
-
137
- style = self.properties['style']
138
- if style == self.STYLE_TAD:
139
- self.plot_tads(ax, gr, ov_intervals)
140
- elif style == self.STYLE_GENE:
141
- self.plot_genes(ax, gr, ov_intervals)
73
+ return self.fetch_intervals(self.bgz_file, gr)
74
+
75
+ def init_colormap(self):
76
+ self.colormap = None
77
+ if not matplotlib.colors.is_color_like(self.properties['color']) and self.properties['color'] != 'bed_rgb':
78
+ if self.properties['color'] not in matplotlib.cm.datad:
79
+ log.debug("*WARNING* color: '{}' for Track {} is not valid. Color has "
80
+ "been set to {}".format(self.properties['color'], self.properties['name'],
81
+ self.COLOR))
82
+ self.properties['color'] = self.COLOR
83
+ else:
84
+ self.colormap = self.properties['color']
85
+
86
+ def set_colormap(self, df):
87
+ """As min_score and max_score change every plot, we compute them for every plot"""
88
+ props = self.properties
89
+ min_score, max_score = props['min_score'], props['max_score']
90
+ has_score_col = props['bed_type'] in ('bed6', 'bed9', 'bed12')
91
+ if has_score_col and (df.shape[0] > 0):
92
+ min_score = (min_score != 'inf') or df['score'].min()
93
+ max_score = (max_score != '-inf') or df['score'].max()
94
+ min_score, max_score = float(min_score), float(max_score)
95
+ # set colormap
96
+ if self.colormap is not None:
97
+ norm = matplotlib.colors.Normalize(vmin=min_score, vmax=max_score)
98
+ cmap = matplotlib.cm.get_cmap(props['color'])
99
+ self.colormap = matplotlib.cm.ScalarMappable(norm=norm, cmap=cmap)
100
+ if props['color'] == 'bed_rgb' and props['bed_type'] not in ['bed12', 'bed9']:
101
+ log.debug("*WARNING* Color set to 'bed_rgb', but bed file does not have the rgb field. The color has "
102
+ "been set to {}".format(self.COLOR))
103
+ self.properties['color'] = self.COLOR
104
+ self.colormap = None
105
+
106
+ def get_rgb_and_edge_color(self, bed):
107
+ # TODO need simplification
108
+ props = self.properties
109
+ rgb = props['color']
110
+ edgecolor = props['border_color']
111
+
112
+ if self.colormap:
113
+ # translate value field (in the example above is 0 or 0.2686...) into a color
114
+ rgb = self.colormap.to_rgba(bed.score)
115
+
116
+ # for tad coverage
117
+ if props.get('border_only', 'no') == 'yes':
118
+ rgb = 'none'
119
+ elif props['color'] == 'bed_rgb':
120
+ # if rgb is set in the bed line, this overrides the previously
121
+ # defined colormap
122
+ if props['bed_type'] in ['bed9', 'bed12'] and len(bed.rgb) == 3:
123
+ try:
124
+ rgb = [float(x) / 255 for x in bed.rgb]
125
+ if 'border_color' in props:
126
+ edgecolor = props['border_color']
127
+ else:
128
+ edgecolor = props['color']
129
+ except IndexError:
130
+ rgb = self.COLOR
131
+ else:
132
+ rgb = self.COLOR
133
+ return rgb, edgecolor
134
+
135
+ @staticmethod
136
+ def infer_bed_type(df: pd.DataFrame) -> Union[str, None]:
137
+ # bed_type of the dataframe is stored in the dataframe's __dict__ in FetchBed.fetch_intervals
138
+ if 'bed_type' in df.__dict__:
139
+ bed_type = df.bed_type
142
140
  else:
143
- raise ValueError("style not supportted, should be one of 'gene' 'tad' ")
144
- self.plot_label()
141
+ bed_types = {
142
+ 12: 'bed12',
143
+ 9: 'bed9',
144
+ 6: 'bed6',
145
+ 3: 'bed3'
146
+ }
147
+ num_col = len(df.columns)
148
+ bed_type = bed_types[num_col] if num_col in bed_types else 'bed3'
149
+ if bed_type == 'bed3' and num_col < 3:
150
+ raise ValueError(f"Invalid dataframe for bed3 with columns: {df.columns}")
151
+ return bed_type
152
+
@@ -3,39 +3,60 @@ from coolbox.utilities import (
3
3
  get_logger
4
4
  )
5
5
  from coolbox.utilities.genome import GenomeRange
6
- from coolbox.utilities.bed import build_bed_index
7
6
  from .base import BedBase
7
+ from .plot import PlotGenes
8
8
 
9
9
  log = get_logger(__name__)
10
10
 
11
11
 
12
- class BED(BedBase, FetchBed):
12
+ class BED(BedBase, PlotGenes, FetchBed):
13
13
  """
14
14
  Bed Track for plotting 1d intervals data from .bed file.
15
15
  The input bed file can be bed3/bed6/bed9/bed12
16
16
 
17
17
  Parameters
18
18
  ----------
19
- file: str
20
- The file path of `.bed` file.
19
+ gene_style: {'flybase', 'normal'}
21
20
 
21
+ display : {'stacked', 'interlaced', 'collapsed'}, optional
22
+ Display mode. (Default: 'stacked')
22
23
 
24
+ fontsize : int, optional
25
+ Font size. (Default: BED.DEFAULT_FONTSIZE)
26
+
27
+ labels : {True, False, 'auto'}, optional
28
+ Draw bed name or not. 'auto' for automatic decision according to density.
29
+ (Default: 'auto')
30
+
31
+ interval_height : int, optional
32
+ The height of the interval. (Default: 100)
33
+
34
+ num_rows : int, optional
35
+ Set the max interval rows. (Default: unlimited interval rows)
36
+
37
+ row_height : float
38
+ Height of a row. default 0.5
23
39
  """
24
40
 
25
41
  DEFAULT_PROPERTIES = {
26
- 'labels': "on",
42
+ 'labels': 'auto',
43
+ 'height': 'auto',
44
+ 'gene_style': 'flybase',
45
+ 'display': 'stacked',
46
+ 'fontsize': 12,
47
+ 'interval_height': 100,
48
+ 'num_rows': None,
49
+ 'row_height': 0.5,
27
50
  }
28
51
 
29
52
  def __init__(self, file, **kwargs):
30
53
  properties = BED.DEFAULT_PROPERTIES.copy()
31
- properties.update({
32
- 'file': file,
33
- **kwargs
34
- })
35
- super().__init__(**properties)
36
- self.bgz_file = build_bed_index(file)
37
-
38
- def fetch_data(self, gr: GenomeRange, **kwargs):
39
- return self.fetch_intervals(self.bgz_file, gr)
40
-
41
-
54
+ properties.update(kwargs)
55
+ super().__init__(file, **properties)
56
+ PlotGenes.__init__(self)
57
+
58
+ def plot(self, ax, gr: GenomeRange, **kwargs):
59
+ self.ax = ax
60
+ ov_intervals: pd.DataFrame = self.fetch_plot_data(gr, **kwargs)
61
+ self.plot_genes(ax, gr, ov_intervals)
62
+ self.plot_label()
@@ -34,7 +34,7 @@ class FetchBed(object):
34
34
  try:
35
35
  bed_iterator = ReadBed(query_bed(bgz_file, gr.chrom, gr.start, gr.end))
36
36
  except StopIteration:
37
- log.info(f"No records in the range {str(gr)}")
37
+ log.debug(f"No records in the range {str(gr)}")
38
38
  return [], None
39
39
 
40
40
  intervals = [bed for bed in bed_iterator]