cocoatree 0.1.0rc0.dev2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cocoatree/__init__.py +8 -0
- cocoatree/__params.py +80 -0
- cocoatree/_pipeline.py +144 -0
- cocoatree/_scraper.py +23 -0
- cocoatree/_version.py +1 -0
- cocoatree/datasets/__init__.py +3 -0
- cocoatree/datasets/_base.py +188 -0
- cocoatree/datasets/data/DHFR/3QL0.pdb +3507 -0
- cocoatree/datasets/data/DHFR/DHFR_sectors.npz +0 -0
- cocoatree/datasets/data/DHFR/alignment.faa.gz +0 -0
- cocoatree/datasets/data/S1A_serine_proteases/3tgi.pdb +2844 -0
- cocoatree/datasets/data/S1A_serine_proteases/halabi_alignment.fasta +20580 -0
- cocoatree/datasets/data/S1A_serine_proteases/halabi_metadata.csv +1471 -0
- cocoatree/datasets/data/S1A_serine_proteases/halabi_sectors.npz +0 -0
- cocoatree/datasets/data/S1A_serine_proteases/rivoire_alignment.fasta +19460 -0
- cocoatree/datasets/data/S1A_serine_proteases/rivoire_metadata.csv +1391 -0
- cocoatree/datasets/data/S1A_serine_proteases/rivoire_sectors.npz +0 -0
- cocoatree/datasets/data/rhomboid_proteases/2NRF.pdb +3300 -0
- cocoatree/datasets/data/rhomboid_proteases/Data_S1_Rhomboid_MSA_short_names.fasta +5534 -0
- cocoatree/datasets/data/rhomboid_proteases/rhomboid_metadata_clean.csv +2766 -0
- cocoatree/datasets/data/rhomboid_proteases/rhomboid_sectors.npz +0 -0
- cocoatree/datasets/tests/test_datasets.py +14 -0
- cocoatree/decomposition.py +263 -0
- cocoatree/io.py +185 -0
- cocoatree/msa.py +579 -0
- cocoatree/pysca.py +238 -0
- cocoatree/randomize.py +30 -0
- cocoatree/scripts/cocoatree-sca.py +6 -0
- cocoatree/statistics/__init__.py +58 -0
- cocoatree/statistics/pairwise.py +318 -0
- cocoatree/statistics/position.py +258 -0
- cocoatree/tests/test_init.py +24 -0
- cocoatree/tests/test_msa.py +14 -0
- cocoatree/visualization.py +440 -0
- cocoatree-0.1.0rc0.dev2.dist-info/METADATA +66 -0
- cocoatree-0.1.0rc0.dev2.dist-info/RECORD +39 -0
- cocoatree-0.1.0rc0.dev2.dist-info/WHEEL +5 -0
- cocoatree-0.1.0rc0.dev2.dist-info/licenses/LICENSE +28 -0
- cocoatree-0.1.0rc0.dev2.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,440 @@
|
|
|
1
|
+
"""Module to visualize phylogenetic trees along with XCoRs"""
|
|
2
|
+
|
|
3
|
+
# User provided file:
|
|
4
|
+
# - phylogenetic tree in newick format
|
|
5
|
+
# - multiple sequence alignment used to generate the tree in fasta format
|
|
6
|
+
# - annotation table in csv format
|
|
7
|
+
|
|
8
|
+
# Import necessary packages
|
|
9
|
+
from ete3 import ProfileFace, TreeStyle, NodeStyle, TextFace, \
|
|
10
|
+
add_face_to_node, SeqMotifFace, RectFace
|
|
11
|
+
from pandas.api.types import is_numeric_dtype # type: ignore
|
|
12
|
+
import pandas as pd
|
|
13
|
+
import numpy as np
|
|
14
|
+
from PyQt5 import QtGui
|
|
15
|
+
import matplotlib.colors as colors
|
|
16
|
+
import matplotlib.cm as cmx
|
|
17
|
+
import matplotlib.pyplot as plt
|
|
18
|
+
|
|
19
|
+
from .msa import compute_seq_identity, compute_seq_similarity, \
|
|
20
|
+
compute_normalized_seq_similarity
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _annot_to_color(attribute, tree, df_annot, cmap='jet'):
|
|
24
|
+
"""
|
|
25
|
+
Reads in the attributes specified by the user in the annotation csv file
|
|
26
|
+
and attributes a color palette for each.
|
|
27
|
+
|
|
28
|
+
Parameters
|
|
29
|
+
----------
|
|
30
|
+
tree : ete3's tree object,
|
|
31
|
+
as imported by io.load_tree_ete3()
|
|
32
|
+
|
|
33
|
+
attributes : list of column names to grab
|
|
34
|
+
|
|
35
|
+
df_annot : pandas dataframe of the annotation file
|
|
36
|
+
|
|
37
|
+
Returns
|
|
38
|
+
-------
|
|
39
|
+
att_dict : dictionnary in which keys are the sequence IDs and the values
|
|
40
|
+
are the colors associated with it
|
|
41
|
+
|
|
42
|
+
color_dict : dictionnary in which keys are the attribute's categories and
|
|
43
|
+
the values are the colors associated to each category
|
|
44
|
+
"""
|
|
45
|
+
id_lst = tree.get_leaf_names()
|
|
46
|
+
df_annot = df_annot.fillna('unknown')
|
|
47
|
+
if is_numeric_dtype(df_annot['Seq_ID']):
|
|
48
|
+
df_annot['Seq_ID'] = df_annot['Seq_ID'].astype('str')
|
|
49
|
+
df_annot = df_annot[df_annot['Seq_ID'].isin(id_lst)]
|
|
50
|
+
|
|
51
|
+
att_dict = {}
|
|
52
|
+
df_annot = df_annot[['Seq_ID', attribute]]
|
|
53
|
+
|
|
54
|
+
if isinstance(cmap, str):
|
|
55
|
+
color_dict = _get_color_palette(
|
|
56
|
+
list(df_annot[attribute].unique()), cmap)
|
|
57
|
+
else:
|
|
58
|
+
color_dict = {n: cmap[n] for n in list(df_annot[attribute].unique())}
|
|
59
|
+
|
|
60
|
+
df_annot[str(attribute + '_color')] = df_annot.apply(
|
|
61
|
+
lambda row: color_dict[row[attribute]], axis=1)
|
|
62
|
+
for i in range(0, len(df_annot['Seq_ID'])):
|
|
63
|
+
row = df_annot.iloc[i].tolist()
|
|
64
|
+
att_dict[row[0]] = row[2]
|
|
65
|
+
|
|
66
|
+
return att_dict, color_dict
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _generate_colors_from_colormaps(n_colors, cmap="jet", as_hex=True):
    """
    Sample `n_colors` evenly spaced colors from a Matplotlib colormap.

    Parameters
    ----------
    n_colors : int, number of colors to draw

    cmap : name of the colormap (converted with str()), default: "jet"

    as_hex : boolean, optional, default: True
        whether to convert each color with matplotlib.colors.to_hex

    Returns
    -------
    list of `n_colors` colors, taken at regular steps between both ends of
    the colormap: hex strings if `as_hex`, otherwise the values returned by
    the colormap itself
    """
    palette = plt.get_cmap(str(cmap))
    sampled = [palette(position) for position in np.linspace(0, 1, n_colors)]
    if not as_hex:
        return sampled
    return [colors.to_hex(rgba) for rgba in sampled]
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
# TO DO: allow user to choose which color to use for 'unknown'
|
|
83
|
+
# (currently: white by default)
|
|
84
|
+
def _get_color_palette(values, cmap):
    """
    Associate a color with each value of a list.

    Parameters
    ----------
    values : list of the categories to color

    cmap : colormap forwarded to _generate_colors_from_colormaps()

    Returns
    -------
    color_dict : dictionary {value: hex color}; the 'unknown' value is
        always white ('#FFFFFF')
    """
    palette = _generate_colors_from_colormaps(
        len(values), cmap=cmap, as_hex=True)

    # 'unknown' still takes a slot in the palette: its generated color is
    # simply discarded in favour of white
    return {
        value: '#FFFFFF' if value == 'unknown' else shade
        for value, shade in zip(values, palette)
    }
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def _get_color_gradient(self):
    """
    Build a heatmap color scale from a Matplotlib colormap.

    The colormap is looked up from `self.colorscheme`, which lets ete3
    heatmaps use any Matplotlib colormap name.
    Adapted from:
    https://github.com/lthiberiol/virfac/blob/master/get_color_gradient.py

    Returns
    -------
    color_scale : list of 201 QtGui.QColor, sampled at regular steps between
        0 and 1 on the colormap
    """
    mapper = cmx.ScalarMappable(
        norm=colors.Normalize(vmin=0, vmax=1),
        cmap=plt.get_cmap(self.colorscheme))

    # bytes=True yields the four RGBA components as 0-255 integers
    return [QtGui.QColor(*mapper.to_rgba(level, bytes=True))
            for level in np.linspace(0, 1, 201)]
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def update_tree_ete3_and_return_style(
        tree_ete3, df_annot,
        xcor_id=None,
        xcor_seq=None,
        meta_data=None,
        show_leaf_name=True,
        fig_title='',
        linewidth=1,
        linecolor="#000000",
        bootstrap_style=None,
        tree_scale=200,
        metadata_colors=None,
        t_xcor_seq=False,
        t_xcor_heatmap=False,
        matrix_type='identity',
        colormap='inferno'
        ):
    """
    Update ete3 tree with XCoR info and attributes and return tree_style for
    further visualization.

    Parameters
    ----------
    tree_ete3 : ete3's tree object,
        as imported by io.load_tree_ete3(); the style of its nodes is
        modified in place

    df_annot : pandas dataframe of the annotation file

    xcor_id : list of XCoR sequence identifiers, as imported by io.load_msa()
        the ids must match with the tree's leaves id; used only when
        `t_xcor_seq` or `t_xcor_heatmap` is set

    xcor_seq : corresponding list of xcor sequences to display,
        as imported by io.load_msa(); used only when `t_xcor_seq` or
        `t_xcor_heatmap` is set

    meta_data : tuple of annotations to display
        (from annotation file's header)

    show_leaf_name : boolean, optional, default: True
        whether to show leaf names.

    fig_title : figure title (str)

    linewidth : int, optional, default: 1
        width of the lines in the tree

    linecolor : str, optional, default: "#000000"
        color of the lines

    bootstrap_style : dict or None, optional, default: None
        `fgcolor`: color of the bootstrap node, default: "darkred"
        `size`: size of the bootstrap node, default: 10
        `support`: int between 0 and 100, minimum support level for display,
        default: 95

    tree_scale : int, optional, default: 200
        sets the scale of the tree in ETE3: the higher, the larger the tree
        will be (in width)

    metadata_colors : dict, str, or None, optional, default: None
        colors for the metadata:
            - None: generates automatically the colors (with the "jet"
              colormap)
            - str: uses a Matplotlib colormap to generate the colors
            - dict: specifies colors for each metadata entry
              {key: color}

    t_xcor_seq : boolean,
        whether to show the sequences of the XCoR

    t_xcor_heatmap : boolean,
        whether to add a heatmap of the identity or similarity matrix between
        XCoR sequences

    matrix_type : str, default='identity'
        whether to compute pairwise sequence identity ('identity'), similarity
        ('similarity'), or normalized similarity ('norm_similarity').

    colormap : str, default='inferno'
        the matplotlib colormap to use for the heatmap

    Returns
    -------
    tree_style : TreeStyle class from ete3

    column_end : int, the number of columns after the tree. If you want to
        plot anything else alongside the tree, the column number should be
        equal to this value.

    """
    # None replaces a mutable `{}` default; both mean "use the defaults"
    if bootstrap_style is None:
        bootstrap_style = {}

    tree_style = TreeStyle()
    tree_style.scale = tree_scale
    tree_style.layout_fn = []
    tree_style.show_leaf_name = show_leaf_name

    # Style of the nodes whose support reaches the threshold
    boot_style = NodeStyle()
    boot_style["fgcolor"] = bootstrap_style.get("fgcolor", "darkred")
    boot_style["size"] = bootstrap_style.get("size", 10)
    support = bootstrap_style.get("support", 95)

    boot_style["hz_line_width"] = linewidth
    boot_style["vt_line_width"] = linewidth
    boot_style["vt_line_color"] = linecolor
    boot_style["hz_line_color"] = linecolor

    # Style of every other node: same lines, but no node marker
    empty_style = NodeStyle()
    empty_style["size"] = 0
    empty_style["vt_line_width"] = linewidth
    empty_style["hz_line_width"] = linewidth
    empty_style["vt_line_color"] = linecolor
    empty_style["hz_line_color"] = linecolor

    for node in tree_ete3.traverse():
        if node.support >= support:
            node.set_style(boot_style)
        else:
            node.set_style(empty_style)

    column_layout = 0
    legend_column = 0

    if metadata_colors is None:
        metadata_colors = "jet"

    # If no metadata, do nothing
    if meta_data:
        # One (leaf -> color, category -> color) pair per annotation column.
        # Computed once here rather than inside the layout function, which
        # would otherwise rebuild the whole mapping for each leaf and column.
        column_colors = [
            _annot_to_color(col, tree_ete3, df_annot, cmap=metadata_colors)
            for col in meta_data]
        last_column = len(meta_data) - 1

        def layout_attribute(node, column=column_layout):
            if not node.is_leaf():
                return
            for i, (leaf_colors, _) in enumerate(column_colors):
                color = leaf_colors[node.name]
                rect_face = RectFace(50, 20, fgcolor=color, bgcolor=color)
                rect_face.margin_left = 5
                # extra room after the last annotation column only
                rect_face.margin_right = 30 if i == last_column else 0
                add_face_to_node(rect_face, node, column=column + i,
                                 position='aligned')

        tree_style.layout_fn.append(layout_attribute)

        # Add legend: each annotation takes two legend columns, one for the
        # colored rectangles and one for the matching labels
        for col, (_, category_colors) in zip(meta_data, column_colors):
            tree_style.legend.add_face(TextFace(col,
                                                fsize=10,
                                                bold=True),
                                       column=legend_column)
            # otherwise text is not in front of RectFace
            tree_style.legend.add_face(TextFace(""),
                                       column=legend_column + 1)

            for key, color in category_colors.items():
                swatch = RectFace(50, 20, fgcolor=color, bgcolor=color)
                swatch.margin_right = 5
                swatch.margin_left = 10
                tree_style.legend.add_face(swatch, column=legend_column)
                tree_style.legend.add_face(TextFace(key, fsize=10),
                                           column=legend_column + 1)
            legend_column += 2

        column_layout += len(meta_data)

    if t_xcor_seq:
        tree_style, column_layout = add_xcor_sequences_to_tree(
            tree_style, tree_ete3, xcor_id,
            xcor_seq, column_start=column_layout)

    if t_xcor_heatmap:
        tree_style, column_layout = add_heatmap_to_tree(
            tree_style, tree_ete3, xcor_id, xcor_seq,
            matrix_type=matrix_type,
            column_start=column_layout, colormap=colormap)

    # Add title
    tree_style.title.add_face(TextFace(fig_title, fsize=20), column=0)

    return tree_style, column_layout
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
def add_xcor_sequences_to_tree(tree_style, tree_ete3, xcor_id, xcor_seq,
                               column_start=0):
    """
    Register a layout function that draws the XCoR sequence of each leaf.

    Parameters
    ----------
    tree_style : ETE3's tree_style object
        a layout function is appended to its `layout_fn` list

    tree_ete3 : ete3's tree object,
        as imported by io.load_tree_ete3() (not used by this function)

    xcor_id : list of XCoR sequence identifiers, as imported by io.load_msa()
        the ids must match with the tree's leaves id

    xcor_seq : corresponding list of XCoR sequences to display,
        as imported by io.load_msa()

    column_start : int, optional, default : 0
        the column on which to start plotting

    Returns
    -------
    tree_style : TreeStyle class from ete3

    column_end : int, the number of columns after the tree. If you want to
        plot anything else alongside the tree, the column number should be
        equal to this value.

    """
    sequence_by_id = {}
    for position, identifier in enumerate(xcor_id):
        sequence_by_id[identifier] = str(xcor_seq[position])

    def layout_SeqMotifFace(node, column=column_start):
        if not node.is_leaf():
            return
        # The first sequence gives the number of positions to draw
        n_positions = len(xcor_seq[0])
        # Leaves without an XCoR sequence are drawn as a run of gaps
        residues = sequence_by_id.get(node.name, '-' * n_positions)
        face = SeqMotifFace(
            residues,
            motifs=[[0, n_positions, "seq", 20, 20, None, None, None]],
            scale_factor=1)
        face.margin_right = 30
        add_face_to_node(face, node, column=column, position='aligned')

    tree_style.layout_fn.append(layout_SeqMotifFace)
    return tree_style, column_start + 1
|
|
362
|
+
|
|
363
|
+
|
|
364
|
+
def add_heatmap_to_tree(tree_style, tree_ete3, xcor_id, xcor_seq,
                        matrix_type="identity",
                        column_start=0, width=20, colormap="inferno"):
    """
    Add heatmap to ETE3's tree style

    Parameters
    ----------
    tree_style : ETE3's tree_style object
        returned unchanged; the heatmap faces are attached to the leaves of
        `tree_ete3`

    tree_ete3 : ete3's tree object,
        as imported by io.load_tree_ete3()

    xcor_id : list of XCoR sequence identifiers, as imported by io.load_msa()
        the ids must match with the tree's leaves id

    xcor_seq : corresponding list of XCoR sequences to display,
        as imported by io.load_msa()

    matrix_type : str, default='identity'
        whether to compute pairwise matrix identity ('identity'),
        similarity ('similarity'), or normalized similarity
        ('norm_similarity')

    column_start : int, optional, default : 0
        the column on which to start plotting

    width : int, optional, default : 20
        the width of each square of the heatmap. If width == 20, the heatmap
        will be squared.

    colormap : str, optional, default: "inferno"
        any Matplotlib's colormap

    Returns
    -------
    tree_style : TreeStyle class from ete3

    column_end : int, the number of columns after the tree. If you want to
        plot anything else alongside the tree, the column number should be
        equal to this value.

    Raises
    ------
    ValueError
        if `matrix_type` is not one of the supported values
    """
    # Fail early with a clear message; an unknown type used to leave
    # `matrix` unbound and surface as an UnboundLocalError further down
    if matrix_type not in ('identity', 'similarity', 'norm_similarity'):
        raise ValueError(
            "matrix_type must be 'identity', 'similarity' or "
            f"'norm_similarity', got {matrix_type!r}")

    leaves_id = tree_ete3.get_leaf_names()
    nb_leaves = len(leaves_id)

    # allow to choose among Matplotlib's colormaps
    # (this replaces the method on the ProfileFace class itself)
    ProfileFace.get_color_gradient = _get_color_gradient

    # Reorder the sequences as the tree leaves and keep only those present
    # in the tree
    sequences = pd.DataFrame(index=xcor_id, data={"seq": xcor_seq})
    reordered_sequences = sequences.loc[leaves_id, "seq"].values

    if matrix_type == 'identity':
        matrix = compute_seq_identity(reordered_sequences)
    elif matrix_type == 'similarity':
        matrix = compute_seq_similarity(reordered_sequences)
    else:
        matrix = compute_normalized_seq_similarity(reordered_sequences)

    # FIX to zero values appearing black in the heatmap whatever the cmap
    matrix[matrix == 0] = 0.00000001
    min_v = float(np.min(matrix))
    max_v = float(np.max(matrix))
    center_v = float(np.mean([min_v, max_v]))

    # Add heatmap profile to each leaf: row i of the matrix goes to the
    # i-th leaf, in the order used to reorder the sequences above
    for i, lf in enumerate(tree_ete3.iter_leaves()):
        lf.add_features(profile=matrix[i])
        lf.add_features(deviation=[0] * matrix.shape[0])
        lf.add_face(ProfileFace(max_v=max_v, min_v=min_v, center_v=center_v,
                                width=(nb_leaves*width), height=20,
                                style='heatmap',
                                colorscheme=colormap),
                    column=column_start, position="aligned")

    # NOTE(review): the heatmap occupies a single column, yet the returned
    # index advances by the heatmap's pixel width rather than by 1. Kept
    # as is for backward compatibility - confirm this is intended.
    column_start += nb_leaves*width
    return tree_style, column_start
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: cocoatree
|
|
3
|
+
Version: 0.1.0rc0.dev2
|
|
4
|
+
Summary: Awesome coevolution stuff
|
|
5
|
+
Author:
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Bug Reports, https://github.com/FIXME
|
|
8
|
+
Project-URL: Source, https://github.com/FIXME/
|
|
9
|
+
Keywords: coevolution,MSA
|
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Topic :: Software Development :: Build Tools
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
16
|
+
Requires-Python: <4.0,>=3.8
|
|
17
|
+
Description-Content-Type: text/markdown
|
|
18
|
+
License-File: LICENSE
|
|
19
|
+
Requires-Dist: numpy
|
|
20
|
+
Provides-Extra: dev
|
|
21
|
+
Requires-Dist: flake8; extra == "dev"
|
|
22
|
+
Provides-Extra: test
|
|
23
|
+
Requires-Dist: pytest; extra == "test"
|
|
24
|
+
Dynamic: license-file
|
|
25
|
+
|
|
26
|
+
# COCOA-Tree: COllaborative COevolution Analysis Toolbox
|
|
27
|
+
|
|
28
|
+
**COCOA-Tree** is a Python library to perform coevolution analyses of proteins and integrate phylogenetic information
|
|
29
|
+
to better understand the coevolution signals.
|
|
30
|
+
|
|
31
|
+
It regroups various coevolution metrics and corrections, such as statistical coevolution analysis (SCA) or mutual
|
|
32
|
+
information (MI).
|
|
33
|
+
|
|
34
|
+
Website: [tree-bioinfo-intra.timc.fr/projects/cocoa/](http://tree-bioinfo-intra.timc.fr/projects/cocoa/index.html)
|
|
35
|
+
|
|
36
|
+
## Library organization
|
|
37
|
+
|
|
38
|
+
COCOA-Tree is organized into several modules, each handling a different task of a coevolution analysis
|
|
39
|
+
pipeline:
|
|
40
|
+
|
|
41
|
+
[cocoatree_orga](../cocoatree_orga.pdf)
|
|
42
|
+
|
|
43
|
+
## Installation
|
|
44
|
+
|
|
45
|
+
### Dependencies
|
|
46
|
+
|
|
47
|
+
COCOA-Tree requires:
|
|
48
|
+
- Python (>= 3.9)
|
|
49
|
+
- NumPy
|
|
50
|
+
- scikit-learn
|
|
51
|
+
- biopython
|
|
52
|
+
- ete3
|
|
53
|
+
|
|
54
|
+
Matplotlib is also required for running the examples.
|
|
55
|
+
|
|
56
|
+
### User installation
|
|
57
|
+
|
|
58
|
+
To install for development purposes, use:
|
|
59
|
+
|
|
60
|
+
python setup.py develop
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
All changes made to the Python module will be reflected in the installed version
|
|
64
|
+
(in practice the installed version is a symlink to the package in
|
|
65
|
+
development).
|
|
66
|
+
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
cocoatree/__init__.py,sha256=N4kid_VxCza0PPvsmiFQUNGsKBdDMlB5Y5fWcvUG4Ac,280
|
|
2
|
+
cocoatree/__params.py,sha256=Wd_XmFmNvwSMqV5O5pFkX_tFgUn0MMU9M_oOFhM5vHA,1155
|
|
3
|
+
cocoatree/_pipeline.py,sha256=DVJmB_hX6K-Rp7REyBp_3bFvy0R-EFPxL1dSuCkjIsA,5390
|
|
4
|
+
cocoatree/_scraper.py,sha256=8Y3czIjWi3uXmXYytxF8EoFU6-wm9RBl5oawHAUhfFo,885
|
|
5
|
+
cocoatree/_version.py,sha256=-5md4JgngJQMchad9Aj8GA2bYZUbhmpzrIjkE0hCrTI,31
|
|
6
|
+
cocoatree/decomposition.py,sha256=jDCWFtThYUiwENKbj_fBq8k4qXp6iFgj4fjCmGk8D24,8434
|
|
7
|
+
cocoatree/io.py,sha256=kRMT9lyF6-Yt-3Q3FS-ER64xsNSoI6pGaf9S7iM_pMo,5273
|
|
8
|
+
cocoatree/msa.py,sha256=pvbQBRSO6wx8ZucuoEpa479Uq5jeLPSq2jzyBmjTmT0,17957
|
|
9
|
+
cocoatree/pysca.py,sha256=sFz99LuAxe58hk5u1xHVpZiRAb4J9612m7dWUTxEJaM,7759
|
|
10
|
+
cocoatree/randomize.py,sha256=2fYz2-vfdjAUC2QZt_iZwmx6rxrhAqubBV-6o789ji8,836
|
|
11
|
+
cocoatree/visualization.py,sha256=MZYwwDY6YHVAC5jy-sM0DG_pMcRmS3AnRm0H6nj4iL0,15518
|
|
12
|
+
cocoatree/datasets/__init__.py,sha256=2WVOhg0xnba5Wu0bsQHqmohrVkCq6eBh047YfW08VH0,159
|
|
13
|
+
cocoatree/datasets/_base.py,sha256=tQmBGytIm4yf3yNyQYaO_BGxMWopCJbYEBSXGguZQWI,6454
|
|
14
|
+
cocoatree/datasets/data/DHFR/3QL0.pdb,sha256=ygJz5Jc6GBgtIdHrrEgz3iBX3UJFZL4Sw55dhMm71Ak,284067
|
|
15
|
+
cocoatree/datasets/data/DHFR/DHFR_sectors.npz,sha256=NpehLs_uwAGZR2QpN69_Rg4YqX-O8Mi0lJ4e3XgBWcY,1382
|
|
16
|
+
cocoatree/datasets/data/DHFR/alignment.faa.gz,sha256=VDaT0icYGUv47BvxwaYOpDPdLC2CtCJZWwQzT5A94XE,620140
|
|
17
|
+
cocoatree/datasets/data/S1A_serine_proteases/3tgi.pdb,sha256=UJOL4Yw4jJrMob50SXftZUsDlgKt3kNsBRdnIk0lJL0,230364
|
|
18
|
+
cocoatree/datasets/data/S1A_serine_proteases/halabi_alignment.fasta,sha256=f0oMVTNQ4pyeP9ycLbAZV4vPQGZRxgvXoxe8sK4IJfI,1256369
|
|
19
|
+
cocoatree/datasets/data/S1A_serine_proteases/halabi_metadata.csv,sha256=0m7PklH-k2t0p9pYNcRs-skk4HTeXFXMy-7n5HFSw9w,414403
|
|
20
|
+
cocoatree/datasets/data/S1A_serine_proteases/halabi_sectors.npz,sha256=XrbMn4E4SxOJYjJcdVk2T_3frA4gCW0w2Xv-y7NUCwU,1298
|
|
21
|
+
cocoatree/datasets/data/S1A_serine_proteases/rivoire_alignment.fasta,sha256=zSRlES16bbxN57S03V79Wss6Y3oOt6NZltCvifwM3N0,1307330
|
|
22
|
+
cocoatree/datasets/data/S1A_serine_proteases/rivoire_metadata.csv,sha256=N_0z7SSz9zG3wnd_Y0XMVuoyqkj6aLrdugoeB4-Sf08,344632
|
|
23
|
+
cocoatree/datasets/data/S1A_serine_proteases/rivoire_sectors.npz,sha256=zW1GJ2WGfy-K0GvXU6rjothMFAQJXOQFjgzuxTdRyjE,3954
|
|
24
|
+
cocoatree/datasets/data/rhomboid_proteases/2NRF.pdb,sha256=oaClH9ONw65GyJ1_k54xkfRSXrNZvBBE4gweYImFvwQ,267300
|
|
25
|
+
cocoatree/datasets/data/rhomboid_proteases/Data_S1_Rhomboid_MSA_short_names.fasta,sha256=3C6MNTtGzm3IH5m9R3JxlmNIyShUcRAgDu6gqeNRn8s,417557
|
|
26
|
+
cocoatree/datasets/data/rhomboid_proteases/rhomboid_metadata_clean.csv,sha256=MrtuBh_EWqVWW5mIkkbnQM8foAdpv6JI_Pes8q7Kgic,667111
|
|
27
|
+
cocoatree/datasets/data/rhomboid_proteases/rhomboid_sectors.npz,sha256=MbOzLpB4NnZtEHFuTqmDesQz-RklZ8eCSBsGM0lbd0k,1258
|
|
28
|
+
cocoatree/datasets/tests/test_datasets.py,sha256=-Yy9TpaXbl6jIWcwhA2GsPCPWex-FLyo8QoZGWTELw8,418
|
|
29
|
+
cocoatree/scripts/cocoatree-sca.py,sha256=lKftBeQPfbSlpCzhArmG7qdNeYKJplky8nzLV6WbJXk,52
|
|
30
|
+
cocoatree/statistics/__init__.py,sha256=P8HrXLKpcorn-IMvCyNT6WdEfs72rDqawcVPzJbn9TI,1826
|
|
31
|
+
cocoatree/statistics/pairwise.py,sha256=e8wOq1CqVVkhw2j3BZxIzDFolBnbI6aO0d4Na0vADIg,9305
|
|
32
|
+
cocoatree/statistics/position.py,sha256=MHMg7C9uNsN5H1Rniu4VnObDdF851cI8cbiJNiGMiZ8,7959
|
|
33
|
+
cocoatree/tests/test_init.py,sha256=oBTYk3cjJZ_uLvo7pVhVtHBPwVALuCYpwi6iFX_ukI0,616
|
|
34
|
+
cocoatree/tests/test_msa.py,sha256=yn0EeITIIwc4iBhnWp28WElqjDhD88ODqXQ3EtL8NKU,383
|
|
35
|
+
cocoatree-0.1.0rc0.dev2.dist-info/licenses/LICENSE,sha256=-csX-kOWZ0Ew72OZUq6Dpl7zF7FbxzMuib6ItKxEpZw,1496
|
|
36
|
+
cocoatree-0.1.0rc0.dev2.dist-info/METADATA,sha256=XvNWqa_rMcSleXIxjPY3GECXkbUZVizf1dOFaT87Mp4,1882
|
|
37
|
+
cocoatree-0.1.0rc0.dev2.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
38
|
+
cocoatree-0.1.0rc0.dev2.dist-info/top_level.txt,sha256=1gUOukKDkF8RMx0GztzKIDHtoWbqc6ChHpZGUOl-D2M,10
|
|
39
|
+
cocoatree-0.1.0rc0.dev2.dist-info/RECORD,,
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
Copyright (c) 2025 The COCOA-Tree developers.
|
|
2
|
+
All rights reserved.
|
|
3
|
+
|
|
4
|
+
Redistribution and use in source and binary forms, with or without
|
|
5
|
+
modification, are permitted provided that the following conditions are met:
|
|
6
|
+
|
|
7
|
+
1. Redistributions of source code must retain the above copyright notice,
|
|
8
|
+
this list of conditions and the following disclaimer.
|
|
9
|
+
|
|
10
|
+
2. Redistributions in binary form must reproduce the above copyright notice,
|
|
11
|
+
this list of conditions and the following disclaimer in the documentation
|
|
12
|
+
and/or other materials provided with the distribution.
|
|
13
|
+
|
|
14
|
+
3. Neither the name of the copyright holder nor the names of its contributors
|
|
15
|
+
may be used to endorse or promote products derived from this software without
|
|
16
|
+
specific prior written permission.
|
|
17
|
+
|
|
18
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
19
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
20
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
21
|
+
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
|
22
|
+
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
23
|
+
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
24
|
+
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
25
|
+
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
26
|
+
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
27
|
+
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
|
28
|
+
THE POSSIBILITY OF SUCH DAMAGE.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
cocoatree
|