commonnexus 1.8.0__py2.py3-none-any.whl → 1.9.1__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- commonnexus/__init__.py +1 -1
- commonnexus/nexus.py +15 -2
- commonnexus/tools/normalise.py +34 -3
- {commonnexus-1.8.0.dist-info → commonnexus-1.9.1.dist-info}/METADATA +10 -9
- {commonnexus-1.8.0.dist-info → commonnexus-1.9.1.dist-info}/RECORD +9 -9
- {commonnexus-1.8.0.dist-info → commonnexus-1.9.1.dist-info}/LICENSE +0 -0
- {commonnexus-1.8.0.dist-info → commonnexus-1.9.1.dist-info}/WHEEL +0 -0
- {commonnexus-1.8.0.dist-info → commonnexus-1.9.1.dist-info}/entry_points.txt +0 -0
- {commonnexus-1.8.0.dist-info → commonnexus-1.9.1.dist-info}/top_level.txt +0 -0
commonnexus/__init__.py
CHANGED
commonnexus/nexus.py
CHANGED
|
@@ -5,6 +5,7 @@ import collections
|
|
|
5
5
|
import dataclasses
|
|
6
6
|
|
|
7
7
|
from .tokenizer import TokenType, iter_tokens, get_name
|
|
8
|
+
from .util import log_or_raise
|
|
8
9
|
from commonnexus.command import Command
|
|
9
10
|
from commonnexus.blocks import Block
|
|
10
11
|
|
|
@@ -45,7 +46,7 @@ class Config:
|
|
|
45
46
|
|
|
46
47
|
class Nexus(list):
|
|
47
48
|
"""
|
|
48
|
-
A NEXUS object implemented as list of
|
|
49
|
+
A NEXUS object implemented as list of commands with methods to read and write blocks.
|
|
49
50
|
|
|
50
51
|
From the spec:
|
|
51
52
|
|
|
@@ -95,6 +96,7 @@ class Nexus(list):
|
|
|
95
96
|
"""
|
|
96
97
|
self.cfg = config or Config(**kw)
|
|
97
98
|
self.trailing_whitespace = []
|
|
99
|
+
self.leading = []
|
|
98
100
|
self.block_implementations = {}
|
|
99
101
|
for cls in Block.__subclasses__():
|
|
100
102
|
self.block_implementations[cls.__name__.upper()] = cls
|
|
@@ -120,7 +122,10 @@ class Nexus(list):
|
|
|
120
122
|
if token.is_semicolon:
|
|
121
123
|
commands.append(Command(tuple(tokens)))
|
|
122
124
|
tokens = []
|
|
123
|
-
|
|
125
|
+
if commands:
|
|
126
|
+
self.trailing_whitespace = tokens
|
|
127
|
+
else:
|
|
128
|
+
self.leading = tokens
|
|
124
129
|
s = commands
|
|
125
130
|
list.__init__(self, s)
|
|
126
131
|
|
|
@@ -211,6 +216,7 @@ class Nexus(list):
|
|
|
211
216
|
END;
|
|
212
217
|
"""
|
|
213
218
|
return NEXUS \
|
|
219
|
+
+ ''.join(str(t) for t in self.leading) \
|
|
214
220
|
+ ''.join(''.join(str(t) for t in cmd) for cmd in self) \
|
|
215
221
|
+ ''.join(str(t) for t in self.trailing_whitespace)
|
|
216
222
|
|
|
@@ -224,8 +230,10 @@ class Nexus(list):
|
|
|
224
230
|
p.write_text(text, encoding=self.cfg.encoding)
|
|
225
231
|
|
|
226
232
|
def iter_comments(self):
|
|
233
|
+
yield from (t for t in self.leading if t.type == TokenType.COMMENT)
|
|
227
234
|
for cmd in self:
|
|
228
235
|
yield from (t for t in cmd if t.type == TokenType.COMMENT)
|
|
236
|
+
yield from (t for t in self.trailing_whitespace if t.type == TokenType.COMMENT)
|
|
229
237
|
|
|
230
238
|
@property
|
|
231
239
|
def comments(self) -> typing.List[str]:
|
|
@@ -262,6 +270,8 @@ class Nexus(list):
|
|
|
262
270
|
|
|
263
271
|
def validate(self, log=None):
|
|
264
272
|
valid = True
|
|
273
|
+
if any(t.type not in {TokenType.WHITESPACE, TokenType.COMMENT} for t in self.leading):
|
|
274
|
+
log_or_raise('Invalid token in preamble', log=log)
|
|
265
275
|
for block in self.iter_blocks():
|
|
266
276
|
#
|
|
267
277
|
# FIXME: we can do a lot of validation here! If block.__commands__ is a list, there is
|
|
@@ -269,6 +279,9 @@ class Nexus(list):
|
|
|
269
279
|
# If Payload.__multivalued__ == False, only one command instance is allowed, ...
|
|
270
280
|
#
|
|
271
281
|
valid = valid and block.validate(log=log)
|
|
282
|
+
if any(t.type not in {TokenType.WHITESPACE, TokenType.COMMENT}
|
|
283
|
+
for t in self.trailing_whitespace):
|
|
284
|
+
log_or_raise('Invalid token in text after the last command', log=log)
|
|
272
285
|
return valid
|
|
273
286
|
|
|
274
287
|
def get_numbers(self, object_name, items):
|
commonnexus/tools/normalise.py
CHANGED
|
@@ -20,6 +20,8 @@ In addition, after normalisation, the following assumptions hold:
|
|
|
20
20
|
import typing
|
|
21
21
|
import collections
|
|
22
22
|
|
|
23
|
+
import newick
|
|
24
|
+
|
|
23
25
|
from commonnexus import Nexus
|
|
24
26
|
from commonnexus.blocks.characters import Data
|
|
25
27
|
from commonnexus.blocks import Taxa, Distances, Characters, Trees
|
|
@@ -28,15 +30,28 @@ from commonnexus.blocks import Taxa, Distances, Characters, Trees
|
|
|
28
30
|
def normalise(nexus: Nexus,
|
|
29
31
|
data_to_characters: bool = False,
|
|
30
32
|
strip_comments: bool = False,
|
|
31
|
-
remove_taxa: typing.Optional[typing.Container[str]] = None
|
|
33
|
+
remove_taxa: typing.Optional[typing.Container[str]] = None,
|
|
34
|
+
rename_taxa: typing.Optional[
|
|
35
|
+
typing.Union[typing.Callable[[str], str], typing.Dict[str, str]]] = None,
|
|
36
|
+
) -> Nexus:
|
|
32
37
|
"""
|
|
38
|
+
Normalise a `Nexus` object as described above.
|
|
39
|
+
|
|
33
40
|
:param nexus: A `Nexus` object to be normalised in-place.
|
|
34
41
|
:param data_to_characters: Flag signaling whether DATA blocks should be converted to CHARACTER \
|
|
35
42
|
blocks.
|
|
36
43
|
:param strip_comments: Flag signaling whether to remove all non-command comments.
|
|
37
44
|
:param remove_taxa: Container of taxon labels specifying taxa to remove from relevant blocks.
|
|
45
|
+
:param rename_taxa: Specification of taxa to rename; either a ``dict``, mapping old names to \
|
|
46
|
+
new names, or a callable, accepting the old name as sole argument and returning the new name.
|
|
38
47
|
:return: The modified `Nexus` object.
|
|
39
48
|
|
|
49
|
+
.. warning::
|
|
50
|
+
|
|
51
|
+
``remove_taxa`` and ``rename_taxa`` only operate on TAXA, CHARACTERS/DATA, DISTANCES and
|
|
52
|
+
TREES blocks. Thus, normalisation may result in an inconsistent NEXUS file, if the file
|
|
53
|
+
contains other blocks which reference taxa (e.g. NOTES).
|
|
54
|
+
|
|
40
55
|
.. code-block:: python
|
|
41
56
|
|
|
42
57
|
>>> from commonnexus import Nexus
|
|
@@ -94,6 +109,13 @@ def normalise(nexus: Nexus,
|
|
|
94
109
|
"""
|
|
95
110
|
remove_taxa = remove_taxa or []
|
|
96
111
|
|
|
112
|
+
def rename_taxon(s):
|
|
113
|
+
if rename_taxa is None:
|
|
114
|
+
return s
|
|
115
|
+
if isinstance(rename_taxa, dict):
|
|
116
|
+
return rename_taxa.get(s, s)
|
|
117
|
+
return rename_taxa(s)
|
|
118
|
+
|
|
97
119
|
if strip_comments:
|
|
98
120
|
nexus = Nexus([cmd.without_comments() for cmd in nexus], config=nexus.cfg)
|
|
99
121
|
nexus = Nexus([cmd.with_normalised_whitespace() for cmd in nexus], config=nexus.cfg)
|
|
@@ -102,7 +124,8 @@ def normalise(nexus: Nexus,
|
|
|
102
124
|
if nexus.characters:
|
|
103
125
|
matrix = nexus.characters.get_matrix()
|
|
104
126
|
taxlabels = list(matrix.keys())
|
|
105
|
-
matrix = collections.OrderedDict(
|
|
127
|
+
matrix = collections.OrderedDict(
|
|
128
|
+
(rename_taxon(k), v) for k, v in matrix.items() if k not in remove_taxa)
|
|
106
129
|
characters = nexus.DATA or nexus.CHARACTERS
|
|
107
130
|
cls = Data if characters.name == 'DATA' and not data_to_characters else Characters
|
|
108
131
|
nexus.replace_block(
|
|
@@ -116,20 +139,28 @@ def normalise(nexus: Nexus,
|
|
|
116
139
|
else:
|
|
117
140
|
taxlabels = list(matrix.keys())
|
|
118
141
|
matrix = collections.OrderedDict(
|
|
119
|
-
(k, collections.OrderedDict(
|
|
142
|
+
(rename_taxon(k), collections.OrderedDict(
|
|
143
|
+
(rename_taxon(kk), vv) for kk, vv in v.items() if kk not in remove_taxa))
|
|
120
144
|
for k, v in matrix.items() if k not in remove_taxa)
|
|
121
145
|
nexus.replace_block(nexus.DISTANCES, Distances.from_data(matrix))
|
|
122
146
|
|
|
123
147
|
if nexus.TREES:
|
|
148
|
+
def rename(n):
|
|
149
|
+
if n.name:
|
|
150
|
+
new = rename_taxon(n.unquoted_name)
|
|
151
|
+
n.name = newick.Node(new, auto_quote=True).name
|
|
152
|
+
|
|
124
153
|
trees = []
|
|
125
154
|
for tree in nexus.TREES.trees:
|
|
126
155
|
nwk = nexus.TREES.translate(tree) if nexus.TREES.TRANSLATE else tree.newick
|
|
127
156
|
if remove_taxa:
|
|
128
157
|
nwk.prune_by_names(remove_taxa)
|
|
158
|
+
nwk.visit(rename)
|
|
129
159
|
trees.append((tree.name, nwk, tree.rooted))
|
|
130
160
|
nexus.replace_block(nexus.TREES, Trees.from_data(*trees))
|
|
131
161
|
|
|
132
162
|
if taxlabels:
|
|
163
|
+
taxlabels = [rename_taxon(t) for t in taxlabels]
|
|
133
164
|
taxa = Taxa.from_data([t for t in taxlabels if t not in remove_taxa])
|
|
134
165
|
if nexus.TAXA:
|
|
135
166
|
assert nexus.TAXA.DIMENSIONS.ntax == len(taxlabels)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: commonnexus
|
|
3
|
-
Version: 1.8.0
|
|
3
|
+
Version: 1.9.1
|
|
4
4
|
Summary: A nexus (phylogenetics) file reader and writer (.nex, .trees)
|
|
5
5
|
Home-page: https://github.com/dlce-eva/commonnexus
|
|
6
6
|
Author: Robert Forkel
|
|
@@ -99,14 +99,15 @@ and writing NEXUS
|
|
|
99
99
|
>>> print(Nexus.from_blocks(Data.from_data(nex.CHARACTERS.get_matrix())))
|
|
100
100
|
#NEXUS
|
|
101
101
|
BEGIN DATA;
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
102
|
+
DIMENSIONS NCHAR=10;
|
|
103
|
+
FORMAT DATATYPE=STANDARD MISSING=? GAP=- SYMBOLS="01";
|
|
104
|
+
MATRIX
|
|
105
|
+
t1 1001010000
|
|
106
|
+
t2 0101000100
|
|
107
|
+
t3 0011101010
|
|
108
|
+
t4 0001100001
|
|
109
|
+
t5 0001100001
|
|
110
|
+
;
|
|
110
111
|
END;
|
|
111
112
|
```
|
|
112
113
|
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
commonnexus/__init__.py,sha256=
|
|
1
|
+
commonnexus/__init__.py,sha256=PjK3pLfL2O6-bT7dktuGDSQufBuU5J5VLsg9yRZVBPI,121
|
|
2
2
|
commonnexus/__main__.py,sha256=k07DsoCHCRw2Wi0HzXCmvNCSt8cddDEGCBlnHZorm6k,4361
|
|
3
3
|
commonnexus/cli_util.py,sha256=pmqd3Fs2Wef7hRf0Z6koZ1Bg6-16FZUMOYZ8cMTY5Ig,3710
|
|
4
4
|
commonnexus/command.py,sha256=5ZbW3wQY1GO6fChK7aqOzwjKBRxkPlKdUf09JbYEy9c,4284
|
|
5
|
-
commonnexus/nexus.py,sha256=
|
|
5
|
+
commonnexus/nexus.py,sha256=rsfxX_cM4aOxS5p-lu39U2ZEcHswDoXPGGiVBiiVAMA,18380
|
|
6
6
|
commonnexus/tokenizer.py,sha256=8G2nP-xNN1gBd2a_7qxGDhHloCzoH4YE063p01FLTuk,11459
|
|
7
7
|
commonnexus/util.py,sha256=WomyVUggYJON5T9uNuIZ_Bxm16UJHIhO737SjKcFYO0,168
|
|
8
8
|
commonnexus/blocks/__init__.py,sha256=ehKEEc_E62zhlkvbp0p3asY56QkLuhqk7wYd1AqQuKI,602
|
|
@@ -27,10 +27,10 @@ commonnexus/commands/trees.py,sha256=aMN9WPvzOXfNgPcLvwmkJHtyvjq3LWWo2rpbGFCyczs
|
|
|
27
27
|
commonnexus/tools/__init__.py,sha256=X8dv4VrjAo5C8VJdUXMrKviDLUrwXbawE5SJiQ--2uQ,213
|
|
28
28
|
commonnexus/tools/combine.py,sha256=5vPQKcqA2acFji2lZc_xNZr1Xm2F3TPmfQk-fa9GA60,3048
|
|
29
29
|
commonnexus/tools/matrix.py,sha256=3e2n_beug3MZyzXDgawjxxh1QfiM7KVp757OepCjQR0,11492
|
|
30
|
-
commonnexus/tools/normalise.py,sha256=
|
|
31
|
-
commonnexus-1.
|
|
32
|
-
commonnexus-1.
|
|
33
|
-
commonnexus-1.
|
|
34
|
-
commonnexus-1.
|
|
35
|
-
commonnexus-1.
|
|
36
|
-
commonnexus-1.
|
|
30
|
+
commonnexus/tools/normalise.py,sha256=RdAasznob1aIY4B5z0Ir3jm9T5Yj_nbFzdtFtUM553U,6131
|
|
31
|
+
commonnexus-1.9.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
32
|
+
commonnexus-1.9.1.dist-info/METADATA,sha256=wTiq1lHknPwEw9KXEd_2EB_UG_45v_jWmYBNhLFpTVE,5357
|
|
33
|
+
commonnexus-1.9.1.dist-info/WHEEL,sha256=bb2Ot9scclHKMOLDEHY6B2sicWOgugjFKaJsT7vwMQo,110
|
|
34
|
+
commonnexus-1.9.1.dist-info/entry_points.txt,sha256=0CbsOaqe6jaHnG1rpheuIISCZvFp1G1oBgDym82rpAo,58
|
|
35
|
+
commonnexus-1.9.1.dist-info/top_level.txt,sha256=SiWuQrmGciG5Ivhrqz6ueU1Sxv-fzudflsMof81-I54,12
|
|
36
|
+
commonnexus-1.9.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|