commonnexus 1.8.0__py2.py3-none-any.whl → 1.9.1__py2.py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
commonnexus/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
1
  from .nexus import Nexus, Config # noqa: F401
2
2
  from commonnexus.blocks import Block # noqa: F401
3
3
 
4
- __version__ = '1.8.0'
4
+ __version__ = '1.9.1'
commonnexus/nexus.py CHANGED
@@ -5,6 +5,7 @@ import collections
5
5
  import dataclasses
6
6
 
7
7
  from .tokenizer import TokenType, iter_tokens, get_name
8
+ from .util import log_or_raise
8
9
  from commonnexus.command import Command
9
10
  from commonnexus.blocks import Block
10
11
 
@@ -45,7 +46,7 @@ class Config:
45
46
 
46
47
  class Nexus(list):
47
48
  """
48
- A NEXUS object implemented as list of tokens with methods to access newick constituents.
49
+ A NEXUS object implemented as list of commands with methods to read and write blocks.
49
50
 
50
51
  From the spec:
51
52
 
@@ -95,6 +96,7 @@ class Nexus(list):
95
96
  """
96
97
  self.cfg = config or Config(**kw)
97
98
  self.trailing_whitespace = []
99
+ self.leading = []
98
100
  self.block_implementations = {}
99
101
  for cls in Block.__subclasses__():
100
102
  self.block_implementations[cls.__name__.upper()] = cls
@@ -120,7 +122,10 @@ class Nexus(list):
120
122
  if token.is_semicolon:
121
123
  commands.append(Command(tuple(tokens)))
122
124
  tokens = []
123
- self.trailing_whitespace = tokens
125
+ if commands:
126
+ self.trailing_whitespace = tokens
127
+ else:
128
+ self.leading = tokens
124
129
  s = commands
125
130
  list.__init__(self, s)
126
131
 
@@ -211,6 +216,7 @@ class Nexus(list):
211
216
  END;
212
217
  """
213
218
  return NEXUS \
219
+ + ''.join(str(t) for t in self.leading) \
214
220
  + ''.join(''.join(str(t) for t in cmd) for cmd in self) \
215
221
  + ''.join(str(t) for t in self.trailing_whitespace)
216
222
 
@@ -224,8 +230,10 @@ class Nexus(list):
224
230
  p.write_text(text, encoding=self.cfg.encoding)
225
231
 
226
232
  def iter_comments(self):
233
+ yield from (t for t in self.leading if t.type == TokenType.COMMENT)
227
234
  for cmd in self:
228
235
  yield from (t for t in cmd if t.type == TokenType.COMMENT)
236
+ yield from (t for t in self.trailing_whitespace if t.type == TokenType.COMMENT)
229
237
 
230
238
  @property
231
239
  def comments(self) -> typing.List[str]:
@@ -262,6 +270,8 @@ class Nexus(list):
262
270
 
263
271
  def validate(self, log=None):
264
272
  valid = True
273
+ if any(t.type not in {TokenType.WHITESPACE, TokenType.COMMENT} for t in self.leading):
274
+ log_or_raise('Invalid token in preamble', log=log)
265
275
  for block in self.iter_blocks():
266
276
  #
267
277
  # FIXME: we can do a lot of validation here! If block.__commands__ is a list, there is
@@ -269,6 +279,9 @@ class Nexus(list):
269
279
  # If Payload.__multivalued__ == False, only one command instance is allowed, ...
270
280
  #
271
281
  valid = valid and block.validate(log=log)
282
+ if any(t.type not in {TokenType.WHITESPACE, TokenType.COMMENT}
283
+ for t in self.trailing_whitespace):
284
+ log_or_raise('Invalid token in text after the last command', log=log)
272
285
  return valid
273
286
 
274
287
  def get_numbers(self, object_name, items):
@@ -20,6 +20,8 @@ In addition, after normalisation, the following assumptions hold:
20
20
  import typing
21
21
  import collections
22
22
 
23
+ import newick
24
+
23
25
  from commonnexus import Nexus
24
26
  from commonnexus.blocks.characters import Data
25
27
  from commonnexus.blocks import Taxa, Distances, Characters, Trees
@@ -28,15 +30,28 @@ from commonnexus.blocks import Taxa, Distances, Characters, Trees
28
30
  def normalise(nexus: Nexus,
29
31
  data_to_characters: bool = False,
30
32
  strip_comments: bool = False,
31
- remove_taxa: typing.Optional[typing.Container[str]] = None) -> Nexus:
33
+ remove_taxa: typing.Optional[typing.Container[str]] = None,
34
+ rename_taxa: typing.Optional[
35
+ typing.Union[typing.Callable[[str], str], typing.Dict[str, str]]] = None,
36
+ ) -> Nexus:
32
37
  """
38
+ Normalise a `Nexus` object as described above.
39
+
33
40
  :param nexus: A `Nexus` object to be normalised in-place.
34
41
  :param data_to_characters: Flag signaling whether DATA blocks should be converted to CHARACTER \
35
42
  blocks.
36
43
  :param strip_comments: Flag signaling whether to remove all non-command comments.
37
44
  :param remove_taxa: Container of taxon labels specifying taxa to remove from relevant blocks.
45
+ :param rename_taxa: Specification of taxa to rename; either a ``dict``, mapping old names to \
46
+ new names, or a callable, accepting the old name as sole argument and returning the new name.
38
47
  :return: The modified `Nexus` object.
39
48
 
49
+ .. warning::
50
+
51
+ ``remove_taxa`` and ``rename_taxa`` only operate on TAXA, CHARACTERS/DATA, DISTANCES and
52
+ TREES blocks. Thus, normalisation may result in an inconsistent NEXUS file, if the file
53
+ contains other blocks which reference taxa (e.g. NOTES).
54
+
40
55
  .. code-block:: python
41
56
 
42
57
  >>> from commonnexus import Nexus
@@ -94,6 +109,13 @@ def normalise(nexus: Nexus,
94
109
  """
95
110
  remove_taxa = remove_taxa or []
96
111
 
112
+ def rename_taxon(s):
113
+ if rename_taxa is None:
114
+ return s
115
+ if isinstance(rename_taxa, dict):
116
+ return rename_taxa.get(s, s)
117
+ return rename_taxa(s)
118
+
97
119
  if strip_comments:
98
120
  nexus = Nexus([cmd.without_comments() for cmd in nexus], config=nexus.cfg)
99
121
  nexus = Nexus([cmd.with_normalised_whitespace() for cmd in nexus], config=nexus.cfg)
@@ -102,7 +124,8 @@ def normalise(nexus: Nexus,
102
124
  if nexus.characters:
103
125
  matrix = nexus.characters.get_matrix()
104
126
  taxlabels = list(matrix.keys())
105
- matrix = collections.OrderedDict((k, v) for k, v in matrix.items() if k not in remove_taxa)
127
+ matrix = collections.OrderedDict(
128
+ (rename_taxon(k), v) for k, v in matrix.items() if k not in remove_taxa)
106
129
  characters = nexus.DATA or nexus.CHARACTERS
107
130
  cls = Data if characters.name == 'DATA' and not data_to_characters else Characters
108
131
  nexus.replace_block(
@@ -116,20 +139,28 @@ def normalise(nexus: Nexus,
116
139
  else:
117
140
  taxlabels = list(matrix.keys())
118
141
  matrix = collections.OrderedDict(
119
- (k, collections.OrderedDict((kk, vv) for kk, vv in v.items() if kk not in remove_taxa))
142
+ (rename_taxon(k), collections.OrderedDict(
143
+ (rename_taxon(kk), vv) for kk, vv in v.items() if kk not in remove_taxa))
120
144
  for k, v in matrix.items() if k not in remove_taxa)
121
145
  nexus.replace_block(nexus.DISTANCES, Distances.from_data(matrix))
122
146
 
123
147
  if nexus.TREES:
148
+ def rename(n):
149
+ if n.name:
150
+ new = rename_taxon(n.unquoted_name)
151
+ n.name = newick.Node(new, auto_quote=True).name
152
+
124
153
  trees = []
125
154
  for tree in nexus.TREES.trees:
126
155
  nwk = nexus.TREES.translate(tree) if nexus.TREES.TRANSLATE else tree.newick
127
156
  if remove_taxa:
128
157
  nwk.prune_by_names(remove_taxa)
158
+ nwk.visit(rename)
129
159
  trees.append((tree.name, nwk, tree.rooted))
130
160
  nexus.replace_block(nexus.TREES, Trees.from_data(*trees))
131
161
 
132
162
  if taxlabels:
163
+ taxlabels = [rename_taxon(t) for t in taxlabels]
133
164
  taxa = Taxa.from_data([t for t in taxlabels if t not in remove_taxa])
134
165
  if nexus.TAXA:
135
166
  assert nexus.TAXA.DIMENSIONS.ntax == len(taxlabels)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: commonnexus
3
- Version: 1.8.0
3
+ Version: 1.9.1
4
4
  Summary: A nexus (phylogenetics) file reader and writer (.nex, .trees)
5
5
  Home-page: https://github.com/dlce-eva/commonnexus
6
6
  Author: Robert Forkel
@@ -99,14 +99,15 @@ and writing NEXUS
99
99
  >>> print(Nexus.from_blocks(Data.from_data(nex.CHARACTERS.get_matrix())))
100
100
  #NEXUS
101
101
  BEGIN DATA;
102
- DIMENSIONS NCHAR=10;
103
- FORMAT DATATYPE=STANDARD MISSING=? GAP=- SYMBOLS="01";
104
- MATRIX
105
- t1 1001010000
106
- t2 0101000100
107
- t3 0011101010
108
- t4 0001100001
109
- t5 0001100001;
102
+ DIMENSIONS NCHAR=10;
103
+ FORMAT DATATYPE=STANDARD MISSING=? GAP=- SYMBOLS="01";
104
+ MATRIX
105
+ t1 1001010000
106
+ t2 0101000100
107
+ t3 0011101010
108
+ t4 0001100001
109
+ t5 0001100001
110
+ ;
110
111
  END;
111
112
  ```
112
113
 
@@ -1,8 +1,8 @@
1
- commonnexus/__init__.py,sha256=8UeJyuZr7nj6mfW-nQwfC5iaoujNK1JeLMbHDnmlt48,121
1
+ commonnexus/__init__.py,sha256=PjK3pLfL2O6-bT7dktuGDSQufBuU5J5VLsg9yRZVBPI,121
2
2
  commonnexus/__main__.py,sha256=k07DsoCHCRw2Wi0HzXCmvNCSt8cddDEGCBlnHZorm6k,4361
3
3
  commonnexus/cli_util.py,sha256=pmqd3Fs2Wef7hRf0Z6koZ1Bg6-16FZUMOYZ8cMTY5Ig,3710
4
4
  commonnexus/command.py,sha256=5ZbW3wQY1GO6fChK7aqOzwjKBRxkPlKdUf09JbYEy9c,4284
5
- commonnexus/nexus.py,sha256=00pUVi1sBaVViI0r_P8YUfEeHw0h_10_sSGeIWn905o,17661
5
+ commonnexus/nexus.py,sha256=rsfxX_cM4aOxS5p-lu39U2ZEcHswDoXPGGiVBiiVAMA,18380
6
6
  commonnexus/tokenizer.py,sha256=8G2nP-xNN1gBd2a_7qxGDhHloCzoH4YE063p01FLTuk,11459
7
7
  commonnexus/util.py,sha256=WomyVUggYJON5T9uNuIZ_Bxm16UJHIhO737SjKcFYO0,168
8
8
  commonnexus/blocks/__init__.py,sha256=ehKEEc_E62zhlkvbp0p3asY56QkLuhqk7wYd1AqQuKI,602
@@ -27,10 +27,10 @@ commonnexus/commands/trees.py,sha256=aMN9WPvzOXfNgPcLvwmkJHtyvjq3LWWo2rpbGFCyczs
27
27
  commonnexus/tools/__init__.py,sha256=X8dv4VrjAo5C8VJdUXMrKviDLUrwXbawE5SJiQ--2uQ,213
28
28
  commonnexus/tools/combine.py,sha256=5vPQKcqA2acFji2lZc_xNZr1Xm2F3TPmfQk-fa9GA60,3048
29
29
  commonnexus/tools/matrix.py,sha256=3e2n_beug3MZyzXDgawjxxh1QfiM7KVp757OepCjQR0,11492
30
- commonnexus/tools/normalise.py,sha256=l7MNBkFQk0hyl2IA33AvRFS0mjAZwDYumpfw8pzzMa0,4924
31
- commonnexus-1.8.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
32
- commonnexus-1.8.0.dist-info/METADATA,sha256=trS0EGuKEPPdw-cdMeR2fdj-nzwIzgAZtP2zU3Fs58Q,5379
33
- commonnexus-1.8.0.dist-info/WHEEL,sha256=bb2Ot9scclHKMOLDEHY6B2sicWOgugjFKaJsT7vwMQo,110
34
- commonnexus-1.8.0.dist-info/entry_points.txt,sha256=0CbsOaqe6jaHnG1rpheuIISCZvFp1G1oBgDym82rpAo,58
35
- commonnexus-1.8.0.dist-info/top_level.txt,sha256=SiWuQrmGciG5Ivhrqz6ueU1Sxv-fzudflsMof81-I54,12
36
- commonnexus-1.8.0.dist-info/RECORD,,
30
+ commonnexus/tools/normalise.py,sha256=RdAasznob1aIY4B5z0Ir3jm9T5Yj_nbFzdtFtUM553U,6131
31
+ commonnexus-1.9.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
32
+ commonnexus-1.9.1.dist-info/METADATA,sha256=wTiq1lHknPwEw9KXEd_2EB_UG_45v_jWmYBNhLFpTVE,5357
33
+ commonnexus-1.9.1.dist-info/WHEEL,sha256=bb2Ot9scclHKMOLDEHY6B2sicWOgugjFKaJsT7vwMQo,110
34
+ commonnexus-1.9.1.dist-info/entry_points.txt,sha256=0CbsOaqe6jaHnG1rpheuIISCZvFp1G1oBgDym82rpAo,58
35
+ commonnexus-1.9.1.dist-info/top_level.txt,sha256=SiWuQrmGciG5Ivhrqz6ueU1Sxv-fzudflsMof81-I54,12
36
+ commonnexus-1.9.1.dist-info/RECORD,,