phynetpy 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- phynetpy/Alphabet.py +273 -0
- phynetpy/BirthDeath.py +758 -0
- phynetpy/GTR.py +834 -0
- phynetpy/GeneTrees.py +693 -0
- phynetpy/GraphUtils.py +1654 -0
- phynetpy/MSA.py +705 -0
- phynetpy/Matrix.py +418 -0
- phynetpy/Network.py +4014 -0
- phynetpy/NetworkMoves.py +326 -0
- phynetpy/NetworkParser.py +583 -0
- phynetpy/Newick.py +295 -0
- phynetpy/PhyloNet.py +81 -0
- phynetpy/Validation.py +1022 -0
- phynetpy/__init__.py +0 -0
- phynetpy/test_alphabet.py +63 -0
- phynetpy/test_birthdeath.py +130 -0
- phynetpy/test_network.py +580 -0
- phynetpy/validation_demo.py +1 -0
- phynetpy-0.2.0.dist-info/METADATA +58 -0
- phynetpy-0.2.0.dist-info/RECORD +22 -0
- phynetpy-0.2.0.dist-info/WHEEL +5 -0
- phynetpy-0.2.0.dist-info/top_level.txt +1 -0
phynetpy/Alphabet.py
ADDED
|
@@ -0,0 +1,273 @@
|
|
|
1
|
+
#! /usr/bin/env python
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
|
|
4
|
+
##############################################################################
|
|
5
|
+
## -- PhyNetPy --
|
|
6
|
+
## Library for the Development and use of Phylogenetic Network Methods
|
|
7
|
+
##
|
|
8
|
+
## Copyright 2025 Mark Kessler, Luay Nakhleh.
|
|
9
|
+
## All rights reserved.
|
|
10
|
+
##
|
|
11
|
+
## See "LICENSE.txt" for terms and conditions of usage.
|
|
12
|
+
##
|
|
13
|
+
## If you use this work or any portion thereof in published work,
|
|
14
|
+
## please cite it as:
|
|
15
|
+
##
|
|
16
|
+
## Mark Kessler, Luay Nakhleh. 2025.
|
|
17
|
+
##
|
|
18
|
+
##############################################################################
|
|
19
|
+
|
|
20
|
+
"""
|
|
21
|
+
Author : Mark Kessler
|
|
22
|
+
Last Edit : 11/6/25
|
|
23
|
+
First Included in Version : 1.0.0
|
|
24
|
+
Docs - [x]
|
|
25
|
+
Tests - [x] Passed 5/5 tests with 100% coverage on 11/7/25
|
|
26
|
+
Design - [x]
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
from dataclasses import dataclass
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
########################
|
|
33
|
+
### MODULE CONSTANTS ###
|
|
34
|
+
########################
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@dataclass(frozen=True)
class AlphabetMapping:
    """
    Immutable pairing of an alphabet name with its lookup table.

    'frozen' only prevents re-binding the two attributes; the dictionary
    stored in 'mapping' is an ordinary dict and is not itself protected
    from mutation.

    Attributes:
        name (str): label identifying the alphabet (e.g. "DNA").
        mapping (dict[str, int]): lookup table belonging to the alphabet.
    """

    # Label identifying the alphabet
    name: str

    # Lookup table belonging to the alphabet
    mapping: dict[str, int]
|
|
41
|
+
|
|
42
|
+
# Forward tables: character -> integer state.
DNA: AlphabetMapping = AlphabetMapping(
    "DNA",
    {"-": 0, "A": 1, "C": 2, "M": 3, "G": 4, "R": 5, "S": 6,
     "V": 7, "T": 8, "W": 9, "Y": 10, "H": 11, "K": 12,
     "D": 13, "B": 14, "X": 15})

# Same table as DNA, with "U" in place of "T".
RNA: AlphabetMapping = AlphabetMapping(
    "RNA",
    {"-": 0, "A": 1, "C": 2, "M": 3, "G": 4, "R": 5, "S": 6,
     "V": 7, "U": 8, "W": 9, "Y": 10, "H": 11,
     "K": 12, "D": 13, "B": 14, "X": 15})

PROTEIN: AlphabetMapping = AlphabetMapping(
    "PROTEIN",
    {"-": 0, "A": 1, "B": 2, "C": 3, "D": 4,
     "E": 5, "F": 6, "G": 7, "H": 8, "I": 9,
     "J": 10, "K": 11, "L": 12, "M": 13, "N": 14,
     "P": 15, "Q": 16, "R": 17, "S": 18, "T": 19,
     "V": 20, "W": 21, "X": 22, "Y": 23, "Z": 24,
     ".": 25})

# Same table as DNA, with "." in place of "X" for state 15.
CODON: AlphabetMapping = AlphabetMapping(
    "CODON",
    {"-": 0, "A": 1, "C": 2, "M": 3, "G": 4, "R": 5,
     "S": 6, "V": 7, "T": 8, "W": 9, "Y": 10,
     "H": 11, "K": 12, "D": 13, "B": 14, ".": 15})

_ALPHABETS: list[AlphabetMapping] = [DNA, RNA, PROTEIN, CODON]

_ALPHABET_NAMES: list[str] = ["DNA", "RNA", "PROTEIN", "CODON"]

# Hard-coded reverse mappings for standard alphabets (state -> character).
# Each one is an AlphabetMapping wrapper, not a bare dict. The wrapped
# dictionary is keyed by int even though the 'mapping' field of
# AlphabetMapping is declared as dict[str, int].
_DNA_REVERSE: AlphabetMapping = AlphabetMapping(
    "DNA_REVERSE",
    {0: "-", 1: "A", 2: "C", 3: "M", 4: "G", 5: "R", 6: "S",
     7: "V", 8: "T", 9: "W", 10: "Y", 11: "H", 12: "K",
     13: "D", 14: "B", 15: "X"})

_RNA_REVERSE: AlphabetMapping = AlphabetMapping(
    "RNA_REVERSE",
    {0: "-", 1: "A", 2: "C", 3: "M", 4: "G", 5: "R", 6: "S",
     7: "V", 8: "U", 9: "W", 10: "Y", 11: "H", 12: "K",
     13: "D", 14: "B", 15: "X"})

_PROTEIN_REVERSE: AlphabetMapping = AlphabetMapping(
    "PROTEIN_REVERSE",
    {0: "-", 1: "A", 2: "B", 3: "C", 4: "D", 5: "E", 6: "F",
     7: "G", 8: "H", 9: "I", 10: "J", 11: "K", 12: "L",
     13: "M", 14: "N", 15: "P", 16: "Q", 17: "R", 18: "S",
     19: "T", 20: "V", 21: "W", 22: "X", 23: "Y", 24: "Z",
     25: "."})

_CODON_REVERSE: AlphabetMapping = AlphabetMapping(
    "CODON_REVERSE",
    {0: "-", 1: "A", 2: "C", 3: "M", 4: "G", 5: "R", 6: "S",
     7: "V", 8: "T", 9: "W", 10: "Y", 11: "H", 12: "K",
     13: "D", 14: "B", 15: "."})

# Forward-alphabet name -> its hard-coded reverse table.
_REVERSE_MAPPINGS: dict[str, AlphabetMapping] = {
    DNA.name: _DNA_REVERSE,
    RNA.name: _RNA_REVERSE,
    PROTEIN.name: _PROTEIN_REVERSE,
    CODON.name: _CODON_REVERSE,
}
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
#########################
|
|
94
|
+
#### EXCEPTION CLASS ####
|
|
95
|
+
#########################
|
|
96
|
+
|
|
97
|
+
class AlphabetError(Exception):
    """
    Error class for all errors relating to alphabet mappings.
    """

    def __init__(self,
                 message : str = "Error during Alphabet class mapping "
                                 "operation") -> None:
        """
        Initialize an AlphabetError with a message.

        Args:
            message (str): error message. The default is assembled from
                           adjacent string literals so that no source
                           indentation leaks into the text.
        Returns:
            N/A
        """
        # Kept as an attribute so callers can read the text directly
        self.message = message
        super().__init__(self.message)
|
|
113
|
+
|
|
114
|
+
##########################
|
|
115
|
+
#### HELPER FUNCTIONS ####
|
|
116
|
+
##########################
|
|
117
|
+
|
|
118
|
+
def _build_reverse_mapping(mapping: dict[str, int]) -> dict[int, str]:
|
|
119
|
+
"""
|
|
120
|
+
Build a reverse mapping dictionary from state to character.
|
|
121
|
+
If multiple characters map to the same state, keeps the first one encountered.
|
|
122
|
+
|
|
123
|
+
Args:
|
|
124
|
+
mapping (dict[str, int]): Dictionary mapping characters to states
|
|
125
|
+
Returns:
|
|
126
|
+
dict[int, str]: Dictionary mapping states to characters
|
|
127
|
+
"""
|
|
128
|
+
reverse_mapping : dict[int, str] = {}
|
|
129
|
+
for char, state in mapping.items():
|
|
130
|
+
if state not in reverse_mapping:
|
|
131
|
+
reverse_mapping[state] = char
|
|
132
|
+
return reverse_mapping
|
|
133
|
+
|
|
134
|
+
def snp_alphabet(ploidy : int) -> AlphabetMapping:
    """
    For SNP alphabet initialization. For data sets in which the maximum
    ploidy is Xn, use X as @ploidy.

    For phased SNP data, use 1. For unphased SNP data, use 2.

    Args:
        ploidy (int): The ploidyness value of a species
                      (ie, humans = 2, some plants > 2, etc)

    Returns:
        AlphabetMapping: A mapping named "SNP" whose table sends str(i) to
                         i for every 0 <= i <= ploidy, and sends "-" to
                         ploidy + 1.
    """
    # One entry per count 0..ploidy, keyed by its decimal string
    table : dict[str, int] = {str(count): count
                              for count in range(ploidy + 1)}

    # "-" takes the first state value not used by the counts above
    table["-"] = ploidy + 1

    return AlphabetMapping("SNP", table)
|
|
157
|
+
|
|
158
|
+
########################
|
|
159
|
+
#### ALPHABET CLASS ####
|
|
160
|
+
########################
|
|
161
|
+
|
|
162
|
+
class Alphabet:
    """
    Class that deals with the mapping from characters to state values that
    have partial likelihood values associated with them.
    This state mapping is primarily based on Base10 -> Binary
    conversions such that the decimal numbers become a generalized
    version of the one-hot encoding scheme.

    DNA MAPPING INFORMATION
    Symbol(s)     Name                 Partial Likelihood
    A             Adenine              [1,0,0,0] -> 1
    C             Cytosine             [0,1,0,0] -> 2
    G             Guanine              [0,0,1,0] -> 4
    T (U in RNA)  Thymine (Uracil)     [0,0,0,1] -> 8

    Symbol(s)     Name          Covers     Partial Likelihood
    X             Any           A C G T    ([1,1,1,1] -> 15)
    V             Not T         A C G      ([1,1,1,0] -> 7)
    H             Not G         A C T      ([1,1,0,1] -> 11)
    D             Not C         A G T      ([1,0,1,1] -> 13)
    B             Not A         C G T      ([0,1,1,1] -> 14)
    M             Amino         A C        ([1,1,0,0] -> 3)
    R             Purine        A G        ([1,0,1,0] -> 5)
    W             Weak          A T        ([1,0,0,1] -> 9)
    S             Strong        C G        ([0,1,1,0] -> 6)
    Y             Pyrimidine    C T        ([0,1,0,1] -> 10)
    K             Keto          G T        ([0,0,1,1] -> 12)

    The "-" symbol maps to state 0 in the DNA table.
    """

    def __init__(self, mapping : AlphabetMapping) -> None:
        """
        Initialize this Alphabet object with a mapping of choice. May be from
        any of the predefined mappings {DNA, RNA, PROTEIN, CODON}, or it
        can be a special user defined alphabet.

        For SNP alphabets, use the helper function 'snp_alphabet' with your
        desired ploidy upperbound and generate a custom alphabet that way.

        Args:
            mapping (AlphabetMapping): Any of the constant type alphabets
                                       (from the set {DNA, RNA, PROTEIN,
                                       CODON}), or a user defined alphabet.
        Returns:
            N/A
        """
        self.alphabet : AlphabetMapping = mapping

        # The hard-coded reverse table is only valid when the mapping really
        # is one of the standard alphabets. Matching on the name alone would
        # hand a user-defined alphabet that happens to be called "DNA" (or
        # "RNA", ...) a reverse table that disagrees with its own contents,
        # so equality with a standard alphabet is required as well.
        precomputed = _REVERSE_MAPPINGS.get(mapping.name)
        if precomputed is not None and mapping in _ALPHABETS:
            self._reverse_mapping : AlphabetMapping = precomputed
        else:
            # Build the reverse table from the mapping itself.
            # Note: If multiple characters map to the same state, we keep
            # the first character encountered.
            self._reverse_mapping = AlphabetMapping(
                "USER_REVERSE", _build_reverse_mapping(mapping.mapping))

    def map(self, char : str) -> int:
        """
        Return mapping for a character encountered in a nexus file.

        The lookup is performed on the upper-cased form of 'char', so keys
        of a user-defined alphabet that contain lower-case letters are not
        reachable through this method.

        Raises:
            AlphabetError: if the char encountered is undefined for the data
                           mapping.

        Args:
            char (str): nexus file matrix data point
        Returns:
            int: the integer corresponding to char in the alphabet mapping
        """
        try:
            return self.alphabet.mapping[char.upper()]
        except KeyError:
            # Adjacent literals keep the message on one logical line without
            # embedding source indentation; the KeyError adds nothing beyond
            # what the message already says, so it is suppressed.
            raise AlphabetError(
                f"Attempted to map <{char}>. That character is invalid "
                "for this alphabet") from None

    def get_type(self) -> str:
        """
        Returns a string that is equal to the alphabet constant name.

        ie. if one is using the DNA alphabet,
        this function will return "DNA"

        Args:
            N/A
        Returns:
            str: the type of alphabet being used
        """
        return self.alphabet.name

    def reverse_map(self, state : int) -> str:
        """
        Get the character that maps to "state" in the given alphabet

        Raises:
            AlphabetError: if the provided state is not a valid one in the
                           alphabet

        Args:
            state (int): a value in the alphabet map
        Returns:
            str: the key that maps to "state"
        """
        try:
            return self._reverse_mapping.mapping[state]
        except KeyError:
            raise AlphabetError(
                "Given state does not exist in alphabet") from None
|
|
273
|
+
|