phynetpy 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
phynetpy/Alphabet.py ADDED
@@ -0,0 +1,273 @@
1
+ #! /usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+
4
+ ##############################################################################
5
+ ## -- PhyNetPy --
6
+ ## Library for the Development and use of Phylogenetic Network Methods
7
+ ##
8
+ ## Copyright 2025 Mark Kessler, Luay Nakhleh.
9
+ ## All rights reserved.
10
+ ##
11
+ ## See "LICENSE.txt" for terms and conditions of usage.
12
+ ##
13
+ ## If you use this work or any portion thereof in published work,
14
+ ## please cite it as:
15
+ ##
16
+ ## Mark Kessler, Luay Nakhleh. 2025.
17
+ ##
18
+ ##############################################################################
19
+
20
+ """
21
+ Author : Mark Kessler
22
+ Last Edit : 11/6/25
23
+ First Included in Version : 1.0.0
24
+ Docs - [x]
25
+ Tests - [x] Passed 5/5 tests with 100% coverage on 11/7/25
26
+ Design - [x]
27
+ """
28
+
29
+ from dataclasses import dataclass
30
+
31
+
32
+ ########################
33
+ ### MODULE CONSTANTS ###
34
+ ########################
35
+
36
+
37
@dataclass(frozen=True)
class AlphabetMapping:
    """
    Immutable pairing of an alphabet name with its lookup table.

    Instances compare equal when both the name and the table are equal
    (dataclass-generated __eq__).
    """

    # Identifier of the alphabet, e.g. "DNA" or "SNP".
    name : str
    # Lookup table of the alphabet (character -> state for forward alphabets).
    mapping : dict[str, int]

    def __hash__(self) -> int:
        """
        Hash an alphabet mapping by its name.

        A frozen dataclass would otherwise generate a __hash__ over every
        field; because @mapping is a dict (unhashable) that generated hash
        raises a TypeError. Hashing on the name alone keeps instances usable
        in sets / as dict keys and stays consistent with __eq__, since equal
        instances always share a name.

        Args:
            N/A
        Returns:
            int: hash of the alphabet name
        """
        return hash(self.name)
41
+
42
# Every standard alphabet assigns the consecutive states 0, 1, 2, ... to its
# symbols, so each table is generated by enumerating the symbols in state
# order (index in the string == state value).
DNA : AlphabetMapping = AlphabetMapping(
    "DNA",
    {symbol : state for state, symbol in enumerate("-ACMGRSVTWYHKDBX")},
)

# Same layout as DNA, with "U" taking the place of "T" at state 8.
RNA : AlphabetMapping = AlphabetMapping(
    "RNA",
    {symbol : state for state, symbol in enumerate("-ACMGRSVUWYHKDBX")},
)

# "-" is state 0, the letters follow alphabetically (no "O" or "U"), and "."
# is the final state, 25.
PROTEIN : AlphabetMapping = AlphabetMapping(
    "PROTEIN",
    {symbol : state for state, symbol in enumerate("-ABCDEFGHIJKLMNPQRSTVWXYZ.")},
)

# Same layout as DNA, except that state 15 is "." instead of "X".
CODON : AlphabetMapping = AlphabetMapping(
    "CODON",
    {symbol : state for state, symbol in enumerate("-ACMGRSVTWYHKDB.")},
)

_ALPHABETS : list[AlphabetMapping] = [DNA, RNA, PROTEIN, CODON]

# Names of the standard alphabets, in the same order as _ALPHABETS.
_ALPHABET_NAMES : list[str] = [alphabet.name for alphabet in _ALPHABETS]
66
+
67
# Hard-coded reverse mappings (state -> character) for the standard alphabets.
# Each one is an AlphabetMapping whose "mapping" table is keyed by the integer
# state, so the constants are annotated as AlphabetMapping rather than as a
# plain dict.
_DNA_REVERSE : AlphabetMapping = AlphabetMapping(
    "DNA_REVERSE",
    {0: "-", 1: "A", 2: "C", 3: "M", 4: "G", 5: "R", 6: "S",
     7: "V", 8: "T", 9: "W", 10: "Y", 11: "H", 12: "K",
     13: "D", 14: "B", 15: "X"})

_RNA_REVERSE : AlphabetMapping = AlphabetMapping(
    "RNA_REVERSE",
    {0: "-", 1: "A", 2: "C", 3: "M", 4: "G", 5: "R", 6: "S",
     7: "V", 8: "U", 9: "W", 10: "Y", 11: "H", 12: "K",
     13: "D", 14: "B", 15: "X"})

_PROTEIN_REVERSE : AlphabetMapping = AlphabetMapping(
    "PROTEIN_REVERSE",
    {0: "-", 1: "A", 2: "B", 3: "C", 4: "D", 5: "E", 6: "F",
     7: "G", 8: "H", 9: "I", 10: "J", 11: "K", 12: "L",
     13: "M", 14: "N", 15: "P", 16: "Q", 17: "R", 18: "S",
     19: "T", 20: "V", 21: "W", 22: "X", 23: "Y", 24: "Z",
     25: "."})

_CODON_REVERSE : AlphabetMapping = AlphabetMapping(
    "CODON_REVERSE",
    {0: "-", 1: "A", 2: "C", 3: "M", 4: "G", 5: "R", 6: "S",
     7: "V", 8: "T", 9: "W", 10: "Y", 11: "H", 12: "K",
     13: "D", 14: "B", 15: "."})

# Lookup from the name of a standard (forward) alphabet to its reverse table.
_REVERSE_MAPPINGS : dict[str, AlphabetMapping] = {
    DNA.name: _DNA_REVERSE,
    RNA.name: _RNA_REVERSE,
    PROTEIN.name: _PROTEIN_REVERSE,
    CODON.name: _CODON_REVERSE,
}
91
+
92
+
93
+ #########################
94
+ #### EXCEPTION CLASS ####
95
+ #########################
96
+
97
class AlphabetError(Exception):
    """
    Error class for all errors relating to alphabet mappings.
    """

    def __init__(self,
                 message : str = "Error during Alphabet class mapping "
                                 "operation") -> None:
        """
        Initialize an AlphabetError with a message.

        The default message is assembled from adjacent string literals. A
        backslash continuation inside a single literal would splice the next
        source line's leading whitespace into the message text.

        Args:
            message (str): error message
        Returns:
            N/A
        """
        self.message = message
        super().__init__(self.message)
113
+
114
+ ##########################
115
+ #### HELPER FUNCTIONS ####
116
+ ##########################
117
+
118
+ def _build_reverse_mapping(mapping: dict[str, int]) -> dict[int, str]:
119
+ """
120
+ Build a reverse mapping dictionary from state to character.
121
+ If multiple characters map to the same state, keeps the first one encountered.
122
+
123
+ Args:
124
+ mapping (dict[str, int]): Dictionary mapping characters to states
125
+ Returns:
126
+ dict[int, str]: Dictionary mapping states to characters
127
+ """
128
+ reverse_mapping : dict[int, str] = {}
129
+ for char, state in mapping.items():
130
+ if state not in reverse_mapping:
131
+ reverse_mapping[state] = char
132
+ return reverse_mapping
133
+
134
def snp_alphabet(ploidy : int) -> AlphabetMapping:
    """
    Build the SNP alphabet for a given ploidy upper bound. For data sets in
    which the maximum ploidy is Xn, use X as @ploidy.

    For phased SNP data, use 1. For unphased SNP data, use 2.

    Args:
        ploidy (int): The ploidyness value of a species
                      (ie, humans = 2, some plants > 2, etc)

    Returns:
        AlphabetMapping: An alphabet named "SNP" that maps str(i) -> i for
                         every 0 <= i <= ploidy, plus "-" -> ploidy + 1.
    """
    # One state per count 0..ploidy, keyed by the count's decimal string.
    states : dict[str, int] = {str(count) : count for count in range(ploidy + 1)}

    # "-" takes the first state value after the counts.
    states["-"] = ploidy + 1

    return AlphabetMapping("SNP", states)
157
+
158
+ ########################
159
+ #### ALPHABET CLASS ####
160
+ ########################
161
+
162
class Alphabet:
    """
    Class that deals with the mapping from characters to state values that
    have partial likelihood values associated with them.
    This state mapping is primarily based on Base10 -> Binary
    conversions such that the decimal numbers become a generalized
    version of the one-hot encoding scheme.

    DNA MAPPING INFORMATION
    Symbol(s)   Name         Partial Likelihood
    A           Adenine      [1,0,0,0] -> 1
    C           Cytosine     [0,1,0,0] -> 2
    G           Guanine      [0,0,1,0] -> 4
    T U         Thymine      [0,0,0,1] -> 8   (U in the RNA alphabet)

    Symbol(s)   Name         Partial Likelihood
    X           Any          A C G T  ([1,1,1,1] -> 15)
    V           Not T        A C G    ([1,1,1,0] -> 7)
    H           Not G        A C T    ([1,1,0,1] -> 11)
    D           Not C        A G T    ([1,0,1,1] -> 13)
    B           Not A        C G T    ([0,1,1,1] -> 14)
    M           Amino        A C      ([1,1,0,0] -> 3)
    R           Purine       A G      ([1,0,1,0] -> 5)
    W           Weak         A T      ([1,0,0,1] -> 9)
    S           Strong       C G      ([0,1,1,0] -> 6)
    Y           Pyrimidine   C T      ([0,1,0,1] -> 10)
    K           Keto         G T      ([0,0,1,1] -> 12)
    """

    def __init__(self, mapping : AlphabetMapping) -> None:
        """
        Initialize this Alphabet object with a mapping of choice. May be from
        any of the predefined mappings {DNA, RNA, PROTEIN, CODON}, or it
        can be a special user defined alphabet.

        For SNP alphabets, use the helper function 'snp_alphabet' with your
        desired ploidy upperbound and generate a custom alphabet that way.

        Args:
            mapping (AlphabetMapping): Any of the constant type alphabets
                                       (from the set {DNA, RNA, PROTEIN,
                                       CODON}), or a user defined alphabet.
        Returns:
            N/A
        """
        self.alphabet : AlphabetMapping = mapping

        # Reuse a pre-computed reverse table only when @mapping really is one
        # of the standard alphabets (same name AND same table). Matching on
        # the name alone would hand a user-defined alphabet that merely
        # happens to be called e.g. "DNA" a reverse table that does not
        # belong to it.
        self._reverse_mapping : AlphabetMapping
        if mapping in _ALPHABETS:
            self._reverse_mapping = _REVERSE_MAPPINGS[mapping.name]
        else:
            # Build the reverse table for user-defined alphabets.
            # Note: If multiple characters map to the same state, the first
            # character encountered is kept.
            self._reverse_mapping = AlphabetMapping(
                "USER_REVERSE", _build_reverse_mapping(mapping.mapping))

    def map(self, char : str) -> int:
        """
        Return mapping for a character encountered in a nexus file.

        The character is looked up exactly as given first; if that fails, its
        upper-cased form is tried. The exact lookup lets user-defined
        alphabets with lowercase symbols work, while the fallback keeps the
        standard (upper-case) alphabets case-insensitive.

        Raises:
            AlphabetError: if the char encountered is undefined for the data
                           mapping.

        Args:
            char (str): nexus file matrix data point
        Returns:
            int: the integer corresponding to char in the alphabet mapping
        """
        table = self.alphabet.mapping
        if char in table:
            return table[char]

        try:
            return table[char.upper()]
        except KeyError:
            # "from None": the KeyError adds nothing beyond this message.
            raise AlphabetError(
                "Attempted to map <" + char + ">. That character is invalid "
                "for this alphabet") from None

    def get_type(self) -> str:
        """
        Returns a string that is equal to the alphabet constant name.

        ie. if one is using the DNA alphabet,
        this function will return "DNA"

        Args:
            N/A
        Returns:
            str: the type of alphabet being used
        """
        return self.alphabet.name

    def reverse_map(self, state : int) -> str:
        """
        Get the character that maps to "state" in the given alphabet.

        Raises:
            AlphabetError: if the provided state is not a valid one in the
                           alphabet

        Args:
            state (int): a value in the alphabet map
        Returns:
            str: the key that maps to "state"
        """
        try:
            return self._reverse_mapping.mapping[state]
        except KeyError:
            # "from None": the KeyError adds nothing beyond this message.
            raise AlphabetError("Given state does not exist in alphabet") from None
273
+