msasim 25.5.3__cp313-cp313-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
_Sailfish/__init__.pyi ADDED
@@ -0,0 +1,434 @@
1
+ """
2
+
3
+ Sailfish simulator
4
+ -----------------------
5
+
6
+ .. currentmodule:: _Sailfish
7
+
8
+ .. autosummary::
9
+ :toctree: _generate
10
+
11
+ DiscreteDistribution
12
+ SimProtocol
13
+ alphabetCode
14
+ modelCode
15
+ modelFactory
16
+ Simulator
17
+ Msa
18
+ Tree
19
+
20
+ """
21
+ from __future__ import annotations
22
+ import pybind11_stubgen.typing_ext
23
+ import typing
24
+ __all__ = ['AAJC', 'AMINOACID', 'Block', 'BlockTree', 'CPREV45', 'CUSTOM', 'DAYHOFF', 'Deletion', 'DiscreteDistribution', 'EHO_EXTENDED', 'EHO_HELIX', 'EHO_OTHER', 'EMPIRICODON', 'EX_BURIED', 'EX_EHO_BUR_EXT', 'EX_EHO_BUR_HEL', 'EX_EHO_BUR_OTH', 'EX_EHO_EXP_EXT', 'EX_EHO_EXP_HEL', 'EX_EHO_EXP_OTH', 'EX_EXPOSED', 'GTR', 'HIVB', 'HIVW', 'HKY', 'Insertion', 'JONES', 'LG', 'MTREV24', 'Msa', 'NUCJC', 'NUCLEOTIDE', 'NULLCODE', 'SimProtocol', 'Simulator', 'TAMURA92', 'Tree', 'WAG', 'alphabetCode', 'event', 'modelCode', 'modelFactory', 'node', 'sequenceContainer']
25
class Block:
    """Stub for the native ``Block`` type exposed by the ``_Sailfish`` extension."""

    # pybind11 interoperability hook emitted by the stub generator; not part of
    # the simulator API.
    @staticmethod
    def _pybind11_conduit_v1_(*args, **kwargs):
        ...

    # Built from two integers; the binding does not name them.
    # NOTE(review): presumably a position and a length -- confirm against the
    # native sources.
    def __init__(self, arg0: int, arg1: int) -> None:
        ...
31
class BlockTree:
    """Stub for the native ``BlockTree`` type exposed by the ``_Sailfish`` extension."""

    # pybind11 interoperability hook emitted by the stub generator.
    @staticmethod
    def _pybind11_conduit_v1_(*args, **kwargs):
        ...

    # Built from a single integer (the Python wrapper passes a root length).
    def __init__(self, arg0: int) -> None:
        ...

    # Returns the blocks as a list of fixed-size triples of integers.
    def block_list(self) -> list[typing.Annotated[list[int], pybind11_stubgen.typing_ext.FixedSize(3)]]:
        ...

    # Despite the name, this returns the textual form instead of printing it.
    def print_tree(self) -> str:
        ...
41
class DiscreteDistribution:
    """Stub for the native discrete sampler, built from a list of probabilities."""

    # pybind11 interoperability hook emitted by the stub generator.
    @staticmethod
    def _pybind11_conduit_v1_(*args, **kwargs):
        ...

    # Static in the binding, so the seed is not tied to one instance.
    @staticmethod
    def set_seed(arg0: int) -> None:
        """
        Set seed for the random number generator
        """

    # arg0: the probability of each outcome.
    def __init__(self, arg0: list[float]) -> None:
        ...

    def draw_sample(self) -> int:
        """
        Draw a random sample according to the given distribution
        """

    # Each entry of the returned table is a (float, int) pair.
    def get_table(self) -> list[tuple[float, int]]:
        """
        Get Vose's alias table (useful for debugging)
        """
60
class Msa:
    """Stub for the native multiple-sequence-alignment type of ``_Sailfish``."""

    # pybind11 interoperability hook emitted by the stub generator.
    @staticmethod
    def _pybind11_conduit_v1_(*args, **kwargs):
        ...

    # Overload 1: two integers plus a list of booleans.
    @typing.overload
    def __init__(self, arg0: int, arg1: int, arg2: list[bool]) -> None:
        ...

    # Overload 2: a mapping shaped like the return value of
    # ``Simulator.gen_indels``, a tree ``node`` and a list of booleans.
    @typing.overload
    def __init__(self, arg0: dict[int, tuple[list[typing.Annotated[list[int], pybind11_stubgen.typing_ext.FixedSize(3)]], int]], arg1: node, arg2: list[bool]) -> None:
        ...

    def fill_substitutions(self, arg0: sequenceContainer) -> None:
        ...

    # NOTE(review): the first parameter is annotated as a list (the shape
    # returned by ``Simulator.run_sim``), not as an ``Msa``. This looks like a
    # static function bound as a regular method -- confirm against the binding
    # code before calling it on an instance.
    def generate_msas(self: list[dict[int, tuple[list[typing.Annotated[list[int], pybind11_stubgen.typing_ext.FixedSize(3)]], int]]], arg0: node, arg1: list[bool]) -> list[Msa]:
        ...

    # Alignment as a mapping from an integer key to a list of integers.
    def get_msa(self) -> dict[int, list[int]]:
        ...

    def get_msa_string(self) -> str:
        ...

    def length(self) -> int:
        ...

    def num_sequences(self) -> int:
        ...

    # The two print_* methods return None (output goes elsewhere, presumably
    # stdout -- TODO confirm).
    def print_indels(self) -> None:
        ...

    def print_msa(self) -> None:
        ...

    def set_substitutions_folder(self, arg0: str) -> None:
        ...

    def write_msa(self, arg0: str) -> None:
        ...

    def write_msa_from_dir(self, arg0: str) -> None:
        ...
92
class SimProtocol:
    """Stub for the native simulation protocol, constructed from a ``Tree``.

    Getters for rates and length distributions take one integer (the Python
    wrapper passes a branch number); the matching setters take one list.
    """

    # pybind11 interoperability hook emitted by the stub generator.
    @staticmethod
    def _pybind11_conduit_v1_(*args, **kwargs):
        ...

    def __init__(self, arg0: Tree) -> None:
        ...

    def get_deletion_length_distribution(self, arg0: int) -> DiscreteDistribution:
        ...

    def get_deletion_rate(self, arg0: int) -> float:
        ...

    def get_insertion_length_distribution(self, arg0: int) -> DiscreteDistribution:
        ...

    def get_insertion_rate(self, arg0: int) -> float:
        ...

    def get_minimum_sequence_size(self) -> int:
        ...

    def get_seed(self) -> int:
        ...

    def get_sequence_size(self) -> int:
        ...

    def set_deletion_length_distributions(self, arg0: list[DiscreteDistribution]) -> None:
        ...

    def set_deletion_rates(self, arg0: list[float]) -> None:
        ...

    def set_insertion_length_distributions(self, arg0: list[DiscreteDistribution]) -> None:
        ...

    def set_insertion_rates(self, arg0: list[float]) -> None:
        ...

    def set_minimum_sequence_size(self, arg0: int) -> None:
        ...

    def set_seed(self, arg0: int) -> None:
        ...

    def set_sequence_size(self, arg0: int) -> None:
        ...
126
class Simulator:
    """Stub for the native simulator, constructed from a ``SimProtocol``."""

    # pybind11 interoperability hook emitted by the stub generator.
    @staticmethod
    def _pybind11_conduit_v1_(*args, **kwargs):
        ...

    def __init__(self, arg0: SimProtocol) -> None:
        ...

    # Returns a mapping from an integer key to (list of integer triples, int);
    # this is the shape accepted by the second ``Msa`` constructor overload.
    def gen_indels(self) -> dict[int, tuple[list[typing.Annotated[list[int], pybind11_stubgen.typing_ext.FixedSize(3)]], int]]:
        ...

    def gen_substitutions(self, arg0: int) -> sequenceContainer:
        ...

    # Same inputs as gen_substitutions plus a string, and no return value.
    # NOTE(review): presumably the string is an output directory -- confirm.
    def gen_substitutions_to_dir(self, arg0: int, arg1: str) -> None:
        ...

    def get_saved_nodes_mask(self) -> list[bool]:
        ...

    def get_site_rates(self) -> list[float]:
        ...

    def init_substitution_sim(self, arg0: modelFactory) -> None:
        ...

    def reset_sim(self, arg0: SimProtocol) -> None:
        ...

    # Returns a list of mappings, each shaped like the result of gen_indels.
    def run_sim(self, arg0: int) -> list[dict[int, tuple[list[typing.Annotated[list[int], pybind11_stubgen.typing_ext.FixedSize(3)]], int]]]:
        ...

    def save_all_nodes_sequences(self) -> None:
        ...

    def save_root_sequence(self) -> None:
        ...

    def save_site_rates(self, arg0: bool) -> None:
        ...
154
class Tree:
    """Stub for the native phylogenetic tree type of ``_Sailfish``."""

    # pybind11 interoperability hook emitted by the stub generator.
    @staticmethod
    def _pybind11_conduit_v1_(*args, **kwargs):
        ...

    def __init__(self, arg0: str, arg1: bool) -> None:
        """
        Create a phylogenetic tree object.

        arg0 - a Newick string, or the path of a file containing one
        arg1 - True when arg0 is a file path (the Python wrapper passes the
               result of its own ``os.path.isfile`` check here)
        """

    @property
    def num_nodes(self) -> int:
        ...

    # The generator emitted ``-> ...`` here because it could not resolve the
    # native return type. The Python wrapper reads ``root.num_leaves``, which
    # is a property of ``node``, so the root is typed as ``node``.
    @property
    def root(self) -> node:
        ...
168
class alphabetCode:
    """
    Enumeration of alphabet codes (pybind11 enum).

    Members:

      NULLCODE (0)

      NUCLEOTIDE (1)

      AMINOACID (2)
    """
    AMINOACID: typing.ClassVar[alphabetCode]  # value = <alphabetCode.AMINOACID: 2>
    NUCLEOTIDE: typing.ClassVar[alphabetCode]  # value = <alphabetCode.NUCLEOTIDE: 1>
    NULLCODE: typing.ClassVar[alphabetCode]  # value = <alphabetCode.NULLCODE: 0>
    __members__: typing.ClassVar[dict[str, alphabetCode]]  # value = {'NULLCODE': <alphabetCode.NULLCODE: 0>, 'NUCLEOTIDE': <alphabetCode.NUCLEOTIDE: 1>, 'AMINOACID': <alphabetCode.AMINOACID: 2>}

    # pybind11 interoperability hook emitted by the stub generator.
    @staticmethod
    def _pybind11_conduit_v1_(*args, **kwargs):
        ...

    # Comparison, hashing, pickling and integer-conversion protocol of the enum.
    def __eq__(self, other: typing.Any) -> bool:
        ...

    def __getstate__(self) -> int:
        ...

    def __hash__(self) -> int:
        ...

    def __index__(self) -> int:
        ...

    # A member can be constructed from its integer value.
    def __init__(self, value: int) -> None:
        ...

    def __int__(self) -> int:
        ...

    def __ne__(self, other: typing.Any) -> bool:
        ...

    def __repr__(self) -> str:
        ...

    def __setstate__(self, state: int) -> None:
        ...

    def __str__(self) -> str:
        ...

    # Member name as a string.
    @property
    def name(self) -> str:
        ...

    # Integer value of the member.
    @property
    def value(self) -> int:
        ...
211
class event:
    """
    Enumeration of indel event kinds (pybind11 enum).

    Members:

      Insertion (0)

      Deletion (1)
    """
    Deletion: typing.ClassVar[event]  # value = <event.Deletion: 1>
    Insertion: typing.ClassVar[event]  # value = <event.Insertion: 0>
    __members__: typing.ClassVar[dict[str, event]]  # value = {'Insertion': <event.Insertion: 0>, 'Deletion': <event.Deletion: 1>}

    # pybind11 interoperability hook emitted by the stub generator.
    @staticmethod
    def _pybind11_conduit_v1_(*args, **kwargs):
        ...

    # Comparison, hashing, pickling and integer-conversion protocol of the enum.
    def __eq__(self, other: typing.Any) -> bool:
        ...

    def __getstate__(self) -> int:
        ...

    def __hash__(self) -> int:
        ...

    def __index__(self) -> int:
        ...

    # A member can be constructed from its integer value.
    def __init__(self, value: int) -> None:
        ...

    def __int__(self) -> int:
        ...

    def __ne__(self, other: typing.Any) -> bool:
        ...

    def __repr__(self) -> str:
        ...

    def __setstate__(self, state: int) -> None:
        ...

    def __str__(self) -> str:
        ...

    # Member name as a string.
    @property
    def name(self) -> str:
        ...

    # Integer value of the member.
    @property
    def value(self) -> int:
        ...
251
class modelCode:
    """
    Enumeration of replacement-model codes (pybind11 enum).

    Members (integer value in parentheses; note that 5 is not assigned):

      NUCJC (0), AAJC (1), GTR (2), HKY (3), TAMURA92 (4),
      CPREV45 (6), DAYHOFF (7), JONES (8), MTREV24 (9), WAG (10),
      HIVB (11), HIVW (12), LG (13), EMPIRICODON (14),
      EX_BURIED (15), EX_EXPOSED (16),
      EHO_EXTENDED (17), EHO_HELIX (18), EHO_OTHER (19),
      EX_EHO_BUR_EXT (20), EX_EHO_BUR_HEL (21), EX_EHO_BUR_OTH (22),
      EX_EHO_EXP_EXT (23), EX_EHO_EXP_HEL (24), EX_EHO_EXP_OTH (25),
      CUSTOM (26)
    """
    AAJC: typing.ClassVar[modelCode]  # value = <modelCode.AAJC: 1>
    CPREV45: typing.ClassVar[modelCode]  # value = <modelCode.CPREV45: 6>
    CUSTOM: typing.ClassVar[modelCode]  # value = <modelCode.CUSTOM: 26>
    DAYHOFF: typing.ClassVar[modelCode]  # value = <modelCode.DAYHOFF: 7>
    EHO_EXTENDED: typing.ClassVar[modelCode]  # value = <modelCode.EHO_EXTENDED: 17>
    EHO_HELIX: typing.ClassVar[modelCode]  # value = <modelCode.EHO_HELIX: 18>
    EHO_OTHER: typing.ClassVar[modelCode]  # value = <modelCode.EHO_OTHER: 19>
    EMPIRICODON: typing.ClassVar[modelCode]  # value = <modelCode.EMPIRICODON: 14>
    EX_BURIED: typing.ClassVar[modelCode]  # value = <modelCode.EX_BURIED: 15>
    EX_EHO_BUR_EXT: typing.ClassVar[modelCode]  # value = <modelCode.EX_EHO_BUR_EXT: 20>
    EX_EHO_BUR_HEL: typing.ClassVar[modelCode]  # value = <modelCode.EX_EHO_BUR_HEL: 21>
    EX_EHO_BUR_OTH: typing.ClassVar[modelCode]  # value = <modelCode.EX_EHO_BUR_OTH: 22>
    EX_EHO_EXP_EXT: typing.ClassVar[modelCode]  # value = <modelCode.EX_EHO_EXP_EXT: 23>
    EX_EHO_EXP_HEL: typing.ClassVar[modelCode]  # value = <modelCode.EX_EHO_EXP_HEL: 24>
    EX_EHO_EXP_OTH: typing.ClassVar[modelCode]  # value = <modelCode.EX_EHO_EXP_OTH: 25>
    EX_EXPOSED: typing.ClassVar[modelCode]  # value = <modelCode.EX_EXPOSED: 16>
    GTR: typing.ClassVar[modelCode]  # value = <modelCode.GTR: 2>
    HIVB: typing.ClassVar[modelCode]  # value = <modelCode.HIVB: 11>
    HIVW: typing.ClassVar[modelCode]  # value = <modelCode.HIVW: 12>
    HKY: typing.ClassVar[modelCode]  # value = <modelCode.HKY: 3>
    JONES: typing.ClassVar[modelCode]  # value = <modelCode.JONES: 8>
    LG: typing.ClassVar[modelCode]  # value = <modelCode.LG: 13>
    MTREV24: typing.ClassVar[modelCode]  # value = <modelCode.MTREV24: 9>
    NUCJC: typing.ClassVar[modelCode]  # value = <modelCode.NUCJC: 0>
    TAMURA92: typing.ClassVar[modelCode]  # value = <modelCode.TAMURA92: 4>
    WAG: typing.ClassVar[modelCode]  # value = <modelCode.WAG: 10>
    __members__: typing.ClassVar[dict[str, modelCode]]  # value = {name: member} for each of the 26 members listed in the class docstring

    # pybind11 interoperability hook emitted by the stub generator.
    @staticmethod
    def _pybind11_conduit_v1_(*args, **kwargs):
        ...

    # Comparison, hashing, pickling and integer-conversion protocol of the enum.
    def __eq__(self, other: typing.Any) -> bool:
        ...

    def __getstate__(self) -> int:
        ...

    def __hash__(self) -> int:
        ...

    def __index__(self) -> int:
        ...

    # A member can be constructed from its integer value.
    def __init__(self, value: int) -> None:
        ...

    def __int__(self) -> int:
        ...

    def __ne__(self, other: typing.Any) -> bool:
        ...

    def __repr__(self) -> str:
        ...

    def __setstate__(self, state: int) -> None:
        ...

    def __str__(self) -> str:
        ...

    # Member name as a string.
    @property
    def name(self) -> str:
        ...

    # Integer value of the member.
    @property
    def value(self) -> int:
        ...
363
class modelFactory:
    """Stub for the native substitution-model builder, constructed from a ``Tree``.

    An instance is what ``Simulator.init_substitution_sim`` accepts.
    """

    # pybind11 interoperability hook emitted by the stub generator.
    @staticmethod
    def _pybind11_conduit_v1_(*args, **kwargs):
        ...

    def __init__(self, arg0: Tree) -> None:
        ...

    def reset(self) -> None:
        ...

    def set_alphabet(self, arg0: alphabetCode) -> None:
        ...

    # Takes a string. NOTE(review): presumably a file path -- confirm.
    def set_amino_replacement_model_file(self, arg0: str) -> None:
        ...

    # Takes a float and an int. NOTE(review): presumably the gamma shape and
    # the number of rate categories -- confirm against the native sources.
    def set_gamma_parameters(self, arg0: float, arg1: int) -> None:
        ...

    def set_invariant_sites_proportion(self, arg0: float) -> None:
        ...

    def set_model_parameters(self, arg0: list[float]) -> None:
        ...

    def set_replacement_model(self, arg0: modelCode) -> None:
        ...
383
class node:
    """Stub for a native tree node.

    The stub exposes no constructor; nodes are reached through ``sons`` (and,
    as the Python wrapper does, through ``Tree.root``).
    """

    # pybind11 interoperability hook emitted by the stub generator.
    @staticmethod
    def _pybind11_conduit_v1_(*args, **kwargs):
        ...

    # A method, unlike the read-only properties below.
    def distance_to_father(self) -> float:
        ...

    @property
    def name(self) -> str:
        ...

    @property
    def num_leaves(self) -> int:
        ...

    # Child nodes of this node.
    @property
    def sons(self) -> list[node]:
        ...
398
class sequenceContainer:
    """Stub for the native sequence container.

    Returned by ``Simulator.gen_substitutions`` and accepted by
    ``Msa.fill_substitutions``; the stub exposes no accessors on it.
    """

    # pybind11 interoperability hook emitted by the stub generator.
    @staticmethod
    def _pybind11_conduit_v1_(*args, **kwargs):
        ...

    # Default-constructible: takes no arguments.
    def __init__(self) -> None:
        ...
404
# Module-level aliases of the enum members, in alphabetical order. Each name
# is the member of modelCode, alphabetCode or event shown in its trailing
# comment.
AAJC: modelCode  # value = <modelCode.AAJC: 1>
AMINOACID: alphabetCode  # value = <alphabetCode.AMINOACID: 2>
CPREV45: modelCode  # value = <modelCode.CPREV45: 6>
CUSTOM: modelCode  # value = <modelCode.CUSTOM: 26>
DAYHOFF: modelCode  # value = <modelCode.DAYHOFF: 7>
Deletion: event  # value = <event.Deletion: 1>
EHO_EXTENDED: modelCode  # value = <modelCode.EHO_EXTENDED: 17>
EHO_HELIX: modelCode  # value = <modelCode.EHO_HELIX: 18>
EHO_OTHER: modelCode  # value = <modelCode.EHO_OTHER: 19>
EMPIRICODON: modelCode  # value = <modelCode.EMPIRICODON: 14>
EX_BURIED: modelCode  # value = <modelCode.EX_BURIED: 15>
EX_EHO_BUR_EXT: modelCode  # value = <modelCode.EX_EHO_BUR_EXT: 20>
EX_EHO_BUR_HEL: modelCode  # value = <modelCode.EX_EHO_BUR_HEL: 21>
EX_EHO_BUR_OTH: modelCode  # value = <modelCode.EX_EHO_BUR_OTH: 22>
EX_EHO_EXP_EXT: modelCode  # value = <modelCode.EX_EHO_EXP_EXT: 23>
EX_EHO_EXP_HEL: modelCode  # value = <modelCode.EX_EHO_EXP_HEL: 24>
EX_EHO_EXP_OTH: modelCode  # value = <modelCode.EX_EHO_EXP_OTH: 25>
EX_EXPOSED: modelCode  # value = <modelCode.EX_EXPOSED: 16>
GTR: modelCode  # value = <modelCode.GTR: 2>
HIVB: modelCode  # value = <modelCode.HIVB: 11>
HIVW: modelCode  # value = <modelCode.HIVW: 12>
HKY: modelCode  # value = <modelCode.HKY: 3>
Insertion: event  # value = <event.Insertion: 0>
JONES: modelCode  # value = <modelCode.JONES: 8>
LG: modelCode  # value = <modelCode.LG: 13>
MTREV24: modelCode  # value = <modelCode.MTREV24: 9>
NUCJC: modelCode  # value = <modelCode.NUCJC: 0>
NUCLEOTIDE: alphabetCode  # value = <alphabetCode.NUCLEOTIDE: 1>
NULLCODE: alphabetCode  # value = <alphabetCode.NULLCODE: 0>
TAMURA92: modelCode  # value = <modelCode.TAMURA92: 4>
WAG: modelCode  # value = <modelCode.WAG: 10>
_Sailfish/py.typed ADDED
File without changes
Binary file
msasim/__init__.py ADDED
File without changes
msasim/sailfish.py ADDED
@@ -0,0 +1,576 @@
1
+ import _Sailfish
2
+ import os, warnings, math, operator, time, profile, tempfile, pathlib
3
+ from functools import reduce
4
+ from typing import List, Optional, Dict
5
+ from re import split
6
+ from enum import Enum
7
+
8
+
9
+
10
+ MODEL_CODES = _Sailfish.modelCode
11
+
12
class SIMULATION_TYPE(Enum):
    """Kind of simulation a ``Simulator`` is asked to run.

    NOTE(review): NOSUBS presumably means "indels only, no substitutions";
    confirm against the Simulator implementation.
    """

    NOSUBS = 0   # no substitution model
    DNA = 1      # nucleotide sequences
    PROTEIN = 2  # amino-acid sequences
16
+
17
class Distribution:
    """Base class for discrete length distributions.

    Subclasses compute a list of probabilities and hand it to ``set_dist``,
    which validates it and builds the native ``_Sailfish.DiscreteDistribution``
    stored in ``self._dist``. The other methods delegate to that object, so
    they are only usable after ``set_dist`` has succeeded.
    """

    def set_dist(self, dist):
        """Validate *dist* as a probability vector and build the native sampler.

        dist - iterable of probabilities; it must sum to 1 (within 1e-5) and
               every value must lie in [0, 1].

        Raises ValueError when the input is not a valid distribution.
        """
        # Materialise once: the values are traversed several times below, which
        # would silently exhaust a generator or any other one-shot iterable.
        dist = list(dist)
        # sum should be "around" 1
        epsilon = 10e-6
        total = sum(dist)
        # Written as "not (... <= ...)" so that a NaN total, for which every
        # comparison is False, is rejected instead of slipping through.
        if not abs(total - 1) <= epsilon:
            raise ValueError(f"Sum of the distribution should be 1 for a valid probability distribution. Input received is: {dist}, sum is {total}")
        for x in dist:
            if not 0 <= x <= 1:
                raise ValueError(f"Each value of the probabilities should be between 0 to 1. Received a value of {x}")
        self._dist = _Sailfish.DiscreteDistribution(dist)

    def draw_sample(self) -> int:
        """Draw one random sample from the distribution."""
        return self._dist.draw_sample()

    def set_seed(self, seed: int) -> None:
        """Seed the random number generator of the native sampler.

        The native ``set_seed`` is declared static in the binding stub, so the
        seed is presumably shared between distributions -- TODO confirm.
        """
        return self._dist.set_seed(seed)

    def get_table(self) -> List:
        """Return the native alias table (useful for debugging)."""
        return self._dist.get_table()

    def _get_Sailfish_dist(self) -> _Sailfish.DiscreteDistribution:
        """Return the underlying native distribution object."""
        return self._dist


class CustomDistribution(Distribution):
    '''
    Provide a custom discrete distribution to the model.
    '''
    def __init__(self, dist: List[float]):
        """dist - the probability of each outcome; validated by ``set_dist``."""
        self.set_dist(dist)


class GeometricDistribution(Distribution):
    def __init__(self, p: float, truncation: int = 150):
        """
        Truncated geometric distribution over the values 1..truncation.
        inputs:
            p - p parameter of the geometric distribution, 0 < p <= 1
            truncation - (optional, by default 150) maximal value of the distribution

        Raises ValueError when p is outside (0, 1].
        """
        # p == 0 used to surface as a ZeroDivisionError from the normalisation
        # below; reject invalid parameters up front with a clear message.
        if not 0 < p <= 1:
            raise ValueError(f"The p parameter of a geometric distribution should be in the range (0, 1]. Received a value of {p}")
        self.p = p
        self.truncation = truncation

        def pmf(x):
            return p*(1-p)**(x-1)

        def cdf(x):
            return 1-(1-p)**x

        # Probability mass of the kept range 1..truncation; dividing by it
        # renormalises the truncated distribution so that it sums to 1.
        norm_factor = cdf(truncation) - cdf(0)

        probabilities = [pmf(i)/norm_factor for i in range(1, truncation+1)]

        self.set_dist(probabilities)

    def __repr__(self) -> str:
        return f"Geometric distribution: (p={self.p}, truncation={self.truncation})"


class PoissonDistribution(Distribution):
    def __init__(self, p: float, truncation: int = 150):
        """
        Truncated Poisson distribution over the values 1..truncation.
        inputs:
            p - rate parameter of the Poisson distribution, p > 0
            truncation - (optional, by default 150) maximal value of the distribution

        Raises ValueError when p is not positive.
        """
        # p == 0 used to surface as a ZeroDivisionError from the normalisation
        # below; reject invalid parameters up front with a clear message.
        if not p > 0:
            raise ValueError(f"The p parameter of a Poisson distribution should be positive. Received a value of {p}")
        self.p = p
        self.truncation = truncation

        factorial = math.factorial

        def pmf(x):
            return ((p**x)*(math.e**-p))*(1.0/factorial(x))

        def cdf(x):
            return (math.e**-p)*sum([(p**i)*(1.0/factorial(i)) for i in range(0, x+1)])

        # Probability mass of the kept range 1..truncation (the value 0 is
        # excluded); dividing by it renormalises the truncated distribution.
        norm_factor = cdf(truncation) - cdf(0)

        probabilities = [pmf(i)/norm_factor for i in range(1, truncation+1)]

        self.set_dist(probabilities)

    def __repr__(self) -> str:
        return f"Poisson distribution: (p={self.p}, truncation={self.truncation})"


class ZipfDistribution(Distribution):
    def __init__(self, p: float, truncation: int = 150):
        """
        Truncated Zipf (power-law) distribution over the values 1..truncation,
        where the probability of i is proportional to i**-p.
        inputs:
            p - exponent of the Zipf distribution
            truncation - (optional, by default 150) maximal value of the distribution
        """
        self.p = p
        self.truncation = truncation

        weights = [(i**-p) for i in range(1, truncation+1)]
        norm_factor = sum(weights)
        probabilities = [w/norm_factor for w in weights]

        self.set_dist(probabilities)

    def __repr__(self) -> str:
        return f"Zipf distribution: (p={self.p}, truncation={self.truncation})"
112
+
113
def is_newick(tree: str):
    """Check that *tree* looks like a Newick string, i.e. that it ends with ';'.

    Only the terminating semicolon is verified; the rest of the structure is
    tokenised but not validated.

    Returns True when the check passes.
    Raises ValueError when the last non-blank token is not ';' (this includes
    empty and whitespace-only input).
    """
    # from: https://github.com/ila/Newick-validator/blob/master/Newick_Validator.py
    # dividing the string into tokens, to check them singularly
    tokens = split(r'([A-Za-z]+[^A-Za-z,)]+[A-Za-z]+|[0-9.]*[A-Za-z]+[0-9.]+|[0-9.]+\s+[0-9.]+|[0-9.]+|[A-za-z]+|\(|\)|;|:|,)', tree)

    # removing spaces and empty strings (spaces within labels are still present)
    parsed_tokens = [token for token in tokens if token and not token.isspace()]

    # checking whether the tree ends with ; -- an input without any token at
    # all is reported the same way instead of failing with an IndexError
    if not parsed_tokens or parsed_tokens[-1] != ';':
        raise ValueError(f"Tree without ; at the end. Tree received: {tree}")
    return True
126
+
127
+ # TODO, I think should be deleted
128
class Block:
    """Thin Python handle around a native ``_Sailfish.Block``.

    A block is a single event and is used to add insertions or deletions.
    """

    def __init__(self, num1: int, num2: int):
        # Both integers are forwarded to the native constructor unchanged.
        native_block = _Sailfish.Block(num1, num2)
        self.block = native_block
135
+
136
class BlockTree:
    """Thin Python handle around a native ``_Sailfish.BlockTree``.

    Holds the events on multiple branches (the entire tree).
    """

    def __init__(self, root_length: int):
        native_tree = _Sailfish.BlockTree(root_length)
        self.blockTree = native_tree

    def print_tree(self) -> str:
        """Return the textual form produced by the native ``print_tree``."""
        rendered = self.blockTree.print_tree()
        return rendered

    def block_list(self) -> List:
        """Return the block list produced by the native ``block_list``."""
        blocks = self.blockTree.block_list()
        return blocks
148
+
149
+ # TODO delete one of this (I think the above if not used)
150
class BlockTreePython:
    '''
    Used to contain the events on a multiple branches (entire tree).
    '''
    def __init__(self, branch_block_dict: Dict[str, _Sailfish.Block]):
        """branch_block_dict - mapping from branch name to its native block object.

        NOTE(review): the values are used through ``print_tree()``, which the
        binding stub declares on ``BlockTree`` rather than ``Block`` -- the
        annotation probably should say ``BlockTree``; confirm.
        """
        self._branch_block_dict = branch_block_dict
        # Shallow copy of the mapping, used for look-ups by branch name.
        self._branch_block_dict_python = dict(branch_block_dict)

    def _get_Sailfish_blocks(self) -> Dict[str, _Sailfish.Block]:
        """Return the mapping exactly as it was passed to the constructor."""
        return self._branch_block_dict

    def get_branches_str(self) -> Dict[str, str]:
        """Return a mapping from every branch name to its ``print_tree()`` text."""
        return {name: block.print_tree() for name, block in self._branch_block_dict.items()}

    def _check_branch(self, branch) -> None:
        """Raise ValueError when *branch* is not a known branch name."""
        if branch not in self._branch_block_dict_python:
            raise ValueError(f"branch not in the _branch_block, aviable branches are: {list(self._branch_block_dict_python.keys())}")

    def get_specific_branch(self, branch: str) -> str:
        """Return the ``print_tree()`` text of *branch*; ValueError if unknown."""
        self._check_branch(branch)
        return self._branch_block_dict[branch].print_tree()

    def print_branches(self) -> None:
        """Print every branch name followed by its ``print_tree()`` text."""
        for name, block in self._branch_block_dict.items():
            print(f"branch = {name}")
            print(block.print_tree())

    def block_list(self, branch: Optional[str] = None) -> List:
        """Return the object stored for *branch*.

        The method used to read an undefined name ``branch`` and therefore
        always failed with NameError; the branch is now a parameter. Calling
        it without a branch, or with an unknown one, raises ValueError listing
        the available branches.
        """
        self._check_branch(branch)
        return self._branch_block_dict_python[branch]
179
+
180
class Tree:
    '''
    The tree class for the simulator
    '''
    def __init__(self, input_str: str):
        """Build the tree from a Newick string or from a file containing one.

        input_str - a Newick string, or the path of an existing file whose
                    content is a Newick string.

        Raises ValueError when the tree text fails the ``is_newick`` check.
        """
        is_from_file = os.path.isfile(input_str)
        if is_from_file:
            # Context manager so the handle is closed even if reading fails.
            with open(input_str, 'r') as tree_file:
                tree_str = tree_file.read()
        else:
            tree_str = input_str
        # is_newick raises on its own for a missing ';'; the branch below is
        # kept so a falsy return value is still reported with context.
        if not is_newick(tree_str):
            if is_from_file:
                raise ValueError(f"Failed to read tree from file. File path: {input_str}, content: {tree_str}")
            else:
                raise ValueError(f"Failed construct tree from string. String received: {tree_str}")
        # The native constructor receives the original argument (path or
        # string) together with the flag telling it which of the two it is.
        self._tree = _Sailfish.Tree(input_str, is_from_file)
        self._tree_str = tree_str

    def get_num_nodes(self) -> int:
        """Return the number of nodes reported by the native tree."""
        return self._tree.num_nodes

    def get_num_leaves(self) -> int:
        """Return the number of leaves reported by the root node."""
        return self._tree.root.num_leaves

    def _get_Sailfish_tree(self) -> _Sailfish.Tree:
        """Return the underlying native tree object."""
        return self._tree

    def __repr__(self) -> str:
        return f"{self._tree_str}"
210
+
211
class SimProtocol:
    '''
    The simulator protocol, sets the different distribution, tree and root length.
    '''
    def __init__(self, tree = None,
                 root_seq_size: int = 100,
                 deletion_rate: float = 0.0,
                 insertion_rate: float = 0.0,
                 deletion_dist: Optional[Distribution] = None,
                 insertion_dist: Optional[Distribution] = None,
                 minimum_seq_size: int = 100,
                 seed: int = 0,
                 ):
        """Create a protocol for *tree* and apply the given settings.

        tree - a ``Tree``, a Newick string, or a path to a file with a tree
        root_seq_size - length of the root sequence
        deletion_rate / insertion_rate - single rate used for all branches
        deletion_dist / insertion_dist - length distribution used for all
            branches; ``None`` (the default) means ``ZipfDistribution(1.7, 50)``
        minimum_seq_size - minimal sequence size passed to the simulator
        seed - seed passed to the simulator

        Raises ValueError when *tree* is none of the accepted kinds.
        """
        if isinstance(tree, Tree):
            self._tree = tree
        elif isinstance(tree, str):
            self._tree = Tree(tree)
        else:
            raise ValueError(f"please provide one of the following: (1) a newick format of a tree; (2) a path to a file containing a tree; (3) or a tree created by the Tree class")

        # The defaults are built per call: as default-argument expressions they
        # were evaluated once at class-definition time and then shared by every
        # protocol that did not pass its own distribution.
        if deletion_dist is None:
            deletion_dist = ZipfDistribution(1.7, 50)
        if insertion_dist is None:
            insertion_dist = ZipfDistribution(1.7, 50)

        # One branch per node except the root.
        self._num_branches = self._tree.get_num_nodes() - 1
        self._sim = _Sailfish.SimProtocol(self._tree._get_Sailfish_tree())
        self.set_seed(seed)
        self.set_sequence_size(root_seq_size)
        self.set_deletion_rates(deletion_rate=deletion_rate)
        self.set_insertion_rates(insertion_rate=insertion_rate)
        self.set_deletion_length_distributions(deletion_dist=deletion_dist)
        self.set_insertion_length_distributions(insertion_dist=insertion_dist)
        self.set_min_sequence_size(min_sequence_size=minimum_seq_size)

    def get_tree(self) -> Tree:
        """Return the ``Tree`` wrapper this protocol was built for."""
        return self._tree

    def _get_Sailfish_tree(self) -> _Sailfish.Tree:
        """Return the native tree object."""
        return self._tree._get_Sailfish_tree()

    def _get_root(self):
        """Return the root node of the native tree."""
        return self._tree._get_Sailfish_tree().root

    def get_num_branches(self) -> int:
        """Return the number of branches (number of nodes minus one)."""
        return self._num_branches

    def _check_branch_num(self, branch_num: int) -> None:
        """Raise ValueError unless 0 <= branch_num < number of branches.

        Negative numbers are rejected as well, so they are never forwarded to
        the native getters and never wrap around in the Python lists.
        """
        if not 0 <= branch_num < self._num_branches:
            raise ValueError(f"The branch number should be between 0 to {self._num_branches} (not included). Received value of {branch_num}")

    def set_seed(self, seed: int) -> None:
        """Store *seed* and pass it to the native protocol."""
        self._seed = seed
        self._sim.set_seed(seed)

    def get_seed(self) -> int:
        """Return the seed most recently set through ``set_seed``."""
        return self._seed

    def set_sequence_size(self, sequence_size: int) -> None:
        """Set the root sequence size."""
        self._sim.set_sequence_size(sequence_size)
        self._root_seq_size = sequence_size

    def get_sequence_size(self) -> int:
        """Return the root sequence size most recently set."""
        return self._root_seq_size

    def set_min_sequence_size(self, min_sequence_size: int) -> None:
        """Set the minimum sequence size."""
        self._sim.set_minimum_sequence_size(min_sequence_size)
        self._min_seq_size = min_sequence_size

    def set_insertion_rates(self, insertion_rate: Optional[float] = None, insertion_rates: Optional[List[float]] = None) -> None:
        """Set the insertion rate of every branch.

        Pass either *insertion_rate* (one value for all branches) or
        *insertion_rates* (one value per branch); the single value wins when
        both are given.

        Raises ValueError when neither is given or the list length differs
        from the number of branches.
        """
        if insertion_rate is not None:
            rates = [insertion_rate] * self._num_branches
        elif insertion_rates:
            if not len(insertion_rates) == self._num_branches:
                raise ValueError(f"The length of the insertaion rates should be equal to the number of branches in the tree. The insertion_rates length is {len(insertion_rates)} and the number of branches is {self._num_branches}. You can pass a single value as insertion_rate which will be used for all branches.")
            rates = insertion_rates
        else:
            raise ValueError(f"please provide one of the following: insertion_rate (a single value used for all branches), or a insertion_rates (a list of values, each corresponding to a different branch)")

        self.insertion_rates = rates
        # Recomputed on every call so the flag also goes back to True when the
        # rates are reset to zero (it used to be cleared but never set again).
        self._is_insertion_rate_zero = not any(rates)
        self._sim.set_insertion_rates(self.insertion_rates)

    def get_insertion_rate(self, branch_num: int) -> float:
        """Return the insertion rate of branch *branch_num*."""
        self._check_branch_num(branch_num)
        return self._sim.get_insertion_rate(branch_num)

    def get_all_insertion_rates(self) -> Dict:
        """Return a mapping from branch number to insertion rate."""
        return {i: self.get_insertion_rate(i) for i in range(self._num_branches)}

    def set_deletion_rates(self, deletion_rate: Optional[float] = None, deletion_rates: Optional[List[float]] = None) -> None:
        """Set the deletion rate of every branch.

        Pass either *deletion_rate* (one value for all branches) or
        *deletion_rates* (one value per branch); the single value wins when
        both are given.

        Raises ValueError when neither is given or the list length differs
        from the number of branches.
        """
        if deletion_rate is not None:
            rates = [deletion_rate] * self._num_branches
        elif deletion_rates:
            if not len(deletion_rates) == self._num_branches:
                raise ValueError(f"The length of the deletion rates should be equal to the number of branches in the tree. The deletion_rates length is {len(deletion_rates)} and the number of branches is {self._num_branches}. You can pass a single value as deletion_rate which will be used for all branches.")
            rates = deletion_rates
        else:
            raise ValueError(f"please provide one of the following: deletion_rate (a single value used for all branches), or a deletion_rates (a list of values, each corresponding to a different branch)")

        self.deletion_rates = rates
        # Recomputed on every call, see set_insertion_rates.
        self._is_deletion_rate_zero = not any(rates)
        self._sim.set_deletion_rates(self.deletion_rates)

    def get_deletion_rate(self, branch_num: int) -> float:
        """Return the deletion rate of branch *branch_num*."""
        self._check_branch_num(branch_num)
        return self._sim.get_deletion_rate(branch_num)

    def get_all_deletion_rates(self) -> Dict:
        """Return a mapping from branch number to deletion rate."""
        return {i: self.get_deletion_rate(i) for i in range(self._num_branches)}

    def set_insertion_length_distributions(self, insertion_dist: Optional[Distribution] = None, insertion_dists: Optional[List[Distribution]] = None) -> None:
        """Set the insertion length distribution of every branch.

        Pass either *insertion_dist* (one distribution for all branches) or
        *insertion_dists* (one distribution per branch).

        Raises ValueError when neither is given or the list length differs
        from the number of branches.
        """
        if insertion_dist is not None:
            self.insertion_dists = [insertion_dist] * self._num_branches
        elif insertion_dists:
            if not len(insertion_dists) == self._num_branches:
                raise ValueError(f"The length of the insertion dists should be equal to the number of branches in the tree. The insertion_dists length is {len(insertion_dists)} and the number of branches is {self._num_branches}. You can pass a single value as insertion_dist which will be used for all branches.")
            self.insertion_dists = insertion_dists
        else:
            raise ValueError(f"please provide one of the following: insertion_dist (a single distribution used for all branches), or insertion_dists (a list of distributions, each corresponding to a different branch)")

        self._sim.set_insertion_length_distributions([dist._get_Sailfish_dist() for dist in self.insertion_dists])

    def get_insertion_length_distribution(self, branch_num: int) -> Distribution:
        """Return the insertion length distribution of branch *branch_num*."""
        self._check_branch_num(branch_num)
        return self.insertion_dists[branch_num]

    def get_all_insertion_length_distribution(self) -> Dict:
        """Return a mapping from branch number to insertion length distribution."""
        return {i: self.get_insertion_length_distribution(i) for i in range(self._num_branches)}

    def set_deletion_length_distributions(self, deletion_dist: Optional[Distribution] = None, deletion_dists: Optional[List[Distribution]] = None) -> None:
        """Set the deletion length distribution of every branch.

        Pass either *deletion_dist* (one distribution for all branches) or
        *deletion_dists* (one distribution per branch).

        Raises ValueError when neither is given or the list length differs
        from the number of branches.
        """
        if deletion_dist is not None:
            self.deletion_dists = [deletion_dist] * self._num_branches
        elif deletion_dists:
            if not len(deletion_dists) == self._num_branches:
                raise ValueError(f"The length of the deletion dists should be equal to the number of branches in the tree. The deletion_dists length is {len(deletion_dists)} and the number of branches is {self._num_branches}. You can pass a single value as deletion_dist which will be used for all branches.")
            self.deletion_dists = deletion_dists
        else:
            raise ValueError(f"please provide one of the following: deletion_dist (a single distribution used for all branches), or deletion_dists (a list of distributions, each corresponding to a different branch)")

        self._sim.set_deletion_length_distributions([dist._get_Sailfish_dist() for dist in self.deletion_dists])

    def get_deletion_length_distribution(self, branch_num: int) -> Distribution:
        """Return the deletion length distribution of branch *branch_num*."""
        self._check_branch_num(branch_num)
        return self.deletion_dists[branch_num]

    def get_all_deletion_length_distribution(self) -> Dict:
        """Return a mapping from branch number to deletion length distribution."""
        return {i: self.get_deletion_length_distribution(i) for i in range(self._num_branches)}
363
+
364
class Msa:
    '''
    The MSA class from the simulator
    '''
    def __init__(self, species_dict: Dict[int, tuple], root_node, save_list: List[bool]):
        """Build the native alignment object.

        species_dict - the indel mapping; the binding declares it as a mapping
            from an integer key to a (list of integer triples, int) tuple, the
            shape returned by the native ``Simulator.gen_indels``
        root_node - root node of the tree
        save_list - one boolean per node
            NOTE(review): presumably the mask returned by the native
            ``get_saved_nodes_mask`` -- confirm.
        """
        self._msa = _Sailfish.Msa(species_dict, root_node, save_list)

    def generate_msas(self, node):
        # NOTE(review): the binding stub declares generate_msas with three
        # parameters (a list of indel mappings, a node and a list of booleans)
        # and returns a list of Msa objects; this single-argument call, which
        # also drops the result, looks inconsistent with it -- confirm against
        # the native sources before relying on this method.
        self._msa.generate_msas(node)

    def get_length(self) -> int:
        """Return the alignment length."""
        return self._msa.length()

    def get_num_sequences(self) -> int:
        """Return the number of sequences in the alignment."""
        return self._msa.num_sequences()

    def fill_substitutions(self, sequenceContainer) -> None:
        """Pass a native sequence container to the alignment."""
        self._msa.fill_substitutions(sequenceContainer)

    def print_msa(self) -> None:
        """Call the native ``print_msa``; the binding declares it as returning None."""
        return self._msa.print_msa()

    def print_indels(self) -> None:
        """Call the native ``print_indels``; the binding declares it as returning None."""
        return self._msa.print_indels()

    def get_msa(self) -> str:
        """Return the alignment as a string (the native ``get_msa_string``)."""
        return self._msa.get_msa_string()

    def write_msa(self, file_path) -> None:
        """Write the alignment to *file_path*."""
        self._msa.write_msa(file_path)
394
+
395
+ #def __repr__(self) -> str:
396
+ # return f"{self.get_msa()}"
397
+
398
class Simulator:
    '''
    Simulate MSAs based on SimProtocol.

    Wraps a _Sailfish.Simulator built from the protocol's backend object
    (simProtocol._sim). For simulation types other than NOSUBS a
    _Sailfish.modelFactory describing the substitution model is attached
    either explicitly through set_replacement_model or lazily with default
    parameters the first time substitutions are generated.
    '''
    def __init__(self, simProtocol: Optional[SimProtocol] = None, simulation_type: Optional[SIMULATION_TYPE] = None):
        '''
        Parameters:
            simProtocol: the protocol to simulate. When omitted, a warning is
                issued and a default protocol is built (three-taxon tree,
                Poisson indel lengths, indel rates of 0.05, root length 100).
            simulation_type: PROTEIN, DNA or NOSUBS. When omitted, a warning
                is issued and NOSUBS (indel only) is used.

        Raises:
            ValueError: if the protocol fails verification or the simulation
                type is not one of the SIMULATION_TYPE members.
        '''
        if not simProtocol:
            warnings.warn(f"initalized a simulator without simProtocol -> using a default protocol with Tree = '(A:0.01,B:0.5,C:0.03);' and root length of 100")
            # default simulation values
            possion = PoissonDistribution(10, 100)
            simProtocol = SimProtocol(tree="(A:0.01,B:0.5,C:0.03);")
            simProtocol.set_insertion_length_distributions(possion)
            simProtocol.set_deletion_length_distributions(possion)
            simProtocol.set_insertion_rates(0.05)
            simProtocol.set_deletion_rates(0.05)
            simProtocol.set_sequence_size(100)
            simProtocol.set_min_sequence_size(1)

        # verify sim_protocol
        if self._verify_sim_protocol(simProtocol):
            self._simProtocol = simProtocol
            self._simulator = _Sailfish.Simulator(self._simProtocol._sim)
        else:
            raise ValueError("failed to verify simProtocol")

        if not simulation_type:
            warnings.warn("simulation type not provided -> running indel only simulation")
            simulation_type = SIMULATION_TYPE.NOSUBS

        if simulation_type == SIMULATION_TYPE.PROTEIN:
            self._alphabet = _Sailfish.alphabetCode.AMINOACID
        elif simulation_type == SIMULATION_TYPE.DNA:
            self._alphabet = _Sailfish.alphabetCode.NUCLEOTIDE
        elif simulation_type == SIMULATION_TYPE.NOSUBS:
            self._alphabet = _Sailfish.alphabetCode.NULLCODE
        else:
            raise ValueError(f"unknown simulation type, please provde one of the following: {[e.name for e in SIMULATION_TYPE]}")

        self._simulation_type = simulation_type
        self._is_sub_model_init = False

    def _verify_sim_protocol(self, simProtocol) -> bool:
        '''
        Check that the protocol carries everything needed for a simulation.

        Only branch 0 is inspected for the length distributions and rates.
        Returns True when all checks pass.

        Raises:
            ValueError: if the tree, the root sequence size or a length
                distribution is missing, or if an indel rate is negative.
        '''
        if not simProtocol.get_tree():
            raise ValueError("protocol miss tree, please provide when initalizing the simProtocol")
        if not simProtocol.get_sequence_size():
            raise ValueError("protocol miss root length, please provide -> simProtocol.set_sequence_size(int)")
        if not simProtocol.get_insertion_length_distribution(0):
            raise ValueError("protocol miss insertion length distribution, please provide -> simProtocol.set_insertion_length_distributions(Distribution)")
        if not simProtocol.get_deletion_length_distribution(0):
            raise ValueError("protocol miss deletion length distribution, please provide -> simProtocol.set_deletion_length_distributions(Distribution)")
        # A rate of exactly zero is accepted (simulate() has a dedicated path
        # for it); only negative rates are rejected.
        insertion_rate = simProtocol.get_insertion_rate(0)
        if insertion_rate < 0:
            raise ValueError(f"please provide a non negative value for insertion rate, provided value of: {insertion_rate} -> simProtocol.set_insertion_rates(float)")
        deletion_rate = simProtocol.get_deletion_rate(0)
        if deletion_rate < 0:
            raise ValueError(f"please provide a non negative value for deletion rate, provided value of: {deletion_rate} -> simProtocol.set_deletion_rates(float)")
        return True

    def reset_sim(self):
        '''Placeholder; currently does nothing.'''
        # TODO, complete
        pass

    def _init_sub_model(self) -> None:
        '''
        Attach a default substitution model to the backend simulator.

        Uses WAG for PROTEIN simulations and NUCJC for every other simulation
        type, with gamma parameters (1.0, 1), and emits a warning naming the
        default that was chosen.
        '''
        self._model_factory = _Sailfish.modelFactory(self._simProtocol._get_Sailfish_tree())
        self._model_factory.set_alphabet(self._alphabet)
        if self._simulation_type == SIMULATION_TYPE.PROTEIN:
            warnings.warn("replacement matrix not provided -> running with default parameters: WAG model")
            self._model_factory.set_replacement_model(_Sailfish.modelCode.WAG)
        else:
            warnings.warn("replacement matrix not provided -> running with default parameters: JC model")
            self._model_factory.set_replacement_model(_Sailfish.modelCode.NUCJC)
        self._model_factory.set_gamma_parameters(1.0, 1)

        self._simulator.init_substitution_sim(self._model_factory)
        self._is_sub_model_init = True

    def set_replacement_model(
            self,
            model: _Sailfish.modelCode,
            amino_model_file: pathlib.Path = None,
            model_parameters: List = None,
            gamma_parameters_alpha : float = 1.0,
            gamma_parameters_categories: int = 1,
            invariant_sites_proportion: float = 0.0
        ) -> None:
        '''
        Configure the substitution model and attach it to the backend simulator.

        Parameters:
            model: the model code to use.
            amino_model_file: path of a replacement model file; only used for
                PROTEIN simulations with the CUSTOM model.
            model_parameters: parameters of the nucleotide model. Must be
                empty for PROTEIN simulations and for NUCJC, and is required
                for every other nucleotide model.
            gamma_parameters_alpha: forwarded to set_gamma_parameters.
            gamma_parameters_categories: number of gamma categories, a
                positive integer.
            invariant_sites_proportion: forwarded to
                set_invariant_sites_proportion.

        Raises:
            ValueError: on any invalid argument. All arguments are validated
                before the current model factory is replaced, so a rejected
                call leaves the simulator untouched.
        '''
        if not model:
            raise ValueError(f"please provide a substitution model from the the following list: {_Sailfish.modelCode}")
        if int(gamma_parameters_categories) != gamma_parameters_categories or gamma_parameters_categories < 1:
            raise ValueError(f"gamma_parameters_categories has to be a positive int value: received value of {gamma_parameters_categories}")

        is_protein = self._simulation_type == SIMULATION_TYPE.PROTEIN
        if is_protein:
            if model_parameters:
                raise ValueError(f"no model parameters are used in protein, recevied value of: {model_parameters}")
        elif model == MODEL_CODES.NUCJC:
            if model_parameters:
                raise ValueError(f"no model parameters in JC model, recevied value of: {model_parameters}")
        elif not model_parameters:
            raise ValueError("please provide a model parameters")

        self._model_factory = _Sailfish.modelFactory(self._simProtocol._get_Sailfish_tree())
        self._model_factory.set_alphabet(self._alphabet)
        self._model_factory.set_replacement_model(model)
        if is_protein:
            if model == MODEL_CODES.CUSTOM and amino_model_file:
                self._model_factory.set_amino_replacement_model_file(str(amino_model_file))
        elif model_parameters:
            # Only reached for non-JC nucleotide models (validated above).
            self._model_factory.set_model_parameters(model_parameters)

        self._model_factory.set_gamma_parameters(gamma_parameters_alpha, gamma_parameters_categories)
        self._model_factory.set_invariant_sites_proportion(invariant_sites_proportion)
        self._simulator.init_substitution_sim(self._model_factory)

        self._is_sub_model_init = True

    def gen_indels(self) -> BlockTreePython:
        '''Run the backend indel simulation and wrap its result in BlockTreePython.'''
        return BlockTreePython(self._simulator.gen_indels())

    def get_sequences_to_save(self) -> List[bool]:
        '''Return the backend's saved-nodes mask.'''
        return self._simulator.get_saved_nodes_mask()

    def save_root_sequence(self):
        '''Forward to the backend save_root_sequence call.'''
        self._simulator.save_root_sequence()

    def save_all_nodes_sequences(self):
        '''Forward to the backend save_all_nodes_sequences call.'''
        self._simulator.save_all_nodes_sequences()

    def gen_substitutions(self, length: int):
        '''
        Generate substitutions for an alignment of the given length.

        The default substitution model is initialised first if no model has
        been attached yet.
        '''
        if not self._is_sub_model_init:
            self._init_sub_model()
        return self._simulator.gen_substitutions(length)

    def _generate_indel_msa(self) -> Msa:
        '''Build the Msa for one simulation run, before substitutions are filled in.'''
        protocol = self._simProtocol
        if protocol._is_insertion_rate_zero and protocol._is_deletion_rate_zero:
            # No indels can occur: the Msa is built from the number of saved
            # sequences and the root sequence size instead of a block tree.
            return Msa(sum(self.get_sequences_to_save()),
                       protocol.get_sequence_size(),
                       self.get_sequences_to_save())
        blocktree = self.gen_indels()
        return Msa(blocktree._get_Sailfish_blocks(),
                   protocol._get_root(),
                   self.get_sequences_to_save())

    # @profile
    def simulate(self, times: int = 1) -> List[Msa]:
        '''
        Run the simulation `times` times and return the resulting Msa objects.

        Substitutions are generated and filled into each Msa unless the
        simulation type is NOSUBS.
        '''
        msas = []
        for _ in range(times):
            msa = self._generate_indel_msa()

            if self._simulation_type != SIMULATION_TYPE.NOSUBS:
                substitutions = self.gen_substitutions(msa.get_length())
                msa.fill_substitutions(substitutions)

            msas.append(msa)
        return msas

    def simulate_low_memory(self, output_file_path: pathlib.Path) -> None:
        '''
        Run one simulation and write the alignment to output_file_path.

        Substitutions are generated into a temporary directory that is removed
        once the alignment file has been written. Nothing is returned.

        NOTE(review): for NOSUBS simulations no file is written by this
        method - confirm whether that is intended.
        '''
        msa = self._generate_indel_msa()

        if self._simulation_type != SIMULATION_TYPE.NOSUBS:
            # Same lazy initialisation as gen_substitutions(): the backend
            # needs a substitution model before substitutions are generated.
            if not self._is_sub_model_init:
                self._init_sub_model()
            with tempfile.TemporaryDirectory() as tmpdirname:
                self._simulator.gen_substitutions_to_dir(msa.get_length(), tmpdirname)
                msa._msa.set_substitutions_folder(tmpdirname)
                msa._msa.write_msa_from_dir(str(output_file_path))

    def __call__(self) -> Msa:
        '''Run a single simulation and return its Msa.'''
        return self.simulate(1)[0]

    def save_rates(self, is_save: bool) -> None:
        '''Forward is_save to the backend save_site_rates call.'''
        self._simulator.save_site_rates(is_save)

    def get_rates(self) -> List[float]:
        '''Return the site rates reported by the backend.'''
        return self._simulator.get_site_rates()
@@ -0,0 +1,63 @@
1
+ Metadata-Version: 2.4
2
+ Name: msasim
3
+ Version: 25.5.3
4
+ Summary: A fast MSA simulator
5
+ Home-page: https://github.com/elyawy/Sailfish-backend
6
+ Author: Elya Wygoda
7
+ Author-email: elya.wygoda@gmail.com
8
+ Requires-Python: >=3.6
9
+ Description-Content-Type: text/markdown
10
+ License-File: LICENSE
11
+ Provides-Extra: test
12
+ Requires-Dist: pytest; extra == "test"
13
+ Dynamic: author
14
+ Dynamic: author-email
15
+ Dynamic: description
16
+ Dynamic: description-content-type
17
+ Dynamic: home-page
18
+ Dynamic: license-file
19
+ Dynamic: provides-extra
20
+ Dynamic: requires-python
21
+ Dynamic: summary
22
+
23
+ # Sailfish
24
+
25
+ Sailfish is a performant multiple sequence alignment (MSA) simulator, written in C++ and Python, allowing for quick and easy generation of large simulated datasets.
26
+
27
+ ## Project goals
28
+
29
+ - Ease of use
30
+ - Speed
31
+ - Modularity
32
+
33
+ ## Installation
34
+
35
+ ```bash
36
+ pip install msasim
37
+ ```
38
+
39
+ ## Example
40
+
41
+ ```python
42
+ from msasim import sailfish as sim
43
+ from msasim.sailfish import MODEL_CODES, ZipfDistribution
44
+
45
+ ROOT_SEQUENCE_LENGTH = 100
46
+
47
+ sim_protocol = sim.SimProtocol("(A:0.5,B:0.5);",
48
+ deletion_rate=0.01,
49
+ insertion_rate=0.01,
50
+ deletion_dist=ZipfDistribution(1.08, 50),
51
+ insertion_dist=ZipfDistribution(1.08, 50),
52
+ seed=50)
53
+ sim_protocol.set_sequence_size(ROOT_SEQUENCE_LENGTH)
54
+
55
+ simulation = sim.Simulator(sim_protocol, simulation_type=sim.SIMULATION_TYPE.PROTEIN)
56
+
57
+ simulation.set_replacement_model(model=MODEL_CODES.WAG,
58
+ gamma_parameters_alpha=1.0,
59
+ gamma_parameters_categories=4)
60
+ msa = simulation()
61
+ msa.print_msa()
62
+
63
+ ```
@@ -0,0 +1,10 @@
1
+ _Sailfish.cpython-313-darwin.so,sha256=OHjaTECS2_-064t5U39QksVjp4Q2D-1PkqlqRRQQwTg,1083472
2
+ msasim-25.5.3.dist-info/RECORD,,
3
+ msasim-25.5.3.dist-info/WHEEL,sha256=Cxq5pla4scSj9raD9htoFbIhfTk0lSQlQLVCyAjgIi4,136
4
+ msasim-25.5.3.dist-info/top_level.txt,sha256=NS1ILx5V94Yyh_M7yDrrqbBPu1TL_zJuxhsI90YEJVY,17
5
+ msasim-25.5.3.dist-info/METADATA,sha256=8QKpevdyyqP12_yZKu7gh4ZsEl50fFadC2hwqifhbWs,1666
6
+ msasim-25.5.3.dist-info/licenses/LICENSE,sha256=p1Aa_mM2Nu6dG3XqMVU62Jhf04lNOwtXUrhhvhcDips,10312
7
+ msasim/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
+ msasim/sailfish.py,sha256=smYUY3vr5LomFQgACskkVy8lZkedGwahz6PEBbvpeSU,26249
9
+ _Sailfish/__init__.pyi,sha256=g1apAVKIwxit-nAJ1B9WXADEkpFqRTR8wmLvFN39ZME,15178
10
+ _Sailfish/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -0,0 +1,6 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (78.1.0)
3
+ Root-Is-Purelib: false
4
+ Tag: cp313-cp313-macosx_11_0_arm64
5
+ Generator: delocate 0.13.0
6
+
@@ -0,0 +1,172 @@
1
+ Academic Free License ("AFL") v. 3.0
2
+
3
+ This Academic Free License (the "License") applies to any original work of
4
+ authorship (the "Original Work") whose owner (the "Licensor") has placed the
5
+ following licensing notice adjacent to the copyright notice for the Original
6
+ Work:
7
+
8
+ Licensed under the Academic Free License version 3.0
9
+
10
+ 1) Grant of Copyright License. Licensor grants You a worldwide, royalty-free,
11
+ non-exclusive, sublicensable license, for the duration of the copyright, to do
12
+ the following:
13
+
14
+ a) to reproduce the Original Work in copies, either alone or as part of a
15
+ collective work;
16
+
17
+ b) to translate, adapt, alter, transform, modify, or arrange the Original
18
+ Work, thereby creating derivative works ("Derivative Works") based upon the
19
+ Original Work;
20
+
21
+ c) to distribute or communicate copies of the Original Work and Derivative
22
+ Works to the public, under any license of your choice that does not
23
+ contradict the terms and conditions, including Licensor's reserved rights
24
+ and remedies, in this Academic Free License;
25
+
26
+ d) to perform the Original Work publicly; and
27
+
28
+ e) to display the Original Work publicly.
29
+
30
+ 2) Grant of Patent License. Licensor grants You a worldwide, royalty-free,
31
+ non-exclusive, sublicensable license, under patent claims owned or controlled
32
+ by the Licensor that are embodied in the Original Work as furnished by the
33
+ Licensor, for the duration of the patents, to make, use, sell, offer for sale,
34
+ have made, and import the Original Work and Derivative Works.
35
+
36
+ 3) Grant of Source Code License. The term "Source Code" means the preferred
37
+ form of the Original Work for making modifications to it and all available
38
+ documentation describing how to modify the Original Work. Licensor agrees to
39
+ provide a machine-readable copy of the Source Code of the Original Work along
40
+ with each copy of the Original Work that Licensor distributes. Licensor
41
+ reserves the right to satisfy this obligation by placing a machine-readable
42
+ copy of the Source Code in an information repository reasonably calculated to
43
+ permit inexpensive and convenient access by You for as long as Licensor
44
+ continues to distribute the Original Work.
45
+
46
+ 4) Exclusions From License Grant. Neither the names of Licensor, nor the names
47
+ of any contributors to the Original Work, nor any of their trademarks or
48
+ service marks, may be used to endorse or promote products derived from this
49
+ Original Work without express prior permission of the Licensor. Except as
50
+ expressly stated herein, nothing in this License grants any license to
51
+ Licensor's trademarks, copyrights, patents, trade secrets or any other
52
+ intellectual property. No patent license is granted to make, use, sell, offer
53
+ for sale, have made, or import embodiments of any patent claims other than the
54
+ licensed claims defined in Section 2. No license is granted to the trademarks
55
+ of Licensor even if such marks are included in the Original Work. Nothing in
56
+ this License shall be interpreted to prohibit Licensor from licensing under
57
+ terms different from this License any Original Work that Licensor otherwise
58
+ would have a right to license.
59
+
60
+ 5) External Deployment. The term "External Deployment" means the use,
61
+ distribution, or communication of the Original Work or Derivative Works in any
62
+ way such that the Original Work or Derivative Works may be used by anyone
63
+ other than You, whether those works are distributed or communicated to those
64
+ persons or made available as an application intended for use over a network.
65
+ As an express condition for the grants of license hereunder, You must treat
66
+ any External Deployment by You of the Original Work or a Derivative Work as a
67
+ distribution under section 1(c).
68
+
69
+ 6) Attribution Rights. You must retain, in the Source Code of any Derivative
70
+ Works that You create, all copyright, patent, or trademark notices from the
71
+ Source Code of the Original Work, as well as any notices of licensing and any
72
+ descriptive text identified therein as an "Attribution Notice." You must cause
73
+ the Source Code for any Derivative Works that You create to carry a prominent
74
+ Attribution Notice reasonably calculated to inform recipients that You have
75
+ modified the Original Work.
76
+
77
+ 7) Warranty of Provenance and Disclaimer of Warranty. Licensor warrants that
78
+ the copyright in and to the Original Work and the patent rights granted herein
79
+ by Licensor are owned by the Licensor or are sublicensed to You under the
80
+ terms of this License with the permission of the contributor(s) of those
81
+ copyrights and patent rights. Except as expressly stated in the immediately
82
+ preceding sentence, the Original Work is provided under this License on an "AS
83
+ IS" BASIS and WITHOUT WARRANTY, either express or implied, including, without
84
+ limitation, the warranties of non-infringement, merchantability or fitness for
85
+ a particular purpose. THE ENTIRE RISK AS TO THE QUALITY OF THE ORIGINAL WORK
86
+ IS WITH YOU. This DISCLAIMER OF WARRANTY constitutes an essential part of this
87
+ License. No license to the Original Work is granted by this License except
88
+ under this disclaimer.
89
+
90
+ 8) Limitation of Liability. Under no circumstances and under no legal theory,
91
+ whether in tort (including negligence), contract, or otherwise, shall the
92
+ Licensor be liable to anyone for any indirect, special, incidental, or
93
+ consequential damages of any character arising as a result of this License or
94
+ the use of the Original Work including, without limitation, damages for loss
95
+ of goodwill, work stoppage, computer failure or malfunction, or any and all
96
+ other commercial damages or losses. This limitation of liability shall not
97
+ apply to the extent applicable law prohibits such limitation.
98
+
99
+ 9) Acceptance and Termination. If, at any time, You expressly assented to this
100
+ License, that assent indicates your clear and irrevocable acceptance of this
101
+ License and all of its terms and conditions. If You distribute or communicate
102
+ copies of the Original Work or a Derivative Work, You must make a reasonable
103
+ effort under the circumstances to obtain the express assent of recipients to
104
+ the terms of this License. This License conditions your rights to undertake
105
+ the activities listed in Section 1, including your right to create Derivative
106
+ Works based upon the Original Work, and doing so without honoring these terms
107
+ and conditions is prohibited by copyright law and international treaty.
108
+ Nothing in this License is intended to affect copyright exceptions and
109
+ limitations (including "fair use" or "fair dealing"). This License shall
110
+ terminate immediately and You may no longer exercise any of the rights granted
111
+ to You by this License upon your failure to honor the conditions in Section
112
+ 1(c).
113
+
114
+ 10) Termination for Patent Action. This License shall terminate automatically
115
+ and You may no longer exercise any of the rights granted to You by this
116
+ License as of the date You commence an action, including a cross-claim or
117
+ counterclaim, against Licensor or any licensee alleging that the Original Work
118
+ infringes a patent. This termination provision shall not apply for an action
119
+ alleging patent infringement by combinations of the Original Work with other
120
+ software or hardware.
121
+
122
+ 11) Jurisdiction, Venue and Governing Law. Any action or suit relating to this
123
+ License may be brought only in the courts of a jurisdiction wherein the
124
+ Licensor resides or in which Licensor conducts its primary business, and under
125
+ the laws of that jurisdiction excluding its conflict-of-law provisions. The
126
+ application of the United Nations Convention on Contracts for the
127
+ International Sale of Goods is expressly excluded. Any use of the Original
128
+ Work outside the scope of this License or after its termination shall be
129
+ subject to the requirements and penalties of copyright or patent law in the
130
+ appropriate jurisdiction. This section shall survive the termination of this
131
+ License.
132
+
133
+ 12) Attorneys' Fees. In any action to enforce the terms of this License or
134
+ seeking damages relating thereto, the prevailing party shall be entitled to
135
+ recover its costs and expenses, including, without limitation, reasonable
136
+ attorneys' fees and costs incurred in connection with such action, including
137
+ any appeal of such action. This section shall survive the termination of this
138
+ License.
139
+
140
+ 13) Miscellaneous. If any provision of this License is held to be
141
+ unenforceable, such provision shall be reformed only to the extent necessary
142
+ to make it enforceable.
143
+
144
+ 14) Definition of "You" in This License. "You" throughout this License,
145
+ whether in upper or lower case, means an individual or a legal entity
146
+ exercising rights under, and complying with all of the terms of, this License.
147
+ For legal entities, "You" includes any entity that controls, is controlled by,
148
+ or is under common control with you. For purposes of this definition,
149
+ "control" means (i) the power, direct or indirect, to cause the direction or
150
+ management of such entity, whether by contract or otherwise, or (ii) ownership
151
+ of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial
152
+ ownership of such entity.
153
+
154
+ 15) Right to Use. You may use the Original Work in all ways not otherwise
155
+ restricted or conditioned by this License or by law, and Licensor promises not
156
+ to interfere with or be responsible for such uses by You.
157
+
158
+ 16) Modification of This License. This License is Copyright © 2005 Lawrence
159
+ Rosen. Permission is granted to copy, distribute, or communicate this License
160
+ without modification. Nothing in this License permits You to modify this
161
+ License as applied to the Original Work or to Derivative Works. However, You
162
+ may modify the text of this License and copy, distribute or communicate your
163
+ modified version (the "Modified License") and apply it to other original works
164
+ of authorship subject to the following conditions: (i) You may not indicate in
165
+ any way that your Modified License is the "Academic Free License" or "AFL" and
166
+ you may not use those names in the name of your Modified License; (ii) You
167
+ must replace the notice specified in the first paragraph above with the notice
168
+ "Licensed under <insert your license name here>" or with a notice of your own
169
+ that is not confusingly similar to the notice in this License; and (iii) You
170
+ may not claim that your original works are open source software unless your
171
+ Modified License has been approved by Open Source Initiative (OSI) and You
172
+ comply with its license review and certification process.
@@ -0,0 +1,2 @@
1
+ _Sailfish
2
+ msasim