msasim 2024.5.22__cp36-cp36m-musllinux_1_1_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Sailfish/__init__.py ADDED
File without changes
Sailfish/simulator.py ADDED
@@ -0,0 +1,537 @@
1
+ import _Sailfish
2
+ import os, warnings, math, operator, time, profile
3
+ from functools import reduce
4
+ from typing import List, Optional, Dict
5
+ from re import split
6
+ from enum import Enum
7
+
8
+ # print(_Sailfish.Tree)
9
+ # print(_Sailfish.Simulator)
10
+ # print(_Sailfish.SimProtocol)
11
+ # print(_Sailfish.Msa)
12
+ # print(_Sailfish.DiscreteDistribution)
13
+ # print(_Sailfish.modelFactory)
14
+ # print(_Sailfish.alphabetCode)
15
+ # print(_Sailfish.modelCode)
16
+
17
+
18
+ MODEL_CODES = _Sailfish.modelCode
19
+
20
class SIMULATION_TYPE(Enum):
    """Kind of sequence data a simulation produces."""

    # Simulator maps DNA to the backend's nucleotide alphabet.
    DNA = 1
    # Simulator maps PROTEIN to the backend's amino-acid alphabet.
    PROTEIN = 2
23
+
24
class Distribution:
    """
    Base class for the discrete length distributions used by the simulator.

    A subclass computes a probability table and passes it to `set_dist`, which
    validates it and wraps it in a `_Sailfish.DiscreteDistribution`. All other
    methods delegate to that backend object, so they are only usable after
    `set_dist` has succeeded.
    """
    def set_dist(self, dist):
        """
        Validate a probability table and store it as the backend distribution.
        inputs:
        dist - iterable of probabilities (materialised into a list, so one-shot
               iterables such as generators are supported)
        raises:
        ValueError - if the values do not sum to 1 (tolerance 1e-5), or if any
                     value is NaN or lies outside [0, 1]
        """
        dist = list(dist)
        # sum should be "around" 1
        epsilon = 10e-6
        total = sum(dist)
        # Written as `not (... <= epsilon)` so that a NaN total is rejected as well.
        if not abs(total-1) <= epsilon:
            raise ValueError(f"Sum of the distribution should be 1 for a valid probability distribution. Input received is: {dist}, sum is {total}")
        for x in dist:
            # The chained comparison is False for NaN, so NaN entries are rejected too.
            if not 0 <= x <= 1:
                raise ValueError(f"Each value of the probabilities should be between 0 to 1. Received a value of {x}")
        self._dist = _Sailfish.DiscreteDistribution(dist)

    def draw_sample(self) -> int:
        """Return one sample drawn by the backend distribution."""
        return self._dist.draw_sample()

    def set_seed(self, seed: int) -> None:
        """Forward `seed` to the backend distribution."""
        return self._dist.set_seed(seed)

    def get_table(self) -> List:
        """Return the table reported by the backend distribution."""
        return self._dist.get_table()

    def _get_Sailfish_dist(self) -> "_Sailfish.DiscreteDistribution":
        """Return the wrapped backend object (used when configuring the simulator)."""
        return self._dist
46
+
47
class CustomDistribution(Distribution):
    """
    Discrete distribution defined directly by a user-supplied probability table.
    """
    def __init__(self, dist: List[float]):
        # Validation and backend construction are inherited from Distribution.set_dist.
        self.set_dist(dist)
53
+
54
class GeometricDistribution(Distribution):
    """Geometric distribution truncated to the support 1..truncation and renormalised."""
    def __init__(self, p: float, truncation: int = 150):
        """
        Build the truncated geometric probability table.
        inputs:
        p - p parameter of the geometric distribution, must be in (0, 1]
        truncation - (optional, by default 150) maximal value of the distribution
        raises:
        ValueError - if p is outside (0, 1] or truncation is smaller than 1
        """
        # Without these guards p=0 (or p=2 with an even truncation) makes the
        # normalisation factor 0 and surfaces as an opaque ZeroDivisionError.
        if not 0 < p <= 1:
            raise ValueError(f"The p parameter of the geometric distribution should be in (0, 1]. Received a value of {p}")
        if truncation < 1:
            raise ValueError(f"The truncation should be at least 1. Received a value of {truncation}")
        self.p = p
        self.truncation = truncation

        def pmf(x):
            return p*(1-p)**(x-1)

        def cdf(x):
            return 1-(1-p)**x

        # Probability mass that falls inside 1..truncation; dividing by it
        # makes the truncated table sum to 1.
        norm_factor = cdf(truncation) - cdf(0)

        probabilities = [pmf(i)/norm_factor for i in range(1, truncation+1)]

        self.set_dist(probabilities)

    def __repr__(self) -> str:
        return f"Geometric distribution: (p={self.p}, truncation={self.truncation})"
75
+
76
class PoissonDistribution(Distribution):
    """Poisson distribution restricted to the support 1..truncation and renormalised."""
    def __init__(self, p: float, truncation: int = 150):
        """
        Build the truncated Poisson probability table.
        inputs:
        p - rate parameter of the Poisson distribution, must be positive
        truncation - (optional, by default 150) maximal value of the distribution
        raises:
        ValueError - if p is not positive or truncation is smaller than 1
        """
        # p=0 puts all the mass on 0, which is outside the support, so the
        # normalisation factor would be 0 (previously a ZeroDivisionError).
        if not p > 0:
            raise ValueError(f"The p parameter of the Poisson distribution should be positive. Received a value of {p}")
        if truncation < 1:
            raise ValueError(f"The truncation should be at least 1. Received a value of {truncation}")
        self.p = p
        self.truncation = truncation

        # Loop-invariant factor e^-p, hoisted out of pmf/cdf.
        exp_neg_p = math.e**-p

        def pmf(x):
            return ((p**x)*exp_neg_p)*(1.0/math.factorial(x))

        def cdf(x):
            return exp_neg_p*sum([(p**i)*(1.0/math.factorial(i)) for i in range(0, x+1)])

        # Mass of the values 1..truncation (the value 0 is excluded from the support).
        norm_factor = cdf(truncation) - cdf(0)

        probabilities = [pmf(i)/norm_factor for i in range(1, truncation+1)]

        self.set_dist(probabilities)

    def __repr__(self) -> str:
        return f"Poisson distribution: (p={self.p}, truncation={self.truncation})"
100
+
101
class ZipfDistribution(Distribution):
    """Zipf (power-law) distribution over the support 1..truncation."""
    def __init__(self, p: float, truncation: int = 150):
        """
        Build the truncated Zipf probability table, P(i) proportional to i**-p.
        inputs:
        p - exponent of the Zipf distribution
        truncation - (optional, by default 150) maximal value of the distribution
        raises:
        ValueError - if truncation is smaller than 1
        """
        # An empty support makes the normaliser 0 (previously a ZeroDivisionError).
        if truncation < 1:
            raise ValueError(f"The truncation should be at least 1. Received a value of {truncation}")
        self.p = p
        self.truncation = truncation

        # Un-normalised weights and their sum (the generalised harmonic number).
        # Computed once instead of once per table entry.
        weights = [(i**-p) for i in range(1, truncation+1)]
        harmonic = sum(weights)

        # The table is already normalised by `harmonic`: CDF(truncation) - CDF(0)
        # is exactly 1, so no further division is required.
        probabilities = [w*(1.0/harmonic) for w in weights]

        self.set_dist(probabilities)

    def __repr__(self) -> str:
        return f"Zipf distribution: (p={self.p}, truncation={self.truncation})"
122
+
123
def is_newick(tree: str):
    """
    Minimal Newick sanity check: the last non-blank token of `tree` must be ';'.

    Tokenisation follows
    https://github.com/ila/Newick-validator/blob/master/Newick_Validator.py

    inputs:
    tree - the candidate Newick string
    returns:
    True when the string is terminated by ';'
    raises:
    ValueError - if the string is empty/blank or does not end with ';'
    """
    # dividing the string into tokens, to check them singularly
    tokens = split(r'([A-Za-z]+[^A-Za-z,)]+[A-Za-z]+|[0-9.]*[A-Za-z]+[0-9.]+|[0-9.]+\s+[0-9.]+|[0-9.]+|[A-za-z]+|\(|\)|;|:|,)', tree)

    # removing spaces and empty strings (spaces within labels are still present)
    parsed_tokens = [token for token in tokens if token and not token.isspace()]

    # checking whether the tree ends with ; (an empty token list has no terminator either)
    if not parsed_tokens or parsed_tokens[-1] != ';':
        raise ValueError(f"Tree without ; at the end. Tree received: {tree}")
    return True
136
+
137
+ # TODO: this class should probably be deleted
138
class Block:
    """
    A single event block, used to add insertions or deletions.

    Thin wrapper that stores a `_Sailfish.Block` built from the two integers.
    """
    def __init__(self, num1: int, num2: int):
        backend_block = _Sailfish.Block(num1, num2)
        self.block = backend_block
145
+
146
class BlockTree:
    """
    Container for the events on multiple branches (entire tree).

    Thin wrapper around a `_Sailfish.BlockTree` created for the given root length.
    """
    def __init__(self, root_length: int):
        backend_tree = _Sailfish.BlockTree(root_length)
        self.blockTree = backend_tree

    def print_tree(self) -> str:
        """Return whatever the backend's `print_tree` returns."""
        backend_tree = self.blockTree
        return backend_tree.print_tree()

    def block_list(self) -> List:
        """Return whatever the backend's `block_list` returns."""
        backend_tree = self.blockTree
        return backend_tree.block_list()
158
+
159
+ # TODO: delete one of these two classes (probably the one above, if it is not used)
160
class BlockTreePython:
    """
    Container for the events on multiple branches (entire tree).

    Maps a branch name to the backend object holding that branch's events.
    """
    def __init__(self, branch_block_dict: Dict[str, "_Sailfish.Block"]):
        self._branch_block_dict = branch_block_dict
        # Plain Python dict copy of the mapping, used for membership checks and lookups.
        self._branch_block_dict_python = {i: x for i, x in branch_block_dict.items()}

    def _get_Sailfish_blocks(self) -> Dict[str, "_Sailfish.Block"]:
        """Return the mapping exactly as it was received from the backend."""
        return self._branch_block_dict

    def get_branches_str(self) -> Dict[str, str]:
        """Return {branch name: result of that branch's `print_tree()`}."""
        return {i: self._branch_block_dict[i].print_tree() for i in list(self._branch_block_dict.keys())}

    def get_specific_branch(self, branch: str) -> str:
        """
        Return the `print_tree()` output of a single branch.
        raises:
        ValueError - if `branch` is not a known branch name
        """
        if not branch in self._branch_block_dict_python:
            raise ValueError(f"branch not in the _branch_block, aviable branches are: {list(self._branch_block_dict_python.keys())}")
        return self._branch_block_dict[branch].print_tree()

    def print_branches(self) -> None:
        """Print every branch name followed by its `print_tree()` output."""
        for i in list(self._branch_block_dict.keys()):
            print(f"branch = {i}")
            print(self._branch_block_dict[i].print_tree())

    def block_list(self, branch: Optional[str] = None) -> List:
        """
        Return the object stored for `branch`.
        inputs:
        branch - name of the branch. The method previously referenced an
                 undefined `branch` and always failed with NameError. The
                 default of None only keeps zero-argument calls callable;
                 they now fail with the ValueError below, which lists the
                 valid names.
        raises:
        ValueError - if `branch` is missing or not a known branch name
        """
        # NOTE(review): the stored value is the backend object itself, not a
        # Python list - confirm whether `.block_list()` of it was intended.
        if not branch in self._branch_block_dict_python:
            raise ValueError(f"branch not in the _branch_block, aviable branches are: {list(self._branch_block_dict_python.keys())}")
        return self._branch_block_dict_python[branch]
189
+
190
class Tree:
    """
    The tree class for the simulator.

    Wraps a `_Sailfish.Tree` built either from a Newick string or from a path
    to a file containing one, and keeps the Newick text for `__repr__`.
    """
    def __init__(self, input_str: str):
        """
        inputs:
        input_str - a Newick string, or the path of an existing file holding one
        raises:
        ValueError - if the tree text is not terminated by ';'
        """
        is_from_file = os.path.isfile(input_str)
        if is_from_file:
            # Context manager so the file handle is closed deterministically.
            with open(input_str, 'r') as tree_file:
                tree_str = tree_file.read()
        else:
            tree_str = input_str
        if not is_newick(tree_str):
            if is_from_file:
                raise ValueError(f"Failed to read tree from file. File path: {input_str}, content: {tree_str}")
            else:
                raise ValueError(f"Failed construct tree from string. String received: {tree_str}")
        # The backend receives the original argument plus a flag saying whether it is a path.
        self._tree = _Sailfish.Tree(input_str, is_from_file)
        self._tree_str = tree_str

    def get_num_nodes(self) -> int:
        """Return the backend tree's `num_nodes`."""
        return self._tree.num_nodes

    def get_num_leaves(self) -> int:
        """Return `num_leaves` of the backend tree's root."""
        return self._tree.root.num_leaves

    def _get_Sailfish_tree(self) -> "_Sailfish.Tree":
        """Return the wrapped backend tree."""
        return self._tree

    def __repr__(self) -> str:
        return f"{self._tree_str}"
220
+
221
class SimProtocol:
    """
    The simulator protocol, sets the different distribution, tree and root length.

    Every setter stores the value on the Python side and forwards it to the
    wrapped `_Sailfish.SimProtocol`. Rates and length distributions are kept
    per branch; the number of branches is the number of tree nodes minus one.
    """
    def __init__(self, tree = None,
                 root_seq_size: int = 100,
                 deletion_rate: float = 0.0,
                 insertion_rate: float = 0.0,
                 deletion_dist: Optional[Distribution] = None,
                 insertion_dist: Optional[Distribution] = None,
                 seed: int = 0,
                 ):
        """
        inputs:
        tree - a Newick string, a path to a file containing a tree, or a Tree
        root_seq_size - length of the root sequence
        deletion_rate / insertion_rate - single rate applied to every branch
        deletion_dist / insertion_dist - length distribution applied to every
            branch; None (the default) means a fresh ZipfDistribution(1.7, 50)
        seed - seed forwarded to the backend
        raises:
        ValueError - if `tree` is neither a Tree nor a str
        """
        if isinstance(tree, Tree):
            self._tree = tree
        elif isinstance(tree, str):
            self._tree = Tree(tree)
        else:
            raise ValueError(f"please provide one of the following: (1) a newick format of a tree; (2) a path to a file containing a tree; (3) or a tree created by the Tree class")

        # The defaults are created per instance. As default argument values they
        # were built once at import time and shared by every protocol.
        if deletion_dist is None:
            deletion_dist = ZipfDistribution(1.7, 50)
        if insertion_dist is None:
            insertion_dist = ZipfDistribution(1.7, 50)

        self._num_branches = self._tree.get_num_nodes() - 1
        self._sim = _Sailfish.SimProtocol(self._tree._get_Sailfish_tree())
        self.set_seed(seed)
        self.set_sequence_size(root_seq_size)
        # The rate setters also maintain the _is_*_rate_zero flags.
        self.set_deletion_rates(deletion_rate=deletion_rate)
        self.set_insertion_rates(insertion_rate=insertion_rate)
        self.set_deletion_length_distributions(deletion_dist=deletion_dist)
        self.set_insertion_length_distributions(insertion_dist=insertion_dist)

    def _check_branch_num(self, branch_num: int) -> None:
        """Raise ValueError unless 0 <= branch_num < number of branches."""
        if not 0 <= branch_num < self._num_branches:
            raise ValueError(f"The branch number should be between 0 to {self._num_branches} (not included). Received value of {branch_num}")

    def get_tree(self) -> Tree:
        return self._tree

    def _get_Sailfish_tree(self) -> _Sailfish.Tree:
        return self._tree._get_Sailfish_tree()

    def _get_root(self):
        return self._tree._get_Sailfish_tree().root

    def get_num_branches(self) -> int:
        return self._num_branches

    def set_seed(self, seed: int) -> None:
        self._seed = seed
        self._sim.set_seed(seed)

    def get_seed(self) -> int:
        return self._seed

    def set_sequence_size(self, sequence_size: int) -> None:
        self._sim.set_sequence_size(sequence_size)
        self._root_seq_size = sequence_size

    def get_sequence_size(self) -> int:
        return self._root_seq_size

    def set_insertion_rates(self, insertion_rate: Optional[float] = None, insertion_rates: Optional[List[float]] = None) -> None:
        """
        Set the insertion rate of every branch.
        inputs:
        insertion_rate - a single value used for all branches (takes precedence)
        insertion_rates - one value per branch
        raises:
        ValueError - if neither argument is given, or the list length differs
                     from the number of branches
        """
        if insertion_rate is not None:
            rates = [insertion_rate] * self._num_branches
            all_zero = not insertion_rate
        elif insertion_rates:
            if not len(insertion_rates) == self._num_branches:
                raise ValueError(f"The length of the insertaion rates should be equal to the number of branches in the tree. The insertion_rates length is {len(insertion_rates)} and the number of branches is {self._num_branches}. You can pass a single value as insertion_rate which will be used for all branches.")
            rates = insertion_rates
            all_zero = not any(rates)
        else:
            raise ValueError(f"please provide one of the following: insertion_rate (a single value used for all branches), or a insertion_rates (a list of values, each corresponding to a different branch)")

        self.insertion_rates = rates
        # Recomputed on every call so the flag also returns to True when the
        # rates are set back to zero (it used to be cleared but never re-set).
        self._is_insertion_rate_zero = all_zero
        self._sim.set_insertion_rates(self.insertion_rates)

    def get_insertion_rate(self, branch_num: int) -> float:
        self._check_branch_num(branch_num)
        return self._sim.get_insertion_rate(branch_num)

    def get_all_insertion_rates(self) -> Dict:
        return {i: self.get_insertion_rate(i) for i in range(self._num_branches)}

    def set_deletion_rates(self, deletion_rate: Optional[float] = None, deletion_rates: Optional[List[float]] = None) -> None:
        """
        Set the deletion rate of every branch.
        inputs:
        deletion_rate - a single value used for all branches (takes precedence)
        deletion_rates - one value per branch
        raises:
        ValueError - if neither argument is given, or the list length differs
                     from the number of branches
        """
        if deletion_rate is not None:
            rates = [deletion_rate] * self._num_branches
            all_zero = not deletion_rate
        elif deletion_rates:
            if not len(deletion_rates) == self._num_branches:
                raise ValueError(f"The length of the deletion rates should be equal to the number of branches in the tree. The deletion_rates length is {len(deletion_rates)} and the number of branches is {self._num_branches}. You can pass a single value as deletion_rate which will be used for all branches.")
            rates = deletion_rates
            all_zero = not any(rates)
        else:
            raise ValueError(f"please provide one of the following: deletion_rate (a single value used for all branches), or a deletion_rates (a list of values, each corresponding to a different branch)")

        self.deletion_rates = rates
        # See set_insertion_rates: the flag always reflects the latest rates.
        self._is_deletion_rate_zero = all_zero
        self._sim.set_deletion_rates(self.deletion_rates)

    def get_deletion_rate(self, branch_num: int) -> float:
        self._check_branch_num(branch_num)
        return self._sim.get_deletion_rate(branch_num)

    def get_all_deletion_rates(self) -> Dict:
        return {i: self.get_deletion_rate(i) for i in range(self._num_branches)}

    def set_insertion_length_distributions(self, insertion_dist: Optional[Distribution] = None, insertion_dists: Optional[List[Distribution]] = None) -> None:
        """
        Set the insertion length distribution of every branch.
        inputs:
        insertion_dist - a single distribution shared by all branches (takes precedence)
        insertion_dists - one distribution per branch
        raises:
        ValueError - if neither argument is given, or the list length differs
                     from the number of branches
        """
        if insertion_dist is not None:
            self.insertion_dists = [insertion_dist] * self._num_branches
        elif insertion_dists:
            if not len(insertion_dists) == self._num_branches:
                raise ValueError(f"The length of the insertion dists should be equal to the number of branches in the tree. The insertion_dists length is {len(insertion_dists)} and the number of branches is {self._num_branches}. You can pass a single value as insertion_dist which will be used for all branches.")
            self.insertion_dists = insertion_dists
        else:
            raise ValueError(f"please provide one of the following: insertion_dist (a single value used for all branches), or a insertion_dists (a list of values, each corresponding to a different branch)")

        self._sim.set_insertion_length_distributions([dist._get_Sailfish_dist() for dist in self.insertion_dists])

    def get_insertion_length_distribution(self, branch_num: int) -> Distribution:
        self._check_branch_num(branch_num)
        return self.insertion_dists[branch_num]

    def get_all_insertion_length_distribution(self) -> Dict:
        return {i: self.get_insertion_length_distribution(i) for i in range(self._num_branches)}

    def set_deletion_length_distributions(self, deletion_dist: Optional[Distribution] = None, deletion_dists: Optional[List[Distribution]] = None) -> None:
        """
        Set the deletion length distribution of every branch.
        inputs:
        deletion_dist - a single distribution shared by all branches (takes precedence)
        deletion_dists - one distribution per branch
        raises:
        ValueError - if neither argument is given, or the list length differs
                     from the number of branches
        """
        if deletion_dist is not None:
            self.deletion_dists = [deletion_dist] * self._num_branches
        elif deletion_dists:
            if not len(deletion_dists) == self._num_branches:
                raise ValueError(f"The length of the deletion dists should be equal to the number of branches in the tree. The deletion_dists length is {len(deletion_dists)} and the number of branches is {self._num_branches}. You can pass a single value as deletion_dist which will be used for all branches.")
            self.deletion_dists = deletion_dists
        else:
            raise ValueError(f"please provide one of the following: deletion_dist (a single value used for all branches), or a deletion_dists (a list of values, each corresponding to a different branch)")

        self._sim.set_deletion_length_distributions([dist._get_Sailfish_dist() for dist in self.deletion_dists])

    def get_deletion_length_distribution(self, branch_num: int) -> Distribution:
        self._check_branch_num(branch_num)
        return self.deletion_dists[branch_num]

    def get_all_deletion_length_distribution(self) -> Dict:
        return {i: self.get_deletion_length_distribution(i) for i in range(self._num_branches)}
366
+
367
class Msa:
    """
    The MSA class from the simulator.

    Thin wrapper: every method forwards to the wrapped `_Sailfish.Msa`.
    """
    def __init__(self, species_dict: Dict[str, BlockTree], root_node):
        # Both arguments are handed to the backend constructor unchanged.
        backend_msa = _Sailfish.Msa(species_dict, root_node)
        self._msa = backend_msa

    def generate_msas(self, node):
        """Forward `node` to the backend's `generate_msas`."""
        backend_msa = self._msa
        backend_msa.generate_msas(node)

    def get_length(self) -> int:
        """Return the backend's `length()`."""
        backend_msa = self._msa
        return backend_msa.length()

    def get_num_sequences(self) -> int:
        """Return the backend's `num_sequences()`."""
        backend_msa = self._msa
        return backend_msa.num_sequences()

    def fill_substitutions(self, sequenceContainer) -> None:
        """Forward `sequenceContainer` to the backend's `fill_substitutions`."""
        backend_msa = self._msa
        backend_msa.fill_substitutions(sequenceContainer)

    def print_msa(self) -> str:
        """Return the result of the backend's `print_msa()`."""
        backend_msa = self._msa
        return backend_msa.print_msa()

    def print_indels(self) -> str:
        """Return the result of the backend's `print_indels()`."""
        backend_msa = self._msa
        return backend_msa.print_indels()

    def get_msa(self) -> str:
        """Return the result of the backend's `get_msa()`."""
        backend_msa = self._msa
        return backend_msa.get_msa()

    def write_msa(self, file_path) -> None:
        """Forward `file_path` to the backend's `write_msa`."""
        backend_msa = self._msa
        backend_msa.write_msa(file_path)
397
+
398
+ #def __repr__(self) -> str:
399
+ # return f"{self.get_msa()}"
400
+
401
class Simulator:
    """
    Simulate MSAs based on SimProtocol.

    Wraps a `_Sailfish.Simulator`. Indels are generated from the protocol and
    substitutions from a model factory, which is either configured explicitly
    with `set_replacement_model` or created with defaults on first use.
    """
    def __init__(self, simProtocol: Optional[SimProtocol] = None, simulation_type: Optional[SIMULATION_TYPE] = None):
        """
        inputs:
        simProtocol - the protocol to simulate; when missing, a default
                      three-leaf protocol is created and a warning is issued
        simulation_type - SIMULATION_TYPE.DNA or SIMULATION_TYPE.PROTEIN;
                          when missing, PROTEIN is used and a warning is issued
        raises:
        ValueError - if the protocol fails verification or the simulation
                     type is unknown
        """
        if not simProtocol:
            warnings.warn(f"initalized a simulator without simProtocol -> using a default protocol with Tree = '(A:0.01,B:0.5,C:0.03);' and root length of 100")
            # default simulation values
            poisson = PoissonDistribution(10, 100)
            simProtocol = SimProtocol(tree="(A:0.01,B:0.5,C:0.03);")
            simProtocol.set_insertion_length_distributions(poisson)
            simProtocol.set_deletion_length_distributions(poisson)
            simProtocol.set_insertion_rates(0.05)
            simProtocol.set_deletion_rates(0.05)
            simProtocol.set_sequence_size(100)
        # verify sim_protocol
        if self._verify_sim_protocol(simProtocol):
            self._simProtocol = simProtocol
            self._simulator = _Sailfish.Simulator(self._simProtocol._sim)
        else:
            raise ValueError(f"failed to verify simProtocol")

        if not simulation_type:
            warnings.warn(f"simulation type not provided -> running protein simulation")
            simulation_type = SIMULATION_TYPE.PROTEIN

        if simulation_type == SIMULATION_TYPE.PROTEIN:
            self._alphabet = _Sailfish.alphabetCode.AMINOACID
        elif simulation_type == SIMULATION_TYPE.DNA:
            self._alphabet = _Sailfish.alphabetCode.NUCLEOTIDE
        else:
            raise ValueError(f"unknown simulation type, please provde one of the following: {[e.name for e in SIMULATION_TYPE]}")

        self._simulation_type = simulation_type
        self._is_sub_model_init = False

    def _verify_sim_protocol(self, simProtocol) -> bool:
        """
        Check that the protocol is complete enough to simulate from.
        returns:
        True when every check passes
        raises:
        ValueError - describing the first missing or invalid setting
        """
        if not simProtocol.get_tree():
            raise ValueError(f"protocol miss tree, please provide when initalizing the simProtocol")
        if not simProtocol.get_sequence_size():
            raise ValueError(f"protocol miss root length, please provide -> simProtocol.set_sequence_size(int)")
        if not simProtocol.get_insertion_length_distribution(0):
            raise ValueError(f"protocol miss insertion length distribution, please provide -> simProtocol.set_insertion_length_distributions(Distribution)")
        if not simProtocol.get_deletion_length_distribution(0):
            raise ValueError(f"protocol miss deletion length distribution, please provide -> simProtocol.set_deletion_length_distributions(Distribution)")
        # A rate of zero is valid (simulate() has a dedicated no-indel path);
        # only negative rates are rejected. Every branch is checked, because
        # the rates may have been supplied as a per-branch list.
        for branch_num, rate in simProtocol.get_all_insertion_rates().items():
            if rate < 0:
                raise ValueError(f"please provide a non negative value for insertion rate, provided value of: {rate} (branch {branch_num}) -> simProtocol.set_insertion_rates(float)")
        for branch_num, rate in simProtocol.get_all_deletion_rates().items():
            if rate < 0:
                raise ValueError(f"please provide a non negative value for deletion rate, provided value of: {rate} (branch {branch_num}) -> simProtocol.set_deletion_rates(float)")
        return True

    def reset_sim(self):
        # TODO, complete
        pass

    def _init_sub_model(self) -> None:
        """Initialise the substitution model with defaults (WAG for protein, JC for DNA)."""
        self._model_factory = _Sailfish.modelFactory(self._simProtocol._get_Sailfish_tree())
        self._model_factory.set_alphabet(self._alphabet)
        if self._simulation_type == SIMULATION_TYPE.PROTEIN:
            warnings.warn(f"replacement matrix not provided -> running with default parameters: WAG model")
            self._model_factory.set_replacement_model(_Sailfish.modelCode.WAG)
        else:
            warnings.warn(f"replacement matrix not provided -> running with default parameters: JC model")
            self._model_factory.set_replacement_model(_Sailfish.modelCode.NUCJC)
        self._model_factory.set_gamma_parameters(1.0, 1)

        self._simulator.init_substitution_sim(self._model_factory)
        self._is_sub_model_init = True

    def set_replacement_model(
            self,
            model: _Sailfish.modelCode,
            model_parameters: List = None,
            gamma_parameters_alpha : float = 1.0,
            gamma_parameters_catergories: int = 1
        ) -> None:
        """
        Configure the substitution model used by `gen_substitutions`.
        inputs:
        model - one of the `_Sailfish.modelCode` values
        model_parameters - parameters of the model; must be omitted for
                           protein models and for the JC model, and is
                           required for every other DNA model
        gamma_parameters_alpha - alpha forwarded to `set_gamma_parameters`
        gamma_parameters_catergories - number of categories forwarded to
                                       `set_gamma_parameters`, a positive integer
        raises:
        ValueError - on any invalid combination of the arguments above
        """
        # All arguments are validated before any state is touched, so a
        # rejected call leaves the previously configured model in place.
        if not model:
            raise ValueError(f"please provide a substitution model from the the following list: {_Sailfish.modelCode}")
        if int(gamma_parameters_catergories) != gamma_parameters_catergories or gamma_parameters_catergories < 1:
            raise ValueError(f"gamma_parameters_catergories has to be a positive int value: received value of {gamma_parameters_catergories}")
        if self._simulation_type == SIMULATION_TYPE.PROTEIN:
            if model_parameters:
                raise ValueError(f"no model parameters are used in protein, recevied value of: {model_parameters}")
        elif model == MODEL_CODES.NUCJC:
            if model_parameters:
                raise ValueError(f"no model parameters in JC model, recevied value of: {model_parameters}")
        elif not model_parameters:
            raise ValueError(f"please provide a model parameters")

        model_factory = _Sailfish.modelFactory(self._simProtocol._get_Sailfish_tree())
        model_factory.set_alphabet(self._alphabet)
        model_factory.set_replacement_model(model)
        # After the validation above, parameters are only present for
        # parameterised DNA models.
        if model_parameters:
            model_factory.set_model_parameters(model_parameters)

        model_factory.set_gamma_parameters(gamma_parameters_alpha, gamma_parameters_catergories)
        self._simulator.init_substitution_sim(model_factory)

        self._model_factory = model_factory
        self._is_sub_model_init = True

    def gen_indels(self) -> BlockTreePython:
        """Generate indel events with the backend and wrap them in a BlockTreePython."""
        return BlockTreePython(self._simulator.gen_indels())

    def gen_substitutions(self, length: int):
        """Generate substitutions for `length` sites, initialising the default model if needed."""
        if not self._is_sub_model_init:
            self._init_sub_model()
        return self._simulator.gen_substitutions(length)

    # @profile
    def simulate(self, times: int = 1) -> List[Msa]:
        """
        Run the simulation `times` times.
        returns:
        a list with one Msa per run
        """
        msas = []
        for _ in range(times):
            if self._simProtocol._is_insertion_rate_zero and self._simProtocol._is_deletion_rate_zero:
                # No indels: the Msa is built from the leaf count and root length.
                msa = Msa(self._simProtocol.get_tree().get_num_leaves(), self._simProtocol.get_sequence_size())
            else:
                blocktree = self.gen_indels()
                msa = Msa(blocktree._get_Sailfish_blocks(), self._simProtocol._get_root())

            substitutions = self.gen_substitutions(msa.get_length())
            msa.fill_substitutions(substitutions)

            msas.append(msa)
        return msas

    def __call__(self) -> Msa:
        """Run a single simulation and return its Msa."""
        return self.simulate(1)[0]

    def save_rates(self, is_save: bool) -> None:
        """Forward `is_save` to the backend's `save_site_rates`."""
        self._simulator.save_site_rates(is_save)

    def get_rates(self) -> List[float]:
        """Return the backend's `get_site_rates()`."""
        return self._simulator.get_site_rates()
@@ -0,0 +1,172 @@
1
+ Academic Free License ("AFL") v. 3.0
2
+
3
+ This Academic Free License (the "License") applies to any original work of
4
+ authorship (the "Original Work") whose owner (the "Licensor") has placed the
5
+ following licensing notice adjacent to the copyright notice for the Original
6
+ Work:
7
+
8
+ Licensed under the Academic Free License version 3.0
9
+
10
+ 1) Grant of Copyright License. Licensor grants You a worldwide, royalty-free,
11
+ non-exclusive, sublicensable license, for the duration of the copyright, to do
12
+ the following:
13
+
14
+ a) to reproduce the Original Work in copies, either alone or as part of a
15
+ collective work;
16
+
17
+ b) to translate, adapt, alter, transform, modify, or arrange the Original
18
+ Work, thereby creating derivative works ("Derivative Works") based upon the
19
+ Original Work;
20
+
21
+ c) to distribute or communicate copies of the Original Work and Derivative
22
+ Works to the public, under any license of your choice that does not
23
+ contradict the terms and conditions, including Licensor's reserved rights
24
+ and remedies, in this Academic Free License;
25
+
26
+ d) to perform the Original Work publicly; and
27
+
28
+ e) to display the Original Work publicly.
29
+
30
+ 2) Grant of Patent License. Licensor grants You a worldwide, royalty-free,
31
+ non-exclusive, sublicensable license, under patent claims owned or controlled
32
+ by the Licensor that are embodied in the Original Work as furnished by the
33
+ Licensor, for the duration of the patents, to make, use, sell, offer for sale,
34
+ have made, and import the Original Work and Derivative Works.
35
+
36
+ 3) Grant of Source Code License. The term "Source Code" means the preferred
37
+ form of the Original Work for making modifications to it and all available
38
+ documentation describing how to modify the Original Work. Licensor agrees to
39
+ provide a machine-readable copy of the Source Code of the Original Work along
40
+ with each copy of the Original Work that Licensor distributes. Licensor
41
+ reserves the right to satisfy this obligation by placing a machine-readable
42
+ copy of the Source Code in an information repository reasonably calculated to
43
+ permit inexpensive and convenient access by You for as long as Licensor
44
+ continues to distribute the Original Work.
45
+
46
+ 4) Exclusions From License Grant. Neither the names of Licensor, nor the names
47
+ of any contributors to the Original Work, nor any of their trademarks or
48
+ service marks, may be used to endorse or promote products derived from this
49
+ Original Work without express prior permission of the Licensor. Except as
50
+ expressly stated herein, nothing in this License grants any license to
51
+ Licensor's trademarks, copyrights, patents, trade secrets or any other
52
+ intellectual property. No patent license is granted to make, use, sell, offer
53
+ for sale, have made, or import embodiments of any patent claims other than the
54
+ licensed claims defined in Section 2. No license is granted to the trademarks
55
+ of Licensor even if such marks are included in the Original Work. Nothing in
56
+ this License shall be interpreted to prohibit Licensor from licensing under
57
+ terms different from this License any Original Work that Licensor otherwise
58
+ would have a right to license.
59
+
60
+ 5) External Deployment. The term "External Deployment" means the use,
61
+ distribution, or communication of the Original Work or Derivative Works in any
62
+ way such that the Original Work or Derivative Works may be used by anyone
63
+ other than You, whether those works are distributed or communicated to those
64
+ persons or made available as an application intended for use over a network.
65
+ As an express condition for the grants of license hereunder, You must treat
66
+ any External Deployment by You of the Original Work or a Derivative Work as a
67
+ distribution under section 1(c).
68
+
69
+ 6) Attribution Rights. You must retain, in the Source Code of any Derivative
70
+ Works that You create, all copyright, patent, or trademark notices from the
71
+ Source Code of the Original Work, as well as any notices of licensing and any
72
+ descriptive text identified therein as an "Attribution Notice." You must cause
73
+ the Source Code for any Derivative Works that You create to carry a prominent
74
+ Attribution Notice reasonably calculated to inform recipients that You have
75
+ modified the Original Work.
76
+
77
+ 7) Warranty of Provenance and Disclaimer of Warranty. Licensor warrants that
78
+ the copyright in and to the Original Work and the patent rights granted herein
79
+ by Licensor are owned by the Licensor or are sublicensed to You under the
80
+ terms of this License with the permission of the contributor(s) of those
81
+ copyrights and patent rights. Except as expressly stated in the immediately
82
+ preceding sentence, the Original Work is provided under this License on an "AS
83
+ IS" BASIS and WITHOUT WARRANTY, either express or implied, including, without
84
+ limitation, the warranties of non-infringement, merchantability or fitness for
85
+ a particular purpose. THE ENTIRE RISK AS TO THE QUALITY OF THE ORIGINAL WORK
86
+ IS WITH YOU. This DISCLAIMER OF WARRANTY constitutes an essential part of this
87
+ License. No license to the Original Work is granted by this License except
88
+ under this disclaimer.
89
+
90
+ 8) Limitation of Liability. Under no circumstances and under no legal theory,
91
+ whether in tort (including negligence), contract, or otherwise, shall the
92
+ Licensor be liable to anyone for any indirect, special, incidental, or
93
+ consequential damages of any character arising as a result of this License or
94
+ the use of the Original Work including, without limitation, damages for loss
95
+ of goodwill, work stoppage, computer failure or malfunction, or any and all
96
+ other commercial damages or losses. This limitation of liability shall not
97
+ apply to the extent applicable law prohibits such limitation.
98
+
99
+ 9) Acceptance and Termination. If, at any time, You expressly assented to this
100
+ License, that assent indicates your clear and irrevocable acceptance of this
101
+ License and all of its terms and conditions. If You distribute or communicate
102
+ copies of the Original Work or a Derivative Work, You must make a reasonable
103
+ effort under the circumstances to obtain the express assent of recipients to
104
+ the terms of this License. This License conditions your rights to undertake
105
+ the activities listed in Section 1, including your right to create Derivative
106
+ Works based upon the Original Work, and doing so without honoring these terms
107
+ and conditions is prohibited by copyright law and international treaty.
108
+ Nothing in this License is intended to affect copyright exceptions and
109
+ limitations (including "fair use" or "fair dealing"). This License shall
110
+ terminate immediately and You may no longer exercise any of the rights granted
111
+ to You by this License upon your failure to honor the conditions in Section
112
+ 1(c).
113
+
114
+ 10) Termination for Patent Action. This License shall terminate automatically
115
+ and You may no longer exercise any of the rights granted to You by this
116
+ License as of the date You commence an action, including a cross-claim or
117
+ counterclaim, against Licensor or any licensee alleging that the Original Work
118
+ infringes a patent. This termination provision shall not apply for an action
119
+ alleging patent infringement by combinations of the Original Work with other
120
+ software or hardware.
121
+
122
+ 11) Jurisdiction, Venue and Governing Law. Any action or suit relating to this
123
+ License may be brought only in the courts of a jurisdiction wherein the
124
+ Licensor resides or in which Licensor conducts its primary business, and under
125
+ the laws of that jurisdiction excluding its conflict-of-law provisions. The
126
+ application of the United Nations Convention on Contracts for the
127
+ International Sale of Goods is expressly excluded. Any use of the Original
128
+ Work outside the scope of this License or after its termination shall be
129
+ subject to the requirements and penalties of copyright or patent law in the
130
+ appropriate jurisdiction. This section shall survive the termination of this
131
+ License.
132
+
133
+ 12) Attorneys' Fees. In any action to enforce the terms of this License or
134
+ seeking damages relating thereto, the prevailing party shall be entitled to
135
+ recover its costs and expenses, including, without limitation, reasonable
136
+ attorneys' fees and costs incurred in connection with such action, including
137
+ any appeal of such action. This section shall survive the termination of this
138
+ License.
139
+
140
+ 13) Miscellaneous. If any provision of this License is held to be
141
+ unenforceable, such provision shall be reformed only to the extent necessary
142
+ to make it enforceable.
143
+
144
+ 14) Definition of "You" in This License. "You" throughout this License,
145
+ whether in upper or lower case, means an individual or a legal entity
146
+ exercising rights under, and complying with all of the terms of, this License.
147
+ For legal entities, "You" includes any entity that controls, is controlled by,
148
+ or is under common control with you. For purposes of this definition,
149
+ "control" means (i) the power, direct or indirect, to cause the direction or
150
+ management of such entity, whether by contract or otherwise, or (ii) ownership
151
+ of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial
152
+ ownership of such entity.
153
+
154
+ 15) Right to Use. You may use the Original Work in all ways not otherwise
155
+ restricted or conditioned by this License or by law, and Licensor promises not
156
+ to interfere with or be responsible for such uses by You.
157
+
158
+ 16) Modification of This License. This License is Copyright © 2005 Lawrence
159
+ Rosen. Permission is granted to copy, distribute, or communicate this License
160
+ without modification. Nothing in this License permits You to modify this
161
+ License as applied to the Original Work or to Derivative Works. However, You
162
+ may modify the text of this License and copy, distribute or communicate your
163
+ modified version (the "Modified License") and apply it to other original works
164
+ of authorship subject to the following conditions: (i) You may not indicate in
165
+ any way that your Modified License is the "Academic Free License" or "AFL" and
166
+ you may not use those names in the name of your Modified License; (ii) You
167
+ must replace the notice specified in the first paragraph above with the notice
168
+ "Licensed under <insert your license name here>" or with a notice of your own
169
+ that is not confusingly similar to the notice in this License; and (iii) You
170
+ may not claim that your original works are open source software unless your
171
+ Modified License has been approved by Open Source Initiative (OSI) and You
172
+ comply with its license review and certification process.
@@ -0,0 +1,16 @@
1
+ Metadata-Version: 2.1
2
+ Name: msasim
3
+ Version: 2024.5.22
4
+ Summary: A fast MSA simulator
5
+ Home-page: https://github.com/elyawy/Sailfish-backend
6
+ Author: Elya Wygoda
7
+ Author-email: elya.wygoda@gmail.com
8
+ License: UNKNOWN
9
+ Platform: UNKNOWN
10
+ Requires-Python: >=3.6
11
+ License-File: LICENSE
12
+ Provides-Extra: test
13
+ Requires-Dist: pytest ; extra == 'test'
14
+
15
+ Sailfish is a performant multiple sequence alignment simulator, written in C++, allowing fast generation of large simulated datasets.
16
+
@@ -0,0 +1,10 @@
1
+ _Sailfish.cpython-36m-x86_64-linux-gnu.so,sha256=wDu66tG2DnoqDsSenpvNOX-4aZfg17hyBs9-NRyKfOo,1199161
2
+ Sailfish/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
+ Sailfish/simulator.py,sha256=CTFAnavIHy918aF141FKeZjO5Y-oYASI-TR5MMGGVks,24324
4
+ msasim.libs/libstdc++-a9383cce.so.6.0.28,sha256=Wy9UCdwS1rwI9GU5e7qE61S0AkRqqwti1q_adWSs-Rk,2447393
5
+ msasim.libs/libgcc_s-a04fdf82.so.1,sha256=YxqJNaesQMhDswHEQpXsiLnVvMBBbYO6KYMDZFPWKSM,81257
6
+ msasim-2024.5.22.dist-info/WHEEL,sha256=rbVfYZzJiOCEiiKDVkaog3MWj3SC9SfTJ-H92vzqnHY,112
7
+ msasim-2024.5.22.dist-info/LICENSE,sha256=p1Aa_mM2Nu6dG3XqMVU62Jhf04lNOwtXUrhhvhcDips,10312
8
+ msasim-2024.5.22.dist-info/top_level.txt,sha256=mVJT9Q6vqdBJyJ6acJvUdopsnVCQH7wEMQ-3iT6C-Jg,19
9
+ msasim-2024.5.22.dist-info/RECORD,,
10
+ msasim-2024.5.22.dist-info/METADATA,sha256=2gIzialKFhrP-FmN1o7Lf_kGyAWPBHxu6WwX44JkQI0,471
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: bdist_wheel (0.37.1)
3
+ Root-Is-Purelib: false
4
+ Tag: cp36-cp36m-musllinux_1_1_x86_64
5
+
@@ -0,0 +1,2 @@
1
+ Sailfish
2
+ _Sailfish
Binary file
Binary file