msasim 24.10.4__tar.gz → 24.13.0__tar.gz
This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of msasim might be problematic. Click here for more details.
- {msasim-24.10.4 → msasim-24.13.0}/PKG-INFO +1 -1
- {msasim-24.10.4 → msasim-24.13.0}/msasim/sailfish.py +30 -10
- {msasim-24.10.4 → msasim-24.13.0}/msasim.egg-info/PKG-INFO +1 -1
- {msasim-24.10.4 → msasim-24.13.0}/setup.py +1 -1
- {msasim-24.10.4 → msasim-24.13.0}/src/main.cpp +5 -0
- {msasim-24.10.4 → msasim-24.13.0}/src/modelFactory.cpp +25 -1
- {msasim-24.10.4 → msasim-24.13.0}/LICENSE +0 -0
- {msasim-24.10.4 → msasim-24.13.0}/README.md +0 -0
- {msasim-24.10.4 → msasim-24.13.0}/msasim/__init__.py +0 -0
- {msasim-24.10.4 → msasim-24.13.0}/msasim.egg-info/SOURCES.txt +0 -0
- {msasim-24.10.4 → msasim-24.13.0}/msasim.egg-info/dependency_links.txt +0 -0
- {msasim-24.10.4 → msasim-24.13.0}/msasim.egg-info/not-zip-safe +0 -0
- {msasim-24.10.4 → msasim-24.13.0}/msasim.egg-info/requires.txt +0 -0
- {msasim-24.10.4 → msasim-24.13.0}/msasim.egg-info/top_level.txt +0 -0
- {msasim-24.10.4 → msasim-24.13.0}/pyproject.toml +0 -0
- {msasim-24.10.4 → msasim-24.13.0}/setup.cfg +0 -0
- {msasim-24.10.4 → msasim-24.13.0}/src/rateMatrixSim.cpp +0 -0
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import _Sailfish
|
|
2
|
-
import os, warnings, math, operator, time, profile
|
|
2
|
+
import os, warnings, math, operator, time, profile, tempfile, pathlib
|
|
3
3
|
from functools import reduce
|
|
4
4
|
from typing import List, Optional, Dict
|
|
5
5
|
from re import split
|
|
@@ -101,12 +101,9 @@ class ZipfDistribution(Distribution):
|
|
|
101
101
|
"""
|
|
102
102
|
self.p = p
|
|
103
103
|
self.truncation = truncation
|
|
104
|
-
HARMONIC = lambda n,s: sum([(i**-s) for i in range(1,n+1)])
|
|
105
|
-
PMF = lambda x: (x**-p)*(1.0/HARMONIC(truncation, p))
|
|
106
|
-
CDF = lambda x: HARMONIC(x, p) / HARMONIC(truncation, p)
|
|
107
|
-
norm_factor = CDF(truncation) - CDF(0)
|
|
108
104
|
|
|
109
|
-
|
|
105
|
+
norm_factor = sum([(i**-p) for i in range(1,truncation+1)])
|
|
106
|
+
probabilities = [(i**-p)/norm_factor for i in range(1, truncation+1)]
|
|
110
107
|
|
|
111
108
|
self.set_dist(probabilities)
|
|
112
109
|
|
|
@@ -465,15 +462,16 @@ class Simulator:
|
|
|
465
462
|
def set_replacement_model(
|
|
466
463
|
self,
|
|
467
464
|
model: _Sailfish.modelCode,
|
|
465
|
+
amino_model_file: pathlib.Path = None,
|
|
468
466
|
model_parameters: List = None,
|
|
469
467
|
gamma_parameters_alpha : float = 1.0,
|
|
470
|
-
|
|
468
|
+
gamma_parameters_categories: int = 1,
|
|
471
469
|
invariant_sites_proportion: float = 0.0
|
|
472
470
|
) -> None:
|
|
473
471
|
if not model:
|
|
474
472
|
raise ValueError(f"please provide a substitution model from the the following list: {_Sailfish.modelCode}")
|
|
475
|
-
if int(
|
|
476
|
-
raise ValueError(f"gamma_parameters_catergories has to be a positive int value: received value of {
|
|
473
|
+
if int(gamma_parameters_categories) != gamma_parameters_categories:
|
|
474
|
+
raise ValueError(f"gamma_parameters_catergories has to be a positive int value: received value of {gamma_parameters_categories}")
|
|
477
475
|
self._model_factory = _Sailfish.modelFactory(self._simProtocol._get_Sailfish_tree())
|
|
478
476
|
|
|
479
477
|
self._model_factory.set_alphabet(self._alphabet)
|
|
@@ -481,6 +479,8 @@ class Simulator:
|
|
|
481
479
|
if model_parameters:
|
|
482
480
|
raise ValueError(f"no model parameters are used in protein, recevied value of: {model_parameters}")
|
|
483
481
|
self._model_factory.set_replacement_model(model)
|
|
482
|
+
if model == MODEL_CODES.CUSTOM and amino_model_file:
|
|
483
|
+
self._model_factory.set_amino_replacement_model_file(str(amino_model_file))
|
|
484
484
|
else:
|
|
485
485
|
if model == MODEL_CODES.NUCJC and model_parameters:
|
|
486
486
|
raise ValueError(f"no model parameters in JC model, recevied value of: {model_parameters}")
|
|
@@ -492,7 +492,7 @@ class Simulator:
|
|
|
492
492
|
else:
|
|
493
493
|
self._model_factory.set_model_parameters(model_parameters)
|
|
494
494
|
|
|
495
|
-
self._model_factory.set_gamma_parameters(gamma_parameters_alpha,
|
|
495
|
+
self._model_factory.set_gamma_parameters(gamma_parameters_alpha, gamma_parameters_categories)
|
|
496
496
|
self._model_factory.set_invariant_sites_proportion(invariant_sites_proportion)
|
|
497
497
|
self._simulator.init_substitution_sim(self._model_factory)
|
|
498
498
|
|
|
@@ -537,6 +537,26 @@ class Simulator:
|
|
|
537
537
|
Msas.append(msa)
|
|
538
538
|
return Msas
|
|
539
539
|
|
|
540
|
+
def simulate_low_memory(self, output_file_path: pathlib.Path) -> Msa:
|
|
541
|
+
if self._simProtocol._is_insertion_rate_zero and self._simProtocol._is_deletion_rate_zero:
|
|
542
|
+
msa = Msa(sum(self.get_sequences_to_save()),
|
|
543
|
+
self._simProtocol.get_sequence_size(),
|
|
544
|
+
self.get_sequences_to_save())
|
|
545
|
+
else:
|
|
546
|
+
blocktree = self.gen_indels()
|
|
547
|
+
msa = Msa(blocktree._get_Sailfish_blocks(),
|
|
548
|
+
self._simProtocol._get_root(),
|
|
549
|
+
self.get_sequences_to_save())
|
|
550
|
+
|
|
551
|
+
# sim.init_substitution_sim(mFac)
|
|
552
|
+
if self._simulation_type != SIMULATION_TYPE.NOSUBS:
|
|
553
|
+
with tempfile.TemporaryDirectory() as tmpdirname:
|
|
554
|
+
self._simulator.gen_substitutions_to_dir(msa.get_length(), tmpdirname)
|
|
555
|
+
msa._msa.set_substitutions_folder(tmpdirname)
|
|
556
|
+
msa._msa.write_msa_from_dir(str(output_file_path))
|
|
557
|
+
|
|
558
|
+
|
|
559
|
+
|
|
540
560
|
def __call__(self) -> Msa:
|
|
541
561
|
return self.simulate(1)[0]
|
|
542
562
|
|
|
@@ -9,7 +9,7 @@ from datetime import datetime
|
|
|
9
9
|
now = datetime.now()
|
|
10
10
|
|
|
11
11
|
|
|
12
|
-
__version__ = f"{now.year % 100}.{now.month+1}.
|
|
12
|
+
__version__ = f"{now.year % 100}.{now.month+1}.0"
|
|
13
13
|
|
|
14
14
|
# The main interface is through Pybind11Extension.
|
|
15
15
|
# * You can add cxx_std=11/14/17, and then build_ext can be removed.
|
|
@@ -113,6 +113,7 @@ PYBIND11_MODULE(_Sailfish, m) {
|
|
|
113
113
|
.value("EX_EHO_EXP_EXT", modelCode::EX_EHO_EXP_EXT)
|
|
114
114
|
.value("EX_EHO_EXP_HEL", modelCode::EX_EHO_EXP_HEL)
|
|
115
115
|
.value("EX_EHO_EXP_OTH", modelCode::EX_EHO_EXP_OTH)
|
|
116
|
+
.value("CUSTOM", modelCode::CUSTOM)
|
|
116
117
|
.export_values();
|
|
117
118
|
|
|
118
119
|
|
|
@@ -121,6 +122,7 @@ PYBIND11_MODULE(_Sailfish, m) {
|
|
|
121
122
|
.def(py::init<tree*>())
|
|
122
123
|
.def("set_alphabet", &modelFactory::setAlphabet)
|
|
123
124
|
.def("set_replacement_model" , &modelFactory::setReplacementModel)
|
|
125
|
+
.def("set_amino_replacement_model_file" , &modelFactory::setCustomAAModelFile)
|
|
124
126
|
.def("set_model_parameters" , &modelFactory::setModelParameters)
|
|
125
127
|
.def("set_gamma_parameters" , &modelFactory::setGammaParameters)
|
|
126
128
|
.def("set_invariant_sites_proportion", &modelFactory::setInvariantSitesProportion)
|
|
@@ -134,6 +136,7 @@ PYBIND11_MODULE(_Sailfish, m) {
|
|
|
134
136
|
.def("run_sim", &Simulator::runSimulator)
|
|
135
137
|
.def("init_substitution_sim", &Simulator::initSubstitionSim)
|
|
136
138
|
.def("gen_substitutions", &Simulator::simulateSubstitutions)
|
|
139
|
+
.def("gen_substitutions_to_dir", &Simulator::simulateAndWriteSubstitutions)
|
|
137
140
|
.def("save_site_rates", &Simulator::setSaveRates)
|
|
138
141
|
.def("get_site_rates", &Simulator::getSiteRates)
|
|
139
142
|
.def("save_all_nodes_sequences", &Simulator::setSaveAllNodes)
|
|
@@ -148,9 +151,11 @@ PYBIND11_MODULE(_Sailfish, m) {
|
|
|
148
151
|
.def("length", &MSA::getMSAlength)
|
|
149
152
|
.def("num_sequences", &MSA::getNumberOfSequences)
|
|
150
153
|
.def("fill_substitutions", &MSA::fillSubstitutions)
|
|
154
|
+
.def("set_substitutions_folder", &MSA::setSubstitutionsFolder)
|
|
151
155
|
.def("print_msa", &MSA::printFullMsa)
|
|
152
156
|
.def("print_indels", &MSA::printIndels)
|
|
153
157
|
.def("write_msa", &MSA::writeFullMsa)
|
|
158
|
+
.def("write_msa_from_dir", &MSA::writeMsaFromDir)
|
|
154
159
|
.def("get_msa_string", &MSA::generateMsaString)
|
|
155
160
|
.def("get_msa", &MSA::getMSAVec);
|
|
156
161
|
|
|
@@ -30,7 +30,7 @@ void modelFactory::setReplacementModel(modelCode model) {
|
|
|
30
30
|
_state = factoryState::PARAMETERS;
|
|
31
31
|
if (_alphabet == alphabetCode::AMINOACID) _state = factoryState::GAMMA;
|
|
32
32
|
if (_model == modelCode::AAJC || _model == modelCode::NUCJC) _state = factoryState::GAMMA;
|
|
33
|
-
|
|
33
|
+
if (_model == modelCode::CUSTOM) _state = factoryState::MODEL_FILE;
|
|
34
34
|
}
|
|
35
35
|
|
|
36
36
|
void modelFactory::setModelParameters(std::vector<MDOUBLE> params) {
|
|
@@ -63,6 +63,15 @@ void modelFactory::setModelParameters(std::vector<MDOUBLE> params) {
|
|
|
63
63
|
_state = factoryState::GAMMA;
|
|
64
64
|
}
|
|
65
65
|
|
|
66
|
+
void modelFactory::setCustomAAModelFile(const std::string &fileName) {
|
|
67
|
+
if (_state!=factoryState::MODEL_FILE) {
|
|
68
|
+
std::cout << "Please set the model to 'CUSTOM' before proceeding.\n";
|
|
69
|
+
return;
|
|
70
|
+
}
|
|
71
|
+
_modelFilePath = fileName;
|
|
72
|
+
_state = factoryState::GAMMA;
|
|
73
|
+
}
|
|
74
|
+
|
|
66
75
|
void modelFactory::setGammaParameters(MDOUBLE alpha, size_t numCategories) {
|
|
67
76
|
if (_state!=factoryState::GAMMA) {
|
|
68
77
|
std::cout << "Please specify a model and its correct parameters before proceeding.\n";
|
|
@@ -184,6 +193,21 @@ std::shared_ptr<stochasticProcess> modelFactory::getStochasticProcess() {
|
|
|
184
193
|
case modelCode::EX_EHO_EXP_OTH:
|
|
185
194
|
repModel = std::make_unique<pupAll>(datMatrixHolder::EX_EHO_EXP_OTH);
|
|
186
195
|
break;
|
|
196
|
+
case modelCode::CUSTOM:
|
|
197
|
+
std::ifstream in(_modelFilePath);
|
|
198
|
+
if (!in.is_open()) throw std::runtime_error("Could not open file");
|
|
199
|
+
std::stringstream contents;
|
|
200
|
+
char buffer;
|
|
201
|
+
while (in.get(buffer)) {
|
|
202
|
+
if (buffer == '\"' || buffer == '\n') continue;
|
|
203
|
+
contents << buffer;
|
|
204
|
+
}
|
|
205
|
+
in.close();
|
|
206
|
+
const std::string &tmpstr = contents.str();
|
|
207
|
+
const char* cstr = tmpstr.c_str();
|
|
208
|
+
datMatrixString aminoFileString(cstr);
|
|
209
|
+
repModel = std::make_unique<pupAll>(aminoFileString);
|
|
210
|
+
break;
|
|
187
211
|
}
|
|
188
212
|
|
|
189
213
|
std::unique_ptr<pijAccelerator> pij;
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|