msasim 25.10.15__tar.gz → 25.11.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {msasim-25.10.15/msasim.egg-info → msasim-25.11.2}/PKG-INFO +1 -1
- {msasim-25.10.15 → msasim-25.11.2}/_Sailfish/__init__.pyi +2 -0
- {msasim-25.10.15 → msasim-25.11.2}/msasim/sailfish.py +16 -12
- {msasim-25.10.15 → msasim-25.11.2/msasim.egg-info}/PKG-INFO +1 -1
- {msasim-25.10.15 → msasim-25.11.2}/msasim.egg-info/SOURCES.txt +1 -2
- {msasim-25.10.15 → msasim-25.11.2}/setup.py +1 -1
- {msasim-25.10.15 → msasim-25.11.2}/src/main.cpp +34 -18
- {msasim-25.10.15 → msasim-25.11.2}/src/modelFactory.cpp +8 -0
- msasim-25.10.15/src/rateMatrixSim.cpp +0 -278
- {msasim-25.10.15 → msasim-25.11.2}/LICENSE +0 -0
- {msasim-25.10.15 → msasim-25.11.2}/README.md +0 -0
- {msasim-25.10.15 → msasim-25.11.2}/_Sailfish/py.typed +0 -0
- {msasim-25.10.15 → msasim-25.11.2}/msasim/__init__.py +0 -0
- {msasim-25.10.15 → msasim-25.11.2}/msasim.egg-info/dependency_links.txt +0 -0
- {msasim-25.10.15 → msasim-25.11.2}/msasim.egg-info/not-zip-safe +0 -0
- {msasim-25.10.15 → msasim-25.11.2}/msasim.egg-info/requires.txt +0 -0
- {msasim-25.10.15 → msasim-25.11.2}/msasim.egg-info/top_level.txt +0 -0
- {msasim-25.10.15 → msasim-25.11.2}/pyproject.toml +0 -0
- {msasim-25.10.15 → msasim-25.11.2}/setup.cfg +0 -0
|
@@ -376,6 +376,8 @@ class modelFactory:
|
|
|
376
376
|
...
|
|
377
377
|
def set_invariant_sites_proportion(self, arg0: float) -> None:
|
|
378
378
|
...
|
|
379
|
+
def set_site_rate_correlation(self, arg0: float) -> None:
|
|
380
|
+
...
|
|
379
381
|
def set_model_parameters(self, arg0: list[float]) -> None:
|
|
380
382
|
...
|
|
381
383
|
def set_replacement_model(self, arg0: modelCode) -> None:
|
|
@@ -25,14 +25,14 @@ class Distribution:
|
|
|
25
25
|
raise ValueError(f"Each value of the probabilities should be between 0 to 1. Received a value of {x}")
|
|
26
26
|
self._dist = _Sailfish.DiscreteDistribution(dist)
|
|
27
27
|
|
|
28
|
-
def draw_sample(self) -> int:
|
|
29
|
-
|
|
28
|
+
# def draw_sample(self) -> int:
|
|
29
|
+
# return self._dist.draw_sample()
|
|
30
30
|
|
|
31
|
-
def set_seed(self, seed: int) -> None:
|
|
32
|
-
|
|
31
|
+
# def set_seed(self, seed: int) -> None:
|
|
32
|
+
# return self._dist.set_seed(seed)
|
|
33
33
|
|
|
34
|
-
def get_table(self) -> List:
|
|
35
|
-
|
|
34
|
+
# def get_table(self) -> List:
|
|
35
|
+
# return self._dist.get_table()
|
|
36
36
|
|
|
37
37
|
def _get_Sailfish_dist(self) -> _Sailfish.DiscreteDistribution:
|
|
38
38
|
return self._dist
|
|
@@ -415,7 +415,10 @@ class Simulator:
|
|
|
415
415
|
# verify sim_protocol
|
|
416
416
|
if self._verify_sim_protocol(simProtocol):
|
|
417
417
|
self._simProtocol = simProtocol
|
|
418
|
-
|
|
418
|
+
if simulation_type == SIMULATION_TYPE.PROTEIN:
|
|
419
|
+
self._simulator = _Sailfish.AminoSimulator(self._simProtocol._sim)
|
|
420
|
+
else:
|
|
421
|
+
self._simulator = _Sailfish.NucleotideSimulator(self._simProtocol._sim)
|
|
419
422
|
else:
|
|
420
423
|
raise ValueError(f"failed to verify simProtocol")
|
|
421
424
|
|
|
@@ -475,7 +478,8 @@ class Simulator:
|
|
|
475
478
|
model_parameters: List = None,
|
|
476
479
|
gamma_parameters_alpha : float = 1.0,
|
|
477
480
|
gamma_parameters_categories: int = 1,
|
|
478
|
-
invariant_sites_proportion: float = 0.0
|
|
481
|
+
invariant_sites_proportion: float = 0.0,
|
|
482
|
+
site_rate_correlation: float = 0.0,
|
|
479
483
|
) -> None:
|
|
480
484
|
if not model:
|
|
481
485
|
raise ValueError(f"please provide a substitution model from the the following list: {_Sailfish.modelCode}")
|
|
@@ -503,6 +507,8 @@ class Simulator:
|
|
|
503
507
|
|
|
504
508
|
self._model_factory.set_gamma_parameters(gamma_parameters_alpha, gamma_parameters_categories)
|
|
505
509
|
self._model_factory.set_invariant_sites_proportion(invariant_sites_proportion)
|
|
510
|
+
self._model_factory.set_site_rate_correlation(site_rate_correlation)
|
|
511
|
+
|
|
506
512
|
self._simulator.init_substitution_sim(self._model_factory)
|
|
507
513
|
|
|
508
514
|
self._is_sub_model_init = True
|
|
@@ -556,13 +562,11 @@ class Simulator:
|
|
|
556
562
|
msa = Msa(blocktree._get_Sailfish_blocks(),
|
|
557
563
|
self._simProtocol._get_root(),
|
|
558
564
|
self.get_sequences_to_save())
|
|
565
|
+
self._simulator.set_aligned_sequence_map(msa._msa)
|
|
559
566
|
|
|
560
567
|
# sim.init_substitution_sim(mFac)
|
|
561
568
|
if self._simulation_type != SIMULATION_TYPE.NOSUBS:
|
|
562
|
-
|
|
563
|
-
self._simulator.gen_substitutions_to_dir(msa.get_length(), tmpdirname)
|
|
564
|
-
msa._msa.set_substitutions_folder(tmpdirname)
|
|
565
|
-
msa._msa.write_msa_from_dir(str(output_file_path))
|
|
569
|
+
self._simulator.gen_substitutions_to_file(msa.get_length(), str(output_file_path))
|
|
566
570
|
|
|
567
571
|
|
|
568
572
|
|
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
#include <pybind11/stl.h>
|
|
5
5
|
#include <memory>
|
|
6
6
|
|
|
7
|
+
#include "../libs/pcg/pcg_random.hpp"
|
|
7
8
|
#include "./Simulator.h"
|
|
8
9
|
|
|
9
10
|
namespace py = pybind11;
|
|
@@ -29,6 +30,8 @@ PYBIND11_MODULE(_Sailfish, m) {
|
|
|
29
30
|
Tree
|
|
30
31
|
)pbdoc";
|
|
31
32
|
|
|
33
|
+
using SelectedRNG = pcg32_fast;
|
|
34
|
+
|
|
32
35
|
py::class_<Block>(m, "Block")
|
|
33
36
|
.def(py::init<size_t, size_t>());
|
|
34
37
|
|
|
@@ -43,10 +46,7 @@ PYBIND11_MODULE(_Sailfish, m) {
|
|
|
43
46
|
.export_values();
|
|
44
47
|
|
|
45
48
|
py::class_<DiscreteDistribution>(m, "DiscreteDistribution")
|
|
46
|
-
.def(py::init<std::vector<double>>())
|
|
47
|
-
.def("draw_sample", &DiscreteDistribution::drawSample, "Draw a random sample according to the given distribution")
|
|
48
|
-
.def_static("set_seed", &DiscreteDistribution::setSeed, "Set seed for the random number generator")
|
|
49
|
-
.def("get_table", &DiscreteDistribution::getTable, "Get Vose's alias table (useful for debugging)");
|
|
49
|
+
.def(py::init<std::vector<double>>());
|
|
50
50
|
|
|
51
51
|
py::class_<tree>(m, "Tree")
|
|
52
52
|
.def(py::init<const std::string&, bool>(), "Create Phylogenetic tree object from newick formatted file")
|
|
@@ -128,32 +128,48 @@ PYBIND11_MODULE(_Sailfish, m) {
|
|
|
128
128
|
.def("set_model_parameters" , &modelFactory::setModelParameters)
|
|
129
129
|
.def("set_gamma_parameters" , &modelFactory::setGammaParameters)
|
|
130
130
|
.def("set_invariant_sites_proportion", &modelFactory::setInvariantSitesProportion)
|
|
131
|
+
.def("set_site_rate_correlation", &modelFactory::setSiteRateCorrelation)
|
|
131
132
|
.def("reset", &modelFactory::resetFactory);
|
|
132
133
|
|
|
133
134
|
|
|
134
|
-
py::class_<Simulator
|
|
135
|
+
py::class_<Simulator<SelectedRNG, 20>>(m, "AminoSimulator")
|
|
136
|
+
.def(py::init<SimulationProtocol*>())
|
|
137
|
+
.def("reset_sim", &Simulator<SelectedRNG, 20>::resetSimulator)
|
|
138
|
+
.def("gen_indels", &Simulator<SelectedRNG, 20>::generateSimulation)
|
|
139
|
+
.def("run_sim", &Simulator<SelectedRNG, 20>::runSimulator)
|
|
140
|
+
.def("init_substitution_sim", &Simulator<SelectedRNG, 20>::initSubstitionSim)
|
|
141
|
+
.def("gen_substitutions", &Simulator<SelectedRNG, 20>::simulateSubstitutions)
|
|
142
|
+
.def("gen_substitutions_to_file", &Simulator<SelectedRNG, 20>::simulateAndWriteSubstitutions)
|
|
143
|
+
.def("set_aligned_sequence_map", &Simulator<SelectedRNG, 20>::setAlignedSequenceMap)
|
|
144
|
+
.def("save_site_rates", &Simulator<SelectedRNG, 20>::setSaveRates)
|
|
145
|
+
.def("get_site_rates", &Simulator<SelectedRNG, 20>::getSiteRates)
|
|
146
|
+
.def("save_all_nodes_sequences", &Simulator<SelectedRNG, 20>::setSaveAllNodes)
|
|
147
|
+
.def("save_root_sequence", &Simulator<SelectedRNG, 20>::setSaveRoot)
|
|
148
|
+
.def("get_saved_nodes_mask", &Simulator<SelectedRNG, 20>::getNodesSaveList);
|
|
149
|
+
|
|
150
|
+
py::class_<Simulator<SelectedRNG, 4>>(m, "NucleotideSimulator")
|
|
135
151
|
.def(py::init<SimulationProtocol*>())
|
|
136
|
-
.def("reset_sim", &Simulator
|
|
137
|
-
.def("gen_indels", &Simulator
|
|
138
|
-
.def("run_sim", &Simulator
|
|
139
|
-
.def("init_substitution_sim", &Simulator
|
|
140
|
-
.def("gen_substitutions", &Simulator
|
|
141
|
-
.def("
|
|
142
|
-
.def("
|
|
143
|
-
.def("
|
|
144
|
-
.def("
|
|
145
|
-
.def("
|
|
146
|
-
.def("
|
|
152
|
+
.def("reset_sim", &Simulator<SelectedRNG, 4>::resetSimulator)
|
|
153
|
+
.def("gen_indels", &Simulator<SelectedRNG, 4>::generateSimulation)
|
|
154
|
+
.def("run_sim", &Simulator<SelectedRNG, 4>::runSimulator)
|
|
155
|
+
.def("init_substitution_sim", &Simulator<SelectedRNG, 4>::initSubstitionSim)
|
|
156
|
+
.def("gen_substitutions", &Simulator<SelectedRNG, 4>::simulateSubstitutions)
|
|
157
|
+
.def("gen_substitutions_to_file", &Simulator<SelectedRNG, 4>::simulateAndWriteSubstitutions)
|
|
158
|
+
.def("set_aligned_sequence_map", &Simulator<SelectedRNG, 4>::setAlignedSequenceMap)
|
|
159
|
+
.def("save_site_rates", &Simulator<SelectedRNG, 4>::setSaveRates)
|
|
160
|
+
.def("get_site_rates", &Simulator<SelectedRNG, 4>::getSiteRates)
|
|
161
|
+
.def("save_all_nodes_sequences", &Simulator<SelectedRNG, 4>::setSaveAllNodes)
|
|
162
|
+
.def("save_root_sequence", &Simulator<SelectedRNG, 4>::setSaveRoot)
|
|
163
|
+
.def("get_saved_nodes_mask", &Simulator<SelectedRNG, 4>::getNodesSaveList);
|
|
147
164
|
|
|
148
165
|
|
|
149
166
|
py::class_<MSA>(m, "Msa")
|
|
150
167
|
.def(py::init<size_t, size_t, const std::vector<bool>& >())
|
|
151
|
-
.def(py::init<BlockMap
|
|
168
|
+
.def(py::init<BlockMap&, tree::TreeNode*, const std::vector<bool>& >())
|
|
152
169
|
.def("generate_msas", &MSA::generateMSAs)
|
|
153
170
|
.def("length", &MSA::getMSAlength)
|
|
154
171
|
.def("num_sequences", &MSA::getNumberOfSequences)
|
|
155
172
|
.def("fill_substitutions", &MSA::fillSubstitutions)
|
|
156
|
-
.def("set_substitutions_folder", &MSA::setSubstitutionsFolder)
|
|
157
173
|
.def("print_msa", &MSA::printFullMsa)
|
|
158
174
|
.def("print_indels", &MSA::printIndels)
|
|
159
175
|
.def("write_msa", &MSA::writeFullMsa)
|
|
@@ -82,9 +82,17 @@ void modelFactory::setGammaParameters(MDOUBLE alpha, size_t numCategories) {
|
|
|
82
82
|
_state = factoryState::COMPLETE;
|
|
83
83
|
}
|
|
84
84
|
|
|
85
|
+
void modelFactory::setSiteRateCorrelation(MDOUBLE correlation) {
|
|
86
|
+
if (correlation < -1.0 || correlation > 1.0) {
|
|
87
|
+
errorMsg::reportError("Rate correlation must be between -1 and 1");
|
|
88
|
+
}
|
|
89
|
+
_siteRateCorrelation = correlation;
|
|
90
|
+
}
|
|
91
|
+
|
|
85
92
|
void modelFactory::resetFactory() {
|
|
86
93
|
_state = factoryState::ALPHABET;
|
|
87
94
|
_invariantProportion = 0.0;
|
|
95
|
+
_siteRateCorrelation = 0.0;
|
|
88
96
|
}
|
|
89
97
|
|
|
90
98
|
|
|
@@ -1,278 +0,0 @@
|
|
|
1
|
-
// $Id: simulateTree.cpp 8508 2010-08-12 15:21:04Z rubi $
|
|
2
|
-
#include <stack>
|
|
3
|
-
#include <unordered_map>
|
|
4
|
-
#include <ostream>
|
|
5
|
-
#include <sstream>
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
#include "../libs/Phylolib/includes/definitions.h"
|
|
9
|
-
#include "../libs/Phylolib/includes/treeUtil.h"
|
|
10
|
-
#include "../libs/Phylolib/includes/talRandom.h"
|
|
11
|
-
#include "../libs/Phylolib/includes/gammaDistribution.h"
|
|
12
|
-
#include "../libs/Phylolib/includes/codon.h"
|
|
13
|
-
|
|
14
|
-
#include "rateMatrixSim.h"
|
|
15
|
-
// simulateTree::simulateTree(tree* _inEt,
|
|
16
|
-
// const stochasticProcess* sp,
|
|
17
|
-
// const alphabet* alph) :
|
|
18
|
-
// _et(_inEt), _sp(sp),_alph(alph),_avgSubtitutionsPerSite(0.0) {
|
|
19
|
-
// };
|
|
20
|
-
|
|
21
|
-
rateMatrixSim::rateMatrixSim(modelFactory& mFac, std::shared_ptr<std::vector<bool>> nodesToSave) :
|
|
22
|
-
_et(mFac.getTree()), _sp(mFac.getStochasticProcess()), _alph(mFac.getAlphabet()),
|
|
23
|
-
_invariantSitesProportion(mFac.getInvariantSitesProportion()),
|
|
24
|
-
_cpijGam(), _rootSequence(mFac.getAlphabet()), _subManager(mFac.getTree()->getNodesNum()),
|
|
25
|
-
_nodesToSave(nodesToSave), _saveRates(false), _biased_coin(0,1) {
|
|
26
|
-
// _et = mFac.getTree();
|
|
27
|
-
// _sp = mFac.getStochasticProcess();
|
|
28
|
-
// _alph = mFac.getAlphabet();
|
|
29
|
-
|
|
30
|
-
size_t alphaSize = _sp->alphabetSize();
|
|
31
|
-
|
|
32
|
-
_cpijGam.fillPij(*_et, *_sp);
|
|
33
|
-
initGillespieSampler();
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
std::vector<MDOUBLE> rateProbs;
|
|
37
|
-
for (int j = 0 ; j < _sp->categories(); ++j) {
|
|
38
|
-
MDOUBLE currentRateProb = _sp->ratesProb(j);
|
|
39
|
-
currentRateProb = currentRateProb * (1.0 - _invariantSitesProportion);
|
|
40
|
-
rateProbs.push_back(currentRateProb);
|
|
41
|
-
}
|
|
42
|
-
if (_invariantSitesProportion > 0.0) rateProbs.push_back(_invariantSitesProportion);
|
|
43
|
-
|
|
44
|
-
_rateSampler = std::make_unique<DiscreteDistribution>(rateProbs);
|
|
45
|
-
|
|
46
|
-
std::vector<MDOUBLE> frequencies;
|
|
47
|
-
for (int j = 0; j < alphaSize; ++j) {
|
|
48
|
-
frequencies.push_back(_sp->freq(j));
|
|
49
|
-
}
|
|
50
|
-
_frequencySampler = std::make_unique<DiscreteDistribution>(frequencies);
|
|
51
|
-
|
|
52
|
-
_simulatedSequences = std::make_unique<sequenceContainer>();
|
|
53
|
-
|
|
54
|
-
};
|
|
55
|
-
|
|
56
|
-
void rateMatrixSim::setSaveRates(bool saveRates) {
|
|
57
|
-
_saveRates = saveRates;
|
|
58
|
-
}
|
|
59
|
-
|
|
60
|
-
void rateMatrixSim::initGillespieSampler() {
|
|
61
|
-
_gillespieSampler.resize(_alph->size());
|
|
62
|
-
for (size_t i = 0; i < _alph->size(); ++i) {
|
|
63
|
-
std::vector<double> qRates(_alph->size(), 0.0);
|
|
64
|
-
double sum = -_sp->Qij(i,i);
|
|
65
|
-
double normalizer = 1.0 / sum;
|
|
66
|
-
for (size_t j = 0; j < _alph->size(); ++j) {
|
|
67
|
-
if (i==j) continue;
|
|
68
|
-
qRates[j] = _sp->Qij(i,j) * normalizer;
|
|
69
|
-
// std::cout << i << j << "->" << qRates[j] << ",";
|
|
70
|
-
}
|
|
71
|
-
// std::cout << "\n" << i << " " << sum << "\n";
|
|
72
|
-
_gillespieSampler[i] = std::make_unique<DiscreteDistribution>(qRates);
|
|
73
|
-
}
|
|
74
|
-
}
|
|
75
|
-
|
|
76
|
-
// simulateTree::simulateTree(const tree& _inEt,
|
|
77
|
-
// const stochasticProcess& sp,
|
|
78
|
-
// const alphabet* alph) : _sp(sp) {
|
|
79
|
-
// _et = _inEt;
|
|
80
|
-
// // _sp = sp;
|
|
81
|
-
// _alph = alph;
|
|
82
|
-
// _avgSubtitutionsPerSite = 0.0;
|
|
83
|
-
// };
|
|
84
|
-
|
|
85
|
-
rateMatrixSim::~rateMatrixSim() {
|
|
86
|
-
}
|
|
87
|
-
|
|
88
|
-
// void rateMatrixSim::setSeed(size_t seed) {
|
|
89
|
-
// _seed = seed;
|
|
90
|
-
// _mt_rand->seed(seed);
|
|
91
|
-
// }
|
|
92
|
-
|
|
93
|
-
void rateMatrixSim::setRng(mt19937_64 *rng) {
|
|
94
|
-
_mt_rand = rng;
|
|
95
|
-
}
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
// const mt19937_64& rateMatrixSim::getRng(){
|
|
99
|
-
// return *_mt_rand;
|
|
100
|
-
// }
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
void rateMatrixSim::generate_substitution_log(int seqLength) {
|
|
104
|
-
std::vector<MDOUBLE> ratesVec(seqLength);
|
|
105
|
-
|
|
106
|
-
MDOUBLE sumOfRatesAcrossSites = 0.0;
|
|
107
|
-
_rateCategories.resize(seqLength);
|
|
108
|
-
for (int h = 0; h < seqLength; h++) {
|
|
109
|
-
int selectedRandomCategory = _rateSampler->drawSample() - 1;
|
|
110
|
-
_rateCategories[h] = selectedRandomCategory;
|
|
111
|
-
if (selectedRandomCategory >= _sp->categories()) {
|
|
112
|
-
ratesVec[h] = 0.0;
|
|
113
|
-
continue;
|
|
114
|
-
}
|
|
115
|
-
ratesVec[h] = _sp->rates(selectedRandomCategory);
|
|
116
|
-
sumOfRatesAcrossSites += ratesVec[h];
|
|
117
|
-
}
|
|
118
|
-
if (_saveRates) _siteRates.insert(_siteRates.end(), ratesVec.begin(), ratesVec.end());
|
|
119
|
-
// MDOUBLE sumOfRatesNoramlizingFactor = 1.0 / sumOfRatesAcrossSites;
|
|
120
|
-
|
|
121
|
-
// _siteSampler = std::make_unique<DiscreteDistribution>(ratesVec, sumOfRatesNoramlizingFactor);
|
|
122
|
-
_rootSequence.resize(seqLength);
|
|
123
|
-
generateRootSeq(seqLength, ratesVec);
|
|
124
|
-
if ((*_nodesToSave)[_et->getRoot()->id()]) saveSequence(_et->getRoot()->id(), _et->getRoot()->name());
|
|
125
|
-
|
|
126
|
-
mutateSeqRecuresively(_et->getRoot(), seqLength);
|
|
127
|
-
_subManager.clear();
|
|
128
|
-
}
|
|
129
|
-
|
|
130
|
-
void rateMatrixSim::mutateSeqRecuresively(tree::nodeP currentNode, int seqLength) {
|
|
131
|
-
if (currentNode->isLeaf()) return;
|
|
132
|
-
|
|
133
|
-
for (auto &node: currentNode->getSons()) {
|
|
134
|
-
mutateSeqAlongBranch(node, seqLength);
|
|
135
|
-
if ((*_nodesToSave)[node->id()]) saveSequence(node->id(), node->name());
|
|
136
|
-
mutateSeqRecuresively(node, seqLength);
|
|
137
|
-
if (!_subManager.isEmpty(node->id())) {
|
|
138
|
-
_subManager.undoSubs(node->id(), _rootSequence, _rateCategories, _sp.get());
|
|
139
|
-
}
|
|
140
|
-
}
|
|
141
|
-
}
|
|
142
|
-
|
|
143
|
-
void rateMatrixSim::mutateSeqAlongBranch(tree::nodeP currentNode, int seqLength) {
|
|
144
|
-
const MDOUBLE distToFather = currentNode->dis2father();
|
|
145
|
-
mutateEntireSeq(currentNode, seqLength);
|
|
146
|
-
|
|
147
|
-
// if (distToFather > 0.5) {
|
|
148
|
-
// mutateEntireSeq(currentNode, seqLength);
|
|
149
|
-
// } else {
|
|
150
|
-
// mutateSeqGillespie(currentNode, seqLength, distToFather);
|
|
151
|
-
// }
|
|
152
|
-
// testSumOfRates();
|
|
153
|
-
}
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
void rateMatrixSim::mutateEntireSeq(tree::nodeP currentNode, int seqLength) {
|
|
157
|
-
const int nodeId = currentNode->id();
|
|
158
|
-
const int parentId = currentNode->father()->id();
|
|
159
|
-
|
|
160
|
-
for (size_t site = 0; site < seqLength; ++site) {
|
|
161
|
-
ALPHACHAR parentChar = _rootSequence[site];//_subManager.getCharacter(parentId, site, _rootSequence);
|
|
162
|
-
if (_rateCategories[site] == _sp->categories()) continue;
|
|
163
|
-
ALPHACHAR nextChar = _cpijGam.getRandomChar(_rateCategories[site], nodeId, parentChar);
|
|
164
|
-
if (nextChar != parentChar){
|
|
165
|
-
_subManager.handleEvent(nodeId, site, nextChar, _rateCategories, _sp.get(), _rootSequence);
|
|
166
|
-
}
|
|
167
|
-
}
|
|
168
|
-
}
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
void rateMatrixSim::mutateSeqGillespie(tree::nodeP currentNode, int seqLength, MDOUBLE distToParent) {
|
|
172
|
-
// std::cout << "mutating sequence using Gillespie!\n";
|
|
173
|
-
|
|
174
|
-
const int nodeId = currentNode->id();
|
|
175
|
-
const int parentId = currentNode->father()->id();
|
|
176
|
-
MDOUBLE branchLength = distToParent;
|
|
177
|
-
|
|
178
|
-
double lambdaParam = _subManager.getReactantsSum();
|
|
179
|
-
std::exponential_distribution<double> distribution(-lambdaParam);
|
|
180
|
-
double waitingTime = distribution(*_mt_rand);
|
|
181
|
-
if (waitingTime < 0) {
|
|
182
|
-
std::cout << branchLength << " " << lambdaParam << " " << waitingTime << "\n";
|
|
183
|
-
errorMsg::reportError("waiting time is negative :(");
|
|
184
|
-
}
|
|
185
|
-
while (waitingTime < branchLength) {
|
|
186
|
-
if (waitingTime < 0) {
|
|
187
|
-
std::cout << branchLength << " " << lambdaParam << " " << waitingTime << "\n";
|
|
188
|
-
errorMsg::reportError("waiting time is negative :(");
|
|
189
|
-
}
|
|
190
|
-
|
|
191
|
-
int mutatedSite = _subManager.sampleSite(*_mt_rand);
|
|
192
|
-
ALPHACHAR parentChar = _rootSequence[mutatedSite];
|
|
193
|
-
ALPHACHAR nextChar = _gillespieSampler[parentChar]->drawSample() - 1;
|
|
194
|
-
// std::cout << (int)parentChar << "->" << (int)nextChar << "\n";
|
|
195
|
-
_subManager.handleEvent(nodeId, mutatedSite, nextChar, _rateCategories, _sp.get(), _rootSequence);
|
|
196
|
-
|
|
197
|
-
lambdaParam = _subManager.getReactantsSum();
|
|
198
|
-
branchLength = branchLength - waitingTime;
|
|
199
|
-
std::exponential_distribution<double> distribution(-lambdaParam);
|
|
200
|
-
waitingTime = distribution(*_mt_rand);
|
|
201
|
-
|
|
202
|
-
}
|
|
203
|
-
}
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
void rateMatrixSim::generateRootSeq(int seqLength, std::vector<MDOUBLE>& ratesVec) {
|
|
209
|
-
size_t rootID = _et->getRoot()->id();
|
|
210
|
-
for (int i = 0; i < seqLength; i++) {
|
|
211
|
-
ALPHACHAR newChar = _frequencySampler->drawSample() - 1;
|
|
212
|
-
// ratesVec[i] = ratesVec[i]*(-_sp->Qij(newChar, newChar));
|
|
213
|
-
_rootSequence[i] = newChar;
|
|
214
|
-
}
|
|
215
|
-
// std::cout << ">Root-sequence\n" << _rootSequence << "\n";
|
|
216
|
-
// std::cout << ">Rates\n" << ratesVec;
|
|
217
|
-
_subManager.handleRootSequence(seqLength, ratesVec, _sp.get(), _rootSequence);
|
|
218
|
-
|
|
219
|
-
_rootSequence.setAlphabet(_alph);
|
|
220
|
-
_rootSequence.setName(_et->getRoot()->name());
|
|
221
|
-
_rootSequence.setID(_et->getRoot()->id());
|
|
222
|
-
}
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
void rateMatrixSim::saveSequence(const int &nodeId,const std::string &name) {
|
|
226
|
-
sequence temp(_rootSequence);
|
|
227
|
-
temp.setName(name);
|
|
228
|
-
temp.setID(nodeId);
|
|
229
|
-
// std::cout << temp << "\n";
|
|
230
|
-
_simulatedSequences->add(temp);
|
|
231
|
-
}
|
|
232
|
-
|
|
233
|
-
// sequenceContainer rateMatrixSim::toSeqData() {
|
|
234
|
-
// sequenceContainer myseqData;
|
|
235
|
-
// for (int i=0; i < _simulatedSequences.size(); ++i) {
|
|
236
|
-
// myseqData.add(*_simulatedSequences[i]);
|
|
237
|
-
// }
|
|
238
|
-
// return myseqData;
|
|
239
|
-
// }
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
std::unique_ptr<sequenceContainer> rateMatrixSim::getSequenceContainer() {
|
|
244
|
-
// std::unique_ptr<sequenceContainer> myseqData = std::make_unique<sequenceContainer>();
|
|
245
|
-
// // sequenceContainer myseqData;
|
|
246
|
-
// for (int i=0; i < _simulatedSequences.size(); ++i) {
|
|
247
|
-
// tree::nodeP theCurNode = _et->findNodeById(_simulatedSequences[i]->id());
|
|
248
|
-
// if (theCurNode == NULL)
|
|
249
|
-
// errorMsg::reportError("could not find the specified name: " + _simulatedSequences[i]->name());
|
|
250
|
-
// if (theCurNode->isInternal()) continue;
|
|
251
|
-
auto outputSequences = std::move(_simulatedSequences);
|
|
252
|
-
_simulatedSequences = std::make_unique<sequenceContainer>();
|
|
253
|
-
// myseqData->add(*std::move(_simulatedSequences[i]));
|
|
254
|
-
// }
|
|
255
|
-
|
|
256
|
-
return outputSequences;
|
|
257
|
-
}
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
bool rateMatrixSim::testSumOfRates() {
|
|
261
|
-
MDOUBLE sumOfRates = 0.0;
|
|
262
|
-
for (size_t i = 0; i < _rootSequence.seqLen(); i++) {
|
|
263
|
-
ALPHACHAR currentChar = _rootSequence[i];
|
|
264
|
-
MDOUBLE currentQii = _sp->Qij(currentChar, currentChar);
|
|
265
|
-
MDOUBLE currentRate = _sp->rates(_rateCategories[i]);
|
|
266
|
-
sumOfRates += (currentQii*currentRate);
|
|
267
|
-
}
|
|
268
|
-
MDOUBLE preCalculatedSum = _subManager.getReactantsSum();
|
|
269
|
-
if (abs(preCalculatedSum - sumOfRates) > 1e-6) {
|
|
270
|
-
std::cout << "preCalculatedSum=" << preCalculatedSum << " "
|
|
271
|
-
<< "sumOfRates=" << sumOfRates;
|
|
272
|
-
errorMsg::reportError("Error in sum of rates calculation!");
|
|
273
|
-
}
|
|
274
|
-
std::cout << "preCalculatedSum is correct\n" << "preCalculatedSum=" << preCalculatedSum << " "
|
|
275
|
-
<< "sumOfRates=" << sumOfRates << "\n";
|
|
276
|
-
|
|
277
|
-
return true;
|
|
278
|
-
}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|