msasim 25.9.5__tar.gz → 25.11.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {msasim-25.9.5/msasim.egg-info → msasim-25.11.1}/PKG-INFO +1 -1
- {msasim-25.9.5 → msasim-25.11.1}/_Sailfish/__init__.pyi +2 -0
- {msasim-25.9.5 → msasim-25.11.1}/msasim/sailfish.py +12 -11
- {msasim-25.9.5 → msasim-25.11.1/msasim.egg-info}/PKG-INFO +1 -1
- {msasim-25.9.5 → msasim-25.11.1}/msasim.egg-info/SOURCES.txt +1 -2
- {msasim-25.9.5 → msasim-25.11.1}/setup.py +1 -1
- {msasim-25.9.5 → msasim-25.11.1}/src/main.cpp +20 -19
- {msasim-25.9.5 → msasim-25.11.1}/src/modelFactory.cpp +8 -0
- msasim-25.9.5/src/rateMatrixSim.cpp +0 -278
- {msasim-25.9.5 → msasim-25.11.1}/LICENSE +0 -0
- {msasim-25.9.5 → msasim-25.11.1}/README.md +0 -0
- {msasim-25.9.5 → msasim-25.11.1}/_Sailfish/py.typed +0 -0
- {msasim-25.9.5 → msasim-25.11.1}/msasim/__init__.py +0 -0
- {msasim-25.9.5 → msasim-25.11.1}/msasim.egg-info/dependency_links.txt +0 -0
- {msasim-25.9.5 → msasim-25.11.1}/msasim.egg-info/not-zip-safe +0 -0
- {msasim-25.9.5 → msasim-25.11.1}/msasim.egg-info/requires.txt +0 -0
- {msasim-25.9.5 → msasim-25.11.1}/msasim.egg-info/top_level.txt +0 -0
- {msasim-25.9.5 → msasim-25.11.1}/pyproject.toml +0 -0
- {msasim-25.9.5 → msasim-25.11.1}/setup.cfg +0 -0
|
@@ -376,6 +376,8 @@ class modelFactory:
|
|
|
376
376
|
...
|
|
377
377
|
def set_invariant_sites_proportion(self, arg0: float) -> None:
|
|
378
378
|
...
|
|
379
|
+
def set_site_rate_correlation(self, arg0: float) -> None:
|
|
380
|
+
...
|
|
379
381
|
def set_model_parameters(self, arg0: list[float]) -> None:
|
|
380
382
|
...
|
|
381
383
|
def set_replacement_model(self, arg0: modelCode) -> None:
|
|
@@ -25,14 +25,14 @@ class Distribution:
|
|
|
25
25
|
raise ValueError(f"Each value of the probabilities should be between 0 to 1. Received a value of {x}")
|
|
26
26
|
self._dist = _Sailfish.DiscreteDistribution(dist)
|
|
27
27
|
|
|
28
|
-
def draw_sample(self) -> int:
|
|
29
|
-
|
|
28
|
+
# def draw_sample(self) -> int:
|
|
29
|
+
# return self._dist.draw_sample()
|
|
30
30
|
|
|
31
|
-
def set_seed(self, seed: int) -> None:
|
|
32
|
-
|
|
31
|
+
# def set_seed(self, seed: int) -> None:
|
|
32
|
+
# return self._dist.set_seed(seed)
|
|
33
33
|
|
|
34
|
-
def get_table(self) -> List:
|
|
35
|
-
|
|
34
|
+
# def get_table(self) -> List:
|
|
35
|
+
# return self._dist.get_table()
|
|
36
36
|
|
|
37
37
|
def _get_Sailfish_dist(self) -> _Sailfish.DiscreteDistribution:
|
|
38
38
|
return self._dist
|
|
@@ -475,7 +475,8 @@ class Simulator:
|
|
|
475
475
|
model_parameters: List = None,
|
|
476
476
|
gamma_parameters_alpha : float = 1.0,
|
|
477
477
|
gamma_parameters_categories: int = 1,
|
|
478
|
-
invariant_sites_proportion: float = 0.0
|
|
478
|
+
invariant_sites_proportion: float = 0.0,
|
|
479
|
+
site_rate_correlation: float = 0.0,
|
|
479
480
|
) -> None:
|
|
480
481
|
if not model:
|
|
481
482
|
raise ValueError(f"please provide a substitution model from the the following list: {_Sailfish.modelCode}")
|
|
@@ -503,6 +504,8 @@ class Simulator:
|
|
|
503
504
|
|
|
504
505
|
self._model_factory.set_gamma_parameters(gamma_parameters_alpha, gamma_parameters_categories)
|
|
505
506
|
self._model_factory.set_invariant_sites_proportion(invariant_sites_proportion)
|
|
507
|
+
self._model_factory.set_site_rate_correlation(site_rate_correlation)
|
|
508
|
+
|
|
506
509
|
self._simulator.init_substitution_sim(self._model_factory)
|
|
507
510
|
|
|
508
511
|
self._is_sub_model_init = True
|
|
@@ -556,13 +559,11 @@ class Simulator:
|
|
|
556
559
|
msa = Msa(blocktree._get_Sailfish_blocks(),
|
|
557
560
|
self._simProtocol._get_root(),
|
|
558
561
|
self.get_sequences_to_save())
|
|
562
|
+
self._simulator.set_aligned_sequence_map(msa._msa)
|
|
559
563
|
|
|
560
564
|
# sim.init_substitution_sim(mFac)
|
|
561
565
|
if self._simulation_type != SIMULATION_TYPE.NOSUBS:
|
|
562
|
-
|
|
563
|
-
self._simulator.gen_substitutions_to_dir(msa.get_length(), tmpdirname)
|
|
564
|
-
msa._msa.set_substitutions_folder(tmpdirname)
|
|
565
|
-
msa._msa.write_msa_from_dir(str(output_file_path))
|
|
566
|
+
self._simulator.gen_substitutions_to_file(msa.get_length(), str(output_file_path))
|
|
566
567
|
|
|
567
568
|
|
|
568
569
|
|
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
#include <pybind11/stl.h>
|
|
5
5
|
#include <memory>
|
|
6
6
|
|
|
7
|
+
#include "../libs/pcg/pcg_random.hpp"
|
|
7
8
|
#include "./Simulator.h"
|
|
8
9
|
|
|
9
10
|
namespace py = pybind11;
|
|
@@ -29,11 +30,13 @@ PYBIND11_MODULE(_Sailfish, m) {
|
|
|
29
30
|
Tree
|
|
30
31
|
)pbdoc";
|
|
31
32
|
|
|
33
|
+
using SelectedRNG = pcg64;
|
|
34
|
+
|
|
32
35
|
py::class_<Block>(m, "Block")
|
|
33
36
|
.def(py::init<size_t, size_t>());
|
|
34
37
|
|
|
35
38
|
py::class_<BlockTree>(m, "BlockTree")
|
|
36
|
-
.def(py::init
|
|
39
|
+
.def(py::init<>())
|
|
37
40
|
.def("print_tree", &BlockTree::printTree)
|
|
38
41
|
.def("block_list", &BlockTree::getBlockList);
|
|
39
42
|
|
|
@@ -43,10 +46,7 @@ PYBIND11_MODULE(_Sailfish, m) {
|
|
|
43
46
|
.export_values();
|
|
44
47
|
|
|
45
48
|
py::class_<DiscreteDistribution>(m, "DiscreteDistribution")
|
|
46
|
-
.def(py::init<std::vector<double>>())
|
|
47
|
-
.def("draw_sample", &DiscreteDistribution::drawSample, "Draw a random sample according to the given distribution")
|
|
48
|
-
.def_static("set_seed", &DiscreteDistribution::setSeed, "Set seed for the random number generator")
|
|
49
|
-
.def("get_table", &DiscreteDistribution::getTable, "Get Vose's alias table (useful for debugging)");
|
|
49
|
+
.def(py::init<std::vector<double>>());
|
|
50
50
|
|
|
51
51
|
py::class_<tree>(m, "Tree")
|
|
52
52
|
.def(py::init<const std::string&, bool>(), "Create Phylogenetic tree object from newick formatted file")
|
|
@@ -128,32 +128,33 @@ PYBIND11_MODULE(_Sailfish, m) {
|
|
|
128
128
|
.def("set_model_parameters" , &modelFactory::setModelParameters)
|
|
129
129
|
.def("set_gamma_parameters" , &modelFactory::setGammaParameters)
|
|
130
130
|
.def("set_invariant_sites_proportion", &modelFactory::setInvariantSitesProportion)
|
|
131
|
+
.def("set_site_rate_correlation", &modelFactory::setSiteRateCorrelation)
|
|
131
132
|
.def("reset", &modelFactory::resetFactory);
|
|
132
133
|
|
|
133
134
|
|
|
134
|
-
py::class_<Simulator
|
|
135
|
+
py::class_<Simulator<SelectedRNG>>(m, "Simulator")
|
|
135
136
|
.def(py::init<SimulationProtocol*>())
|
|
136
|
-
.def("reset_sim", &Simulator
|
|
137
|
-
.def("gen_indels", &Simulator
|
|
138
|
-
.def("run_sim", &Simulator
|
|
139
|
-
.def("init_substitution_sim", &Simulator
|
|
140
|
-
.def("gen_substitutions", &Simulator
|
|
141
|
-
.def("
|
|
142
|
-
.def("
|
|
143
|
-
.def("
|
|
144
|
-
.def("
|
|
145
|
-
.def("
|
|
146
|
-
.def("
|
|
137
|
+
.def("reset_sim", &Simulator<SelectedRNG>::resetSimulator)
|
|
138
|
+
.def("gen_indels", &Simulator<SelectedRNG>::generateSimulation)
|
|
139
|
+
.def("run_sim", &Simulator<SelectedRNG>::runSimulator)
|
|
140
|
+
.def("init_substitution_sim", &Simulator<SelectedRNG>::initSubstitionSim)
|
|
141
|
+
.def("gen_substitutions", &Simulator<SelectedRNG>::simulateSubstitutions)
|
|
142
|
+
.def("gen_substitutions_to_file", &Simulator<SelectedRNG>::simulateAndWriteSubstitutions)
|
|
143
|
+
.def("set_aligned_sequence_map", &Simulator<SelectedRNG>::setAlignedSequenceMap)
|
|
144
|
+
.def("save_site_rates", &Simulator<SelectedRNG>::setSaveRates)
|
|
145
|
+
.def("get_site_rates", &Simulator<SelectedRNG>::getSiteRates)
|
|
146
|
+
.def("save_all_nodes_sequences", &Simulator<SelectedRNG>::setSaveAllNodes)
|
|
147
|
+
.def("save_root_sequence", &Simulator<SelectedRNG>::setSaveRoot)
|
|
148
|
+
.def("get_saved_nodes_mask", &Simulator<SelectedRNG>::getNodesSaveList);
|
|
147
149
|
|
|
148
150
|
|
|
149
151
|
py::class_<MSA>(m, "Msa")
|
|
150
152
|
.def(py::init<size_t, size_t, const std::vector<bool>& >())
|
|
151
|
-
.def(py::init<BlockMap
|
|
153
|
+
.def(py::init<BlockMap&, tree::TreeNode*, const std::vector<bool>& >())
|
|
152
154
|
.def("generate_msas", &MSA::generateMSAs)
|
|
153
155
|
.def("length", &MSA::getMSAlength)
|
|
154
156
|
.def("num_sequences", &MSA::getNumberOfSequences)
|
|
155
157
|
.def("fill_substitutions", &MSA::fillSubstitutions)
|
|
156
|
-
.def("set_substitutions_folder", &MSA::setSubstitutionsFolder)
|
|
157
158
|
.def("print_msa", &MSA::printFullMsa)
|
|
158
159
|
.def("print_indels", &MSA::printIndels)
|
|
159
160
|
.def("write_msa", &MSA::writeFullMsa)
|
|
@@ -82,9 +82,17 @@ void modelFactory::setGammaParameters(MDOUBLE alpha, size_t numCategories) {
|
|
|
82
82
|
_state = factoryState::COMPLETE;
|
|
83
83
|
}
|
|
84
84
|
|
|
85
|
+
void modelFactory::setSiteRateCorrelation(MDOUBLE correlation) {
|
|
86
|
+
if (correlation < -1.0 || correlation > 1.0) {
|
|
87
|
+
errorMsg::reportError("Rate correlation must be between -1 and 1");
|
|
88
|
+
}
|
|
89
|
+
_siteRateCorrelation = correlation;
|
|
90
|
+
}
|
|
91
|
+
|
|
85
92
|
void modelFactory::resetFactory() {
|
|
86
93
|
_state = factoryState::ALPHABET;
|
|
87
94
|
_invariantProportion = 0.0;
|
|
95
|
+
_siteRateCorrelation = 0.0;
|
|
88
96
|
}
|
|
89
97
|
|
|
90
98
|
|
|
@@ -1,278 +0,0 @@
|
|
|
1
|
-
// $Id: simulateTree.cpp 8508 2010-08-12 15:21:04Z rubi $
|
|
2
|
-
#include <stack>
|
|
3
|
-
#include <unordered_map>
|
|
4
|
-
#include <ostream>
|
|
5
|
-
#include <sstream>
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
#include "../libs/Phylolib/includes/definitions.h"
|
|
9
|
-
#include "../libs/Phylolib/includes/treeUtil.h"
|
|
10
|
-
#include "../libs/Phylolib/includes/talRandom.h"
|
|
11
|
-
#include "../libs/Phylolib/includes/gammaDistribution.h"
|
|
12
|
-
#include "../libs/Phylolib/includes/codon.h"
|
|
13
|
-
|
|
14
|
-
#include "rateMatrixSim.h"
|
|
15
|
-
// simulateTree::simulateTree(tree* _inEt,
|
|
16
|
-
// const stochasticProcess* sp,
|
|
17
|
-
// const alphabet* alph) :
|
|
18
|
-
// _et(_inEt), _sp(sp),_alph(alph),_avgSubtitutionsPerSite(0.0) {
|
|
19
|
-
// };
|
|
20
|
-
|
|
21
|
-
rateMatrixSim::rateMatrixSim(modelFactory& mFac, std::shared_ptr<std::vector<bool>> nodesToSave) :
|
|
22
|
-
_et(mFac.getTree()), _sp(mFac.getStochasticProcess()), _alph(mFac.getAlphabet()),
|
|
23
|
-
_invariantSitesProportion(mFac.getInvariantSitesProportion()),
|
|
24
|
-
_cpijGam(), _rootSequence(mFac.getAlphabet()), _subManager(mFac.getTree()->getNodesNum()),
|
|
25
|
-
_nodesToSave(nodesToSave), _saveRates(false), _biased_coin(0,1) {
|
|
26
|
-
// _et = mFac.getTree();
|
|
27
|
-
// _sp = mFac.getStochasticProcess();
|
|
28
|
-
// _alph = mFac.getAlphabet();
|
|
29
|
-
|
|
30
|
-
size_t alphaSize = _sp->alphabetSize();
|
|
31
|
-
|
|
32
|
-
_cpijGam.fillPij(*_et, *_sp);
|
|
33
|
-
initGillespieSampler();
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
std::vector<MDOUBLE> rateProbs;
|
|
37
|
-
for (int j = 0 ; j < _sp->categories(); ++j) {
|
|
38
|
-
MDOUBLE currentRateProb = _sp->ratesProb(j);
|
|
39
|
-
currentRateProb = currentRateProb * (1.0 - _invariantSitesProportion);
|
|
40
|
-
rateProbs.push_back(currentRateProb);
|
|
41
|
-
}
|
|
42
|
-
if (_invariantSitesProportion > 0.0) rateProbs.push_back(_invariantSitesProportion);
|
|
43
|
-
|
|
44
|
-
_rateSampler = std::make_unique<DiscreteDistribution>(rateProbs);
|
|
45
|
-
|
|
46
|
-
std::vector<MDOUBLE> frequencies;
|
|
47
|
-
for (int j = 0; j < alphaSize; ++j) {
|
|
48
|
-
frequencies.push_back(_sp->freq(j));
|
|
49
|
-
}
|
|
50
|
-
_frequencySampler = std::make_unique<DiscreteDistribution>(frequencies);
|
|
51
|
-
|
|
52
|
-
_simulatedSequences = std::make_unique<sequenceContainer>();
|
|
53
|
-
|
|
54
|
-
};
|
|
55
|
-
|
|
56
|
-
void rateMatrixSim::setSaveRates(bool saveRates) {
|
|
57
|
-
_saveRates = saveRates;
|
|
58
|
-
}
|
|
59
|
-
|
|
60
|
-
void rateMatrixSim::initGillespieSampler() {
|
|
61
|
-
_gillespieSampler.resize(_alph->size());
|
|
62
|
-
for (size_t i = 0; i < _alph->size(); ++i) {
|
|
63
|
-
std::vector<double> qRates(_alph->size(), 0.0);
|
|
64
|
-
double sum = -_sp->Qij(i,i);
|
|
65
|
-
double normalizer = 1.0 / sum;
|
|
66
|
-
for (size_t j = 0; j < _alph->size(); ++j) {
|
|
67
|
-
if (i==j) continue;
|
|
68
|
-
qRates[j] = _sp->Qij(i,j) * normalizer;
|
|
69
|
-
// std::cout << i << j << "->" << qRates[j] << ",";
|
|
70
|
-
}
|
|
71
|
-
// std::cout << "\n" << i << " " << sum << "\n";
|
|
72
|
-
_gillespieSampler[i] = std::make_unique<DiscreteDistribution>(qRates);
|
|
73
|
-
}
|
|
74
|
-
}
|
|
75
|
-
|
|
76
|
-
// simulateTree::simulateTree(const tree& _inEt,
|
|
77
|
-
// const stochasticProcess& sp,
|
|
78
|
-
// const alphabet* alph) : _sp(sp) {
|
|
79
|
-
// _et = _inEt;
|
|
80
|
-
// // _sp = sp;
|
|
81
|
-
// _alph = alph;
|
|
82
|
-
// _avgSubtitutionsPerSite = 0.0;
|
|
83
|
-
// };
|
|
84
|
-
|
|
85
|
-
rateMatrixSim::~rateMatrixSim() {
|
|
86
|
-
}
|
|
87
|
-
|
|
88
|
-
// void rateMatrixSim::setSeed(size_t seed) {
|
|
89
|
-
// _seed = seed;
|
|
90
|
-
// _mt_rand->seed(seed);
|
|
91
|
-
// }
|
|
92
|
-
|
|
93
|
-
void rateMatrixSim::setRng(mt19937_64 *rng) {
|
|
94
|
-
_mt_rand = rng;
|
|
95
|
-
}
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
// const mt19937_64& rateMatrixSim::getRng(){
|
|
99
|
-
// return *_mt_rand;
|
|
100
|
-
// }
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
void rateMatrixSim::generate_substitution_log(int seqLength) {
|
|
104
|
-
std::vector<MDOUBLE> ratesVec(seqLength);
|
|
105
|
-
|
|
106
|
-
MDOUBLE sumOfRatesAcrossSites = 0.0;
|
|
107
|
-
_rateCategories.resize(seqLength);
|
|
108
|
-
for (int h = 0; h < seqLength; h++) {
|
|
109
|
-
int selectedRandomCategory = _rateSampler->drawSample() - 1;
|
|
110
|
-
_rateCategories[h] = selectedRandomCategory;
|
|
111
|
-
if (selectedRandomCategory >= _sp->categories()) {
|
|
112
|
-
ratesVec[h] = 0.0;
|
|
113
|
-
continue;
|
|
114
|
-
}
|
|
115
|
-
ratesVec[h] = _sp->rates(selectedRandomCategory);
|
|
116
|
-
sumOfRatesAcrossSites += ratesVec[h];
|
|
117
|
-
}
|
|
118
|
-
if (_saveRates) _siteRates.insert(_siteRates.end(), ratesVec.begin(), ratesVec.end());
|
|
119
|
-
// MDOUBLE sumOfRatesNoramlizingFactor = 1.0 / sumOfRatesAcrossSites;
|
|
120
|
-
|
|
121
|
-
// _siteSampler = std::make_unique<DiscreteDistribution>(ratesVec, sumOfRatesNoramlizingFactor);
|
|
122
|
-
_rootSequence.resize(seqLength);
|
|
123
|
-
generateRootSeq(seqLength, ratesVec);
|
|
124
|
-
if ((*_nodesToSave)[_et->getRoot()->id()]) saveSequence(_et->getRoot()->id(), _et->getRoot()->name());
|
|
125
|
-
|
|
126
|
-
mutateSeqRecuresively(_et->getRoot(), seqLength);
|
|
127
|
-
_subManager.clear();
|
|
128
|
-
}
|
|
129
|
-
|
|
130
|
-
void rateMatrixSim::mutateSeqRecuresively(tree::nodeP currentNode, int seqLength) {
|
|
131
|
-
if (currentNode->isLeaf()) return;
|
|
132
|
-
|
|
133
|
-
for (auto &node: currentNode->getSons()) {
|
|
134
|
-
mutateSeqAlongBranch(node, seqLength);
|
|
135
|
-
if ((*_nodesToSave)[node->id()]) saveSequence(node->id(), node->name());
|
|
136
|
-
mutateSeqRecuresively(node, seqLength);
|
|
137
|
-
if (!_subManager.isEmpty(node->id())) {
|
|
138
|
-
_subManager.undoSubs(node->id(), _rootSequence, _rateCategories, _sp.get());
|
|
139
|
-
}
|
|
140
|
-
}
|
|
141
|
-
}
|
|
142
|
-
|
|
143
|
-
void rateMatrixSim::mutateSeqAlongBranch(tree::nodeP currentNode, int seqLength) {
|
|
144
|
-
const MDOUBLE distToFather = currentNode->dis2father();
|
|
145
|
-
mutateEntireSeq(currentNode, seqLength);
|
|
146
|
-
|
|
147
|
-
// if (distToFather > 0.5) {
|
|
148
|
-
// mutateEntireSeq(currentNode, seqLength);
|
|
149
|
-
// } else {
|
|
150
|
-
// mutateSeqGillespie(currentNode, seqLength, distToFather);
|
|
151
|
-
// }
|
|
152
|
-
// testSumOfRates();
|
|
153
|
-
}
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
void rateMatrixSim::mutateEntireSeq(tree::nodeP currentNode, int seqLength) {
|
|
157
|
-
const int nodeId = currentNode->id();
|
|
158
|
-
const int parentId = currentNode->father()->id();
|
|
159
|
-
|
|
160
|
-
for (size_t site = 0; site < seqLength; ++site) {
|
|
161
|
-
ALPHACHAR parentChar = _rootSequence[site];//_subManager.getCharacter(parentId, site, _rootSequence);
|
|
162
|
-
if (_rateCategories[site] == _sp->categories()) continue;
|
|
163
|
-
ALPHACHAR nextChar = _cpijGam.getRandomChar(_rateCategories[site], nodeId, parentChar);
|
|
164
|
-
if (nextChar != parentChar){
|
|
165
|
-
_subManager.handleEvent(nodeId, site, nextChar, _rateCategories, _sp.get(), _rootSequence);
|
|
166
|
-
}
|
|
167
|
-
}
|
|
168
|
-
}
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
void rateMatrixSim::mutateSeqGillespie(tree::nodeP currentNode, int seqLength, MDOUBLE distToParent) {
|
|
172
|
-
// std::cout << "mutating sequence using Gillespie!\n";
|
|
173
|
-
|
|
174
|
-
const int nodeId = currentNode->id();
|
|
175
|
-
const int parentId = currentNode->father()->id();
|
|
176
|
-
MDOUBLE branchLength = distToParent;
|
|
177
|
-
|
|
178
|
-
double lambdaParam = _subManager.getReactantsSum();
|
|
179
|
-
std::exponential_distribution<double> distribution(-lambdaParam);
|
|
180
|
-
double waitingTime = distribution(*_mt_rand);
|
|
181
|
-
if (waitingTime < 0) {
|
|
182
|
-
std::cout << branchLength << " " << lambdaParam << " " << waitingTime << "\n";
|
|
183
|
-
errorMsg::reportError("waiting time is negative :(");
|
|
184
|
-
}
|
|
185
|
-
while (waitingTime < branchLength) {
|
|
186
|
-
if (waitingTime < 0) {
|
|
187
|
-
std::cout << branchLength << " " << lambdaParam << " " << waitingTime << "\n";
|
|
188
|
-
errorMsg::reportError("waiting time is negative :(");
|
|
189
|
-
}
|
|
190
|
-
|
|
191
|
-
int mutatedSite = _subManager.sampleSite(*_mt_rand);
|
|
192
|
-
ALPHACHAR parentChar = _rootSequence[mutatedSite];
|
|
193
|
-
ALPHACHAR nextChar = _gillespieSampler[parentChar]->drawSample() - 1;
|
|
194
|
-
// std::cout << (int)parentChar << "->" << (int)nextChar << "\n";
|
|
195
|
-
_subManager.handleEvent(nodeId, mutatedSite, nextChar, _rateCategories, _sp.get(), _rootSequence);
|
|
196
|
-
|
|
197
|
-
lambdaParam = _subManager.getReactantsSum();
|
|
198
|
-
branchLength = branchLength - waitingTime;
|
|
199
|
-
std::exponential_distribution<double> distribution(-lambdaParam);
|
|
200
|
-
waitingTime = distribution(*_mt_rand);
|
|
201
|
-
|
|
202
|
-
}
|
|
203
|
-
}
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
void rateMatrixSim::generateRootSeq(int seqLength, std::vector<MDOUBLE>& ratesVec) {
|
|
209
|
-
size_t rootID = _et->getRoot()->id();
|
|
210
|
-
for (int i = 0; i < seqLength; i++) {
|
|
211
|
-
ALPHACHAR newChar = _frequencySampler->drawSample() - 1;
|
|
212
|
-
// ratesVec[i] = ratesVec[i]*(-_sp->Qij(newChar, newChar));
|
|
213
|
-
_rootSequence[i] = newChar;
|
|
214
|
-
}
|
|
215
|
-
// std::cout << ">Root-sequence\n" << _rootSequence << "\n";
|
|
216
|
-
// std::cout << ">Rates\n" << ratesVec;
|
|
217
|
-
_subManager.handleRootSequence(seqLength, ratesVec, _sp.get(), _rootSequence);
|
|
218
|
-
|
|
219
|
-
_rootSequence.setAlphabet(_alph);
|
|
220
|
-
_rootSequence.setName(_et->getRoot()->name());
|
|
221
|
-
_rootSequence.setID(_et->getRoot()->id());
|
|
222
|
-
}
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
void rateMatrixSim::saveSequence(const int &nodeId,const std::string &name) {
|
|
226
|
-
sequence temp(_rootSequence);
|
|
227
|
-
temp.setName(name);
|
|
228
|
-
temp.setID(nodeId);
|
|
229
|
-
// std::cout << temp << "\n";
|
|
230
|
-
_simulatedSequences->add(temp);
|
|
231
|
-
}
|
|
232
|
-
|
|
233
|
-
// sequenceContainer rateMatrixSim::toSeqData() {
|
|
234
|
-
// sequenceContainer myseqData;
|
|
235
|
-
// for (int i=0; i < _simulatedSequences.size(); ++i) {
|
|
236
|
-
// myseqData.add(*_simulatedSequences[i]);
|
|
237
|
-
// }
|
|
238
|
-
// return myseqData;
|
|
239
|
-
// }
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
std::unique_ptr<sequenceContainer> rateMatrixSim::getSequenceContainer() {
|
|
244
|
-
// std::unique_ptr<sequenceContainer> myseqData = std::make_unique<sequenceContainer>();
|
|
245
|
-
// // sequenceContainer myseqData;
|
|
246
|
-
// for (int i=0; i < _simulatedSequences.size(); ++i) {
|
|
247
|
-
// tree::nodeP theCurNode = _et->findNodeById(_simulatedSequences[i]->id());
|
|
248
|
-
// if (theCurNode == NULL)
|
|
249
|
-
// errorMsg::reportError("could not find the specified name: " + _simulatedSequences[i]->name());
|
|
250
|
-
// if (theCurNode->isInternal()) continue;
|
|
251
|
-
auto outputSequences = std::move(_simulatedSequences);
|
|
252
|
-
_simulatedSequences = std::make_unique<sequenceContainer>();
|
|
253
|
-
// myseqData->add(*std::move(_simulatedSequences[i]));
|
|
254
|
-
// }
|
|
255
|
-
|
|
256
|
-
return outputSequences;
|
|
257
|
-
}
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
bool rateMatrixSim::testSumOfRates() {
|
|
261
|
-
MDOUBLE sumOfRates = 0.0;
|
|
262
|
-
for (size_t i = 0; i < _rootSequence.seqLen(); i++) {
|
|
263
|
-
ALPHACHAR currentChar = _rootSequence[i];
|
|
264
|
-
MDOUBLE currentQii = _sp->Qij(currentChar, currentChar);
|
|
265
|
-
MDOUBLE currentRate = _sp->rates(_rateCategories[i]);
|
|
266
|
-
sumOfRates += (currentQii*currentRate);
|
|
267
|
-
}
|
|
268
|
-
MDOUBLE preCalculatedSum = _subManager.getReactantsSum();
|
|
269
|
-
if (abs(preCalculatedSum - sumOfRates) > 1e-6) {
|
|
270
|
-
std::cout << "preCalculatedSum=" << preCalculatedSum << " "
|
|
271
|
-
<< "sumOfRates=" << sumOfRates;
|
|
272
|
-
errorMsg::reportError("Error in sum of rates calculation!");
|
|
273
|
-
}
|
|
274
|
-
std::cout << "preCalculatedSum is correct\n" << "preCalculatedSum=" << preCalculatedSum << " "
|
|
275
|
-
<< "sumOfRates=" << sumOfRates << "\n";
|
|
276
|
-
|
|
277
|
-
return true;
|
|
278
|
-
}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|