msasim 24.8.1__tar.gz → 24.9.0__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of msasim might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: msasim
3
- Version: 24.8.1
3
+ Version: 24.9.0
4
4
  Summary: A fast MSA simulator
5
5
  Home-page: https://github.com/elyawy/Sailfish-backend
6
6
  Author: Elya Wygoda
@@ -10,6 +10,7 @@ from enum import Enum
10
10
  MODEL_CODES = _Sailfish.modelCode
11
11
 
12
12
  class SIMULATION_TYPE(Enum):
13
+ NOSUBS = 0
13
14
  DNA = 1
14
15
  PROTEIN = 2
15
16
 
@@ -413,13 +414,15 @@ class Simulator:
413
414
  raise ValueError(f"failed to verify simProtocol")
414
415
 
415
416
  if not simulation_type:
416
- warnings.warn(f"simulation type not provided -> running protein simulation")
417
- simulation_type = SIMULATION_TYPE.PROTEIN
417
+ warnings.warn(f"simulation type not provided -> running indel only simulation")
418
+ simulation_type = SIMULATION_TYPE.NOSUBS
418
419
 
419
420
  if simulation_type == SIMULATION_TYPE.PROTEIN:
420
421
  self._alphabet = _Sailfish.alphabetCode.AMINOACID
421
422
  elif simulation_type == SIMULATION_TYPE.DNA:
422
423
  self._alphabet = _Sailfish.alphabetCode.NUCLEOTIDE
424
+ elif simulation_type == SIMULATION_TYPE.NOSUBS:
425
+ self._alphabet = _Sailfish.alphabetCode.NULLCODE
423
426
  else:
424
427
  raise ValueError(f"unknown simulation type, please provde one of the following: {[e.name for e in SIMULATION_TYPE]}")
425
428
 
@@ -525,8 +528,9 @@ class Simulator:
525
528
  self.get_sequences_to_save())
526
529
 
527
530
  # sim.init_substitution_sim(mFac)
528
- substitutions = self.gen_substitutions(msa.get_length())
529
- msa.fill_substitutions(substitutions)
531
+ if self._simulation_type != SIMULATION_TYPE.NOSUBS:
532
+ substitutions = self.gen_substitutions(msa.get_length())
533
+ msa.fill_substitutions(substitutions)
530
534
 
531
535
  Msas.append(msa)
532
536
  return Msas
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: msasim
3
- Version: 24.8.1
3
+ Version: 24.9.0
4
4
  Summary: A fast MSA simulator
5
5
  Home-page: https://github.com/elyawy/Sailfish-backend
6
6
  Author: Elya Wygoda
@@ -9,7 +9,7 @@ from datetime import datetime
9
9
  now = datetime.now()
10
10
 
11
11
 
12
- __version__ = f"{now.year % 100}.{now.month+1}.1"
12
+ __version__ = f"{now.year % 100}.{now.month+1}.0"
13
13
 
14
14
  # The main interface is through Pybind11Extension.
15
15
  # * You can add cxx_std=11/14/17, and then build_ext can be removed.
@@ -2,7 +2,7 @@
2
2
 
3
3
  #include <pybind11/pybind11.h>
4
4
  #include <pybind11/stl.h>
5
-
5
+ #include <memory>
6
6
 
7
7
  #include "./Simulator.h"
8
8
 
@@ -80,6 +80,7 @@ PYBIND11_MODULE(_Sailfish, m) {
80
80
  .def(py::init<>());
81
81
 
82
82
  py::enum_<alphabetCode>(m, "alphabetCode")
83
+ .value("NULLCODE", alphabetCode::NULLCODE)
83
84
  .value("NUCLEOTIDE", alphabetCode::NUCLEOTIDE)
84
85
  .value("AMINOACID", alphabetCode::AMINOACID)
85
86
  .export_values();
@@ -140,8 +141,8 @@ PYBIND11_MODULE(_Sailfish, m) {
140
141
 
141
142
 
142
143
  py::class_<MSA>(m, "Msa")
143
- .def(py::init<size_t, size_t, const std::vector<bool>&>())
144
- .def(py::init<BlockMap, tree::TreeNode*, const std::vector<bool>&>())
144
+ .def(py::init<size_t, size_t, const std::vector<bool>& >())
145
+ .def(py::init<BlockMap, tree::TreeNode*, const std::vector<bool>& >())
145
146
  .def("generate_msas", &MSA::generateMSAs)
146
147
  .def("length", &MSA::getMSAlength)
147
148
  .def("num_sequences", &MSA::getNumberOfSequences)
@@ -18,14 +18,13 @@
18
18
  // _et(_inEt), _sp(sp),_alph(alph),_avgSubtitutionsPerSite(0.0) {
19
19
  // };
20
20
 
21
- rateMatrixSim::rateMatrixSim(modelFactory& mFac) :
21
+ rateMatrixSim::rateMatrixSim(modelFactory& mFac, std::shared_ptr<std::vector<bool>> nodesToSave) :
22
22
  _et(mFac.getTree()), _sp(mFac.getStochasticProcess()), _alph(mFac.getAlphabet()),
23
23
  _cpijGam(), _rootSequence(mFac.getAlphabet()), _subManager(mFac.getTree()->getNodesNum()),
24
- _nodesToSave(_et->getNodesNum(), false), _saveRates(false), _biased_coin(0,1) {
24
+ _nodesToSave(nodesToSave), _saveRates(false), _biased_coin(0,1) {
25
25
  // _et = mFac.getTree();
26
26
  // _sp = mFac.getStochasticProcess();
27
27
  // _alph = mFac.getAlphabet();
28
- setSaveStateLeaves(_et->getRoot());
29
28
 
30
29
  size_t alphaSize = _sp->alphabetSize();
31
30
 
@@ -49,13 +48,6 @@ rateMatrixSim::rateMatrixSim(modelFactory& mFac) :
49
48
 
50
49
  };
51
50
 
52
- void rateMatrixSim::setSaveStateLeaves(const tree::nodeP &node) {
53
- for(auto &node: node->getSons()) {
54
- if (node->isLeaf()) _nodesToSave[node->id()] = true;
55
- setSaveStateLeaves(node);
56
- }
57
- }
58
-
59
51
  void rateMatrixSim::setSaveRates(bool saveRates) {
60
52
  _saveRates = saveRates;
61
53
  }
@@ -120,7 +112,7 @@ void rateMatrixSim::generate_substitution_log(int seqLength) {
120
112
  // _siteSampler = std::make_unique<DiscreteDistribution>(ratesVec, sumOfRatesNoramlizingFactor);
121
113
  _rootSequence.resize(seqLength);
122
114
  generateRootSeq(seqLength, ratesVec);
123
- if (_nodesToSave[_et->getRoot()->id()]) saveSequence(_et->getRoot()->id(), _et->getRoot()->name());
115
+ if ((*_nodesToSave)[_et->getRoot()->id()]) saveSequence(_et->getRoot()->id(), _et->getRoot()->name());
124
116
 
125
117
  mutateSeqRecuresively(_et->getRoot(), seqLength);
126
118
  _subManager.clear();
@@ -131,7 +123,7 @@ void rateMatrixSim::mutateSeqRecuresively(tree::nodeP currentNode, int seqLength
131
123
 
132
124
  for (auto &node: currentNode->getSons()) {
133
125
  mutateSeqAlongBranch(node, seqLength);
134
- if (_nodesToSave[node->id()]) saveSequence(node->id(), node->name());
126
+ if ((*_nodesToSave)[node->id()]) saveSequence(node->id(), node->name());
135
127
  mutateSeqRecuresively(node, seqLength);
136
128
 
137
129
  if (!_subManager.isEmpty(currentNode->id())) {
@@ -253,35 +245,6 @@ std::unique_ptr<sequenceContainer> rateMatrixSim::getSequenceContainer() {
253
245
  return std::move(outputSequences);
254
246
  }
255
247
 
256
- void rateMatrixSim::setNodesToSave(std::vector<size_t> nodeIDs) {
257
- std::fill(_nodesToSave.begin(), _nodesToSave.end(), false);
258
- for(auto &nodeID: nodeIDs) {
259
- _nodesToSave[nodeID] = true;
260
- }
261
- }
262
-
263
- void rateMatrixSim::setSaveAllNodes() {
264
- for (size_t i = 0; i < _nodesToSave.size(); i++) {
265
- _nodesToSave[i] = true;
266
- }
267
- }
268
-
269
- void rateMatrixSim::setSaveRoot() {
270
- _nodesToSave[0] = true;
271
- }
272
-
273
-
274
- void rateMatrixSim::changeNodeSaveState(size_t nodeID) {
275
- _nodesToSave[nodeID] = !_nodesToSave[nodeID];
276
- }
277
-
278
- bool rateMatrixSim::getNodeSaveState(size_t nodeID) {
279
- return _nodesToSave[nodeID];
280
- }
281
-
282
- const std::vector<bool>& rateMatrixSim::getNodesSaveList() {
283
- return _nodesToSave;
284
- }
285
248
 
286
249
  bool rateMatrixSim::testSumOfRates() {
287
250
  MDOUBLE sumOfRates = 0.0;
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes