IsoSpecPy 2.3.0.dev11__cp313-cp313-win_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,693 @@
1
+ /*!
2
+ Copyright (C) 2015-2020 Mateusz Łącki and Michał Startek.
3
+
4
+ This file is part of IsoSpec.
5
+
6
+ IsoSpec is free software: you can redistribute it and/or modify
7
+ it under the terms of the Simplified ("2-clause") BSD licence.
8
+
9
+ IsoSpec is distributed in the hope that it will be useful,
10
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12
+
13
+ You should have received a copy of the Simplified BSD Licence
14
+ along with IsoSpec. If not, see <https://opensource.org/licenses/BSD-2-Clause>.
15
+ */
16
+
17
+ #pragma once
18
+
19
+ #include <unordered_map>
20
+ #include <queue>
21
+ #include <limits>
22
+ #include <string>
23
+ #include <vector>
24
+ #include <algorithm>
25
+ #include "platform.h"
26
+ #include "dirtyAllocator.h"
27
+ #include "summator.h"
28
+ #include "operators.h"
29
+ #include "marginalTrek++.h"
30
+
31
+
32
+
33
+ namespace IsoSpec
34
+ {
35
+
36
+ // Declaration only; the definition is not in this header. This function is NOT guaranteed to be secure against malicious input. It should be used only for debugging.
37
+ unsigned int parse_formula(const char* formula,
38
+ std::vector<double>& isotope_masses,
39
+ std::vector<double>& isotope_probabilities,
40
+ int** isotopeNumbers, // NOTE(review): presumably an output array allocated by the callee - confirm ownership against the definition
41
+ int** atomCounts, // NOTE(review): same assumption as isotopeNumbers - confirm
42
+ unsigned int* confSize,
43
+ bool use_nominal_masses = false);
44
+
45
+
46
+ //! The Iso class for the calculation of the isotopic distribution.
47
+ /*!
48
+ It contains the full description of the molecule whose isotopic distribution is to be calculated.
49
+ */
50
+ class ISOSPEC_EXPORT_SYMBOL Iso {
51
+ private:
52
+ //! Set up the marginal isotopic envelopes, corresponding to subisotopologues.
53
+ /*!
54
+ \param _isotopeMasses A table of masses of isotopes of the elements in the chemical formula,
55
+ e.g. {12.0, 13.003355, 1.007825, 2.014102} for C100H202.
56
+ \param _isotopeProbabilities A table of isotope frequencies of the elements in the chemical formula,
57
+ e.g. {.989212, .010788, .999885, .000115} for C100H202.
58
+ */
59
+ void setupMarginals(const double* _isotopeMasses,
60
+ const double* _isotopeProbabilities);
61
+ bool disowned; /*!< A variable showing if the Iso class was specialized by its child-class. If so, then the description of the molecules has been transferred there and Iso is a carcass class, dead as a dodo, an ex-class if you will. */
62
+
63
+ protected:
64
+ int dimNumber; /*!< The number of elements in the chemical formula of the molecule. */
65
+ int* isotopeNumbers; /*!< A table with numbers of isotopes for each element. */
66
+ int* atomCounts; /*!< A table with numbers of atoms of each element in the formula. */
67
+ unsigned int confSize; /*!< The number of bytes needed to represent the counts of isotopes present in the extended chemical formula. */
68
+ int allDim; /*!< The total number of isotopes of elements present in a chemical formula, e.g. for H2O (water) it is 2+3=5. */
69
+ Marginal** marginals; /*!< The table of pointers to the distributions of individual subisotopologues. */
70
+
71
+ bool doMarginalsNeedSorting() const;
72
+
73
+ public:
74
+ Iso();
75
+
76
+ //! General constructor.
77
+ /*!
78
+ \param _dimNumber The number of elements in the formula, e.g. for C100H202 it would be 2, as there are only carbon and hydrogen atoms.
79
+ \param _isotopeNumbers A table with numbers of isotopes for each element, e.g. for C100H202 it would be {2, 2}, because both C and H have two stable isotopes.
80
+ \param _atomCounts Number of atoms of each element in the formula, e.g. for C100H202 corresponds to {100, 202}.
81
+ \param _isotopeMasses Masses of isotopes of the elements in the chemical formula: a flat table in the first overload, e.g. {12.0, 13.003355, 1.007825, 2.014102}, or a table of tables in the second overload, e.g. {{12.0, 13.003355}, {1.007825, 2.014102}} for C100H202.
82
+ \param _isotopeProbabilities Isotope frequencies of the elements in the chemical formula: a flat table in the first overload, e.g. {.989212, .010788, .999885, .000115}, or a table of tables in the second overload, e.g. {{.989212, .010788}, {.999885, .000115}} for C100H202.
83
+ */
84
+ Iso(
85
+ int _dimNumber,
86
+ const int* _isotopeNumbers,
87
+ const int* _atomCounts,
88
+ const double* _isotopeMasses,
89
+ const double* _isotopeProbabilities
90
+ );
91
+ Iso(
92
+ int _dimNumber,
93
+ const int* _isotopeNumbers,
94
+ const int* _atomCounts,
95
+ const double* const * _isotopeMasses,
96
+ const double* const * _isotopeProbabilities
97
+ );
98
+
99
+ //! Constructor from a chemical formula given as a C string.
100
+ Iso(const char* formula, bool use_nominal_masses = false); // NOLINT(runtime/explicit) - constructor deliberately left to be used as a conversion
101
+
102
+ //! Constructor from C++ std::string chemical formula.
103
+ inline Iso(const std::string& formula, bool use_nominal_masses = false) : Iso(formula.c_str(), use_nominal_masses) {} // NOLINT(runtime/explicit) - constructor deliberately left to be used as a conversion
104
+
105
+ //! Constructor (named) from aminoacid FASTA sequence as C string.
106
+ /*!
107
+ \param fasta An aminoacid FASTA sequence. May be upper/lower/mixed case, may contain selenocysteine (U) or xleucine (J).
108
+ Other characters, including FASTA codes of indeterminate chemical formula (X, *, -, B, ...) are silently ignored.
109
+ That means "AEDA", "AE-DA", "EAXXDA", "AE DA" will all result in the same chemical formula.
110
+ Subisotopologues will be in order: CHNOS, possibly with Se added at an end if present.
111
+ \param use_nominal_masses Whether to use nucleon number instead of the real mass of each isotope during calculations.
112
+ \param add_water Whether the chain should have the terminating -H and -OH groups at the N and C terminus, respectively.
113
+ */
114
+ static Iso FromFASTA(const char* fasta, bool use_nominal_masses = false, bool add_water = true);
115
+
116
+ //! Constructor (named) from aminoacid FASTA sequence as C++ std::string. See above for details.
117
+ static inline Iso FromFASTA(const std::string& fasta, bool use_nominal_masses = false, bool add_water = true) { return FromFASTA(fasta.c_str(), use_nominal_masses, add_water); }
118
+
119
+ //! The move constructor.
120
+ Iso(Iso&& other);
121
+
122
+ /* We're not exactly following standard copy and assign semantics with Iso objects, so delete the default copy-assignment operator just in case, so no one tries to use it. Copy ctor declared below. */
123
+ Iso& operator=(const Iso& other) = delete;
124
+
125
+ //! The copy constructor.
126
+ /*!
127
+ \param other The other instance of the Iso class.
128
+ \param fullcopy If false, copy only the number of atoms in the formula, the size of the configuration, the total number of isotopes, and the probability of the mode isotopologue.
129
+ */
130
+ Iso(const Iso& other, bool fullcopy);
131
+
132
+ //! Destructor.
133
+ virtual ~Iso();
134
+
135
+ //! Get the mass of the lightest peak in the isotopic distribution.
136
+ double getLightestPeakMass() const;
137
+
138
+ //! Get the mass of the heaviest peak in the isotopic distribution.
139
+ double getHeaviestPeakMass() const;
140
+
141
+ /*!
142
+ Get the mass of the monoisotopic peak in the isotopic distribution. Monoisotopic molecule is defined as
143
+ consisting only of the most frequent isotopes of each element. These are often (but not always) the
144
+ lightest ones, making this often (but again, not always) equal to getLightestPeakMass()
145
+ */
146
+ double getMonoisotopicPeakMass() const;
147
+
148
+ //! Get the log-probability of the mode-configuration (if there are many modes, they share this value).
149
+ double getModeLProb() const;
150
+
151
+ //! Get the log-probability of the least probable subisotopologue.
152
+ double getUnlikeliestPeakLProb() const;
153
+
154
+ //! Get the mass of the mode-configuration (if there are many modes, it is undefined which one will be selected).
155
+ double getModeMass() const;
156
+
157
+ //! Get the theoretical average mass of the molecule.
158
+ double getTheoreticalAverageMass() const;
159
+
160
+ //! Get the theoretical variance of the distribution.
161
+ double variance() const;
162
+
163
+ //! Get the standard deviation of the theoretical distribution, computed as the square root of variance().
164
+ double stddev() const { return sqrt(variance()); }
165
+
166
+ //! Get the number of elements in the chemical formula of the molecule.
167
+ inline int getDimNumber() const { return dimNumber; }
168
+
169
+ //! Get the total number of isotopes of elements present in a chemical formula.
170
+ inline int getAllDim() const { return allDim; }
171
+
172
+ //! Add an element to the molecule. Note: this method can only be used BEFORE Iso is used to construct an IsoGenerator instance.
173
+ void addElement(int atomCount, int noIsotopes, const double* isotopeMasses, const double* isotopeProbabilities);
174
+
175
+ //! Save estimates of logarithms of target sizes of marginals using Gaussian approximation into argument array. Array priorities must have length equal to dimNumber.
176
+ void saveMarginalLogSizeEstimates(double* priorities, double target_total_prob) const;
177
+ };
178
+
179
+
180
+ //! The generator of isotopologues.
181
+ /*!
182
+ This class provides the common interface for all isotopic generators.
183
+ */
184
+ class ISOSPEC_EXPORT_SYMBOL IsoGenerator : public Iso
185
+ {
186
+ public:
187
+ const double mode_lprob; // NOTE(review): presumably the log-probability of the mode configuration (cf. Iso::getModeLProb) - confirm in the constructor
188
+
189
+ protected:
190
+ double* partialLProbs; /*!< The prefix sum of the log-probabilities of the current isotopologue. */
191
+ double* partialMasses; /*!< The prefix sum of the masses of the current isotopologue. */
192
+ double* partialProbs; /*!< The prefix product of the probabilities of the current isotopologue. */
193
+
194
+ public:
195
+ //! Advance to the next, not yet visited, most probable isotopologue.
196
+ /*!
197
+ \return Return false if it is not possible to advance.
198
+ */
199
+ virtual bool advanceToNextConfiguration() = 0;
200
+
201
+ //! Get the log-probability of the current isotopologue.
202
+ /*!
203
+ \return The log-probability of the current isotopologue.
204
+ */
205
+ virtual double lprob() const { return partialLProbs[0]; }
206
+
207
+ //! Get the mass of the current isotopologue.
208
+ /*!
209
+ \return The mass of the current isotopologue.
210
+ */
211
+ virtual double mass() const { return partialMasses[0]; }
212
+
213
+ //! Get the probability of the current isotopologue.
214
+ /*!
215
+ \return The probability of the current isotopologue.
216
+ */
217
+ virtual double prob() const { return partialProbs[0]; }
218
+
219
+ //! Write the signature of configuration into target memory location. It must be large enough to accommodate it.
220
+ virtual void get_conf_signature(int* space) const = 0;
221
+
222
+ //! Constructor that consumes (moves from) an Iso instance.
223
+ IsoGenerator(Iso&& iso, bool alloc_partials = true); // NOLINT(runtime/explicit) - constructor deliberately left to be used as a conversion
224
+
225
+ //! Destructor.
226
+ virtual ~IsoGenerator();
227
+ };
228
+
229
+
230
+
231
+ //! The generator of isotopologues sorted by their probability of occurrence.
232
+ /*!
233
+ The subsequent isotopologues are generated with diminishing probability, starting from the mode.
234
+ This algorithm takes O(N*log(N)) to compute the N isotopologues because of using the Priority Queue data structure.
235
+ Obtaining the N isotopologues can be achieved in O(N) if they are not required to be spit out in the descending order.
236
+ */
237
+ template<typename MarginalType>
238
+ class ISOSPEC_EXPORT_SYMBOL IsoOrderedGeneratorTemplate: public IsoGenerator
239
+ {
240
+ private:
241
+ MarginalType** marginalResults; /*!< Table of pointers to marginal distributions of subisotopologues. */
242
+ std::priority_queue<void*, pod_vector<void*>, ConfOrder> pq; /*!< The priority queue used to generate isotopologues ordered by descending probability. */
243
+ void* topConf; /*!< Most probable configuration. */
244
+ DirtyAllocator allocator; /*!< Structure used for allocating memory for isotopologues. */
245
+ const pod_vector<double>** logProbs; /*!< Obtained log-probabilities. */
246
+ const pod_vector<double>** masses; /*!< Obtained masses. */
247
+ double currentLProb; /*!< The log-probability of the current isotopologue. */
248
+ double currentMass; /*!< The mass of the current isotopologue. */
249
+ double currentProb; /*!< The probability of the current isotopologue. */
250
+ int ccount; // Index of the entry of topConf that get_conf_signature() temporarily decrements while copying; a negative value disables that adjustment.
251
+
252
+ public:
253
+ IsoOrderedGeneratorTemplate(const IsoOrderedGeneratorTemplate& other) = delete;
254
+ IsoOrderedGeneratorTemplate& operator=(const IsoOrderedGeneratorTemplate& other) = delete;
255
+
256
+ bool advanceToNextConfiguration() override final;
257
+
258
+ //! Save the counts of isotopes in the space.
259
+ /*!
260
+ \param space An array where counts of isotopes shall be written.
261
+ Must be as big as the overall number of isotopes. Throws std::runtime_error when MarginalType is not MarginalTrek.
262
+ */
263
+ inline void get_conf_signature(int* space) const override final
264
+ {
265
+ if constexpr (std::is_same<MarginalType, MarginalTrek>::value)
266
+ {
267
+ int* c = getConf(topConf);
268
+
269
+ if (ccount >= 0)
270
+ c[ccount]--; // temporary adjustment, undone after the copy loop below
271
+
272
+ for(int ii = 0; ii < dimNumber; ii++)
273
+ {
274
+ memcpy(space, marginalResults[ii]->confs()[c[ii]], isotopeNumbers[ii]*sizeof(int));
275
+ space += isotopeNumbers[ii];
276
+ }
277
+
278
+ if (ccount >= 0)
279
+ c[ccount]++; // restore the entry decremented above
280
+ }
281
+ else
282
+ throw std::runtime_error("IsoOrderedGeneratorTemplate::get_conf_signature() called on a non-MarginalTrek generator. This is not supported yet.");
283
+ };
284
+
285
+ //! The move-constructor (consumes an Iso instance).
286
+ IsoOrderedGeneratorTemplate(Iso&& iso, int _tabSize = 1000, int _hashSize = 1000); // NOLINT(runtime/explicit) - constructor deliberately left to be used as a conversion
287
+
288
+ //! Destructor.
289
+ virtual ~IsoOrderedGeneratorTemplate();
290
+
291
+ inline void get_conf_by_indexes(int* space) // Writes dimNumber indices into space; does nothing (no throw) unless MarginalType is SingleAtomMarginal<false>.
292
+ {
293
+ if constexpr (std::is_same<MarginalType, SingleAtomMarginal<false>>::value)
294
+ {
295
+ if(dimNumber == 0)
296
+ return;
297
+
298
+ int* c = getConf(topConf);
299
+ space[0] = std::max(c[0]-1, 0); // first index is shifted down by one and clamped at zero
300
+
301
+ for(int ii = 1; ii < dimNumber; ii++)
302
+ space[ii] = c[ii];
303
+ }
304
+ }
305
+ };
306
+
307
+ using IsoOrderedGenerator = IsoOrderedGeneratorTemplate<MarginalTrek>; // The ordered generator instantiated with MarginalTrek marginals.
308
+
309
+
310
+ //! The generator of isotopologues above a given threshold value.
311
+ /*!
312
+ Attention: the calculated configurations are only partially ordered and the user should not assume they will be ordered.
313
+ This algorithm computes N isotopologues in O(N).
314
+ It is a considerable advantage w.r.t. the IsoOrderedGenerator.
315
+ */
316
+ class ISOSPEC_EXPORT_SYMBOL IsoThresholdGenerator: public IsoGenerator
317
+ {
318
+ private:
319
+ int* counter; /*!< An array storing the position of an isotopologue in terms of the subisotopologues ordered by decreasing probability. */
320
+ double* maxConfsLPSum;
321
+ const double Lcutoff; /*!< The logarithm of the lower bound on the calculated probabilities. */
322
+ PrecalculatedMarginal** marginalResults;
323
+ PrecalculatedMarginal** marginalResultsUnsorted;
324
+ int* marginalOrder;
325
+
326
+ const double* lProbs_ptr;
327
+ const double* lProbs_ptr_start;
328
+ double* partialLProbs_second;
329
+ double partialLProbs_second_val, lcfmsv; // lcfmsv is set to Lcutoff - partialLProbs_second_val in recalc()/short_recalc()
330
+ bool empty;
331
+
332
+ public:
333
+ IsoThresholdGenerator(const IsoThresholdGenerator& other) = delete;
334
+ IsoThresholdGenerator& operator=(const IsoThresholdGenerator& other) = delete;
335
+
336
+ inline void get_conf_signature(int* space) const override final // Note: although const, this writes counter[0] through the pointer member before copying.
337
+ {
338
+ counter[0] = lProbs_ptr - lProbs_ptr_start;
339
+ if(marginalOrder != nullptr)
340
+ {
341
+ for(int ii = 0; ii < dimNumber; ii++)
342
+ {
343
+ int jj = marginalOrder[ii];
344
+ memcpy(space, marginalResultsUnsorted[ii]->get_conf(counter[jj]), isotopeNumbers[ii]*sizeof(int));
345
+ space += isotopeNumbers[ii];
346
+ }
347
+ }
348
+ else
349
+ {
350
+ for(int ii = 0; ii < dimNumber; ii++)
351
+ {
352
+ memcpy(space, marginalResultsUnsorted[ii]->get_conf(counter[ii]), isotopeNumbers[ii]*sizeof(int));
353
+ space += isotopeNumbers[ii];
354
+ }
355
+ }
356
+ };
357
+
358
+ //! The move-constructor.
359
+ /*!
360
+ \param iso An instance of the Iso class.
361
+ \param _threshold The threshold value.
362
+ \param _absolute If true, the _threshold is interpreted as the absolute minimal peak height for the isotopologues.
363
+ If false, the _threshold is the fraction of the highest peak's probability.
364
+ \param _tabSize The size of the extension of the table with configurations.
365
+ \param _hashSize The size of the hash-table used to store subisotopologues and check if they have been already calculated.
366
+ */
367
+ IsoThresholdGenerator(Iso&& iso, double _threshold, bool _absolute = true, int _tabSize = 1000, int _hashSize = 1000, bool reorder_marginals = true);
368
+
369
+ ~IsoThresholdGenerator();
370
+
371
+ // Perform highly aggressive inlining as this function is often called as while(advanceToNextConfiguration()) {}
372
+ // which leads to an extremely tight loop and some compilers miss this (potentially due to the length of the function).
373
+ ISOSPEC_FORCE_INLINE bool advanceToNextConfiguration() override final
374
+ {
375
+ lProbs_ptr++;
376
+
377
+ if(ISOSPEC_LIKELY(*lProbs_ptr >= lcfmsv))
378
+ {
379
+ return true;
380
+ }
381
+
382
+ // If we reached this point, a carry is needed
383
+
384
+ int idx = 0;
385
+ lProbs_ptr = lProbs_ptr_start;
386
+
387
+ int * cntr_ptr = counter;
388
+
389
+ while(idx < dimNumber-1)
390
+ {
391
+ // counter[idx] = 0;
392
+ *cntr_ptr = 0;
393
+ idx++;
394
+ cntr_ptr++;
395
+ // counter[idx]++;
396
+ (*cntr_ptr)++;
397
+ partialLProbs[idx] = partialLProbs[idx+1] + marginalResults[idx]->get_lProb(counter[idx]);
398
+ if(partialLProbs[idx] + maxConfsLPSum[idx-1] >= Lcutoff)
399
+ {
400
+ partialMasses[idx] = partialMasses[idx+1] + marginalResults[idx]->get_mass(counter[idx]);
401
+ partialProbs[idx] = partialProbs[idx+1] * marginalResults[idx]->get_prob(counter[idx]);
402
+ recalc(idx-1);
403
+ return true;
404
+ }
405
+ }
406
+
407
+ terminate_search();
408
+ return false;
409
+ }
410
+
411
+
412
+ ISOSPEC_FORCE_INLINE double lprob() const override final { return partialLProbs_second_val + (*(lProbs_ptr)); }
413
+ ISOSPEC_FORCE_INLINE double mass() const override final { return partialMasses[1] + marginalResults[0]->get_mass(lProbs_ptr - lProbs_ptr_start); }
414
+ ISOSPEC_FORCE_INLINE double prob() const override final { return partialProbs[1] * marginalResults[0]->get_prob(lProbs_ptr - lProbs_ptr_start); }
415
+
416
+ //! Block the subsequent search of isotopologues.
417
+ void terminate_search();
418
+
419
+ /*! Reset the generator to the beginning of the sequence. Allows it to be reused, e.g. to go through the conf space once, calculate
420
+ the amount of space needed to store configurations, then to allocate that memory, and go through it again, this time saving
421
+ configurations (and *is* in fact faster than allocating a std::vector and depending on it to grow as needed). This is cheaper
422
+ than throwing away the generator and making a new one too: marginal distributions don't need to be recalculated. */
423
+ void reset();
424
+
425
+ /*! Count the number of configurations in the distribution. This can be used to pre-allocate enough memory to store it (e.g.
426
+ * std::vector's reserve() method - this is faster than depending on the vector's dynamic resizing, even though it means that
427
+ * the configuration space is walked through twice). This method has to be called before the first call to advanceToNextConfiguration
428
+ * and has undefined results (incl. segfaults) otherwise. */
429
+ size_t count_confs();
430
+
431
+ private:
432
+ //! Recalculate the current partial log-probabilities, masses, and probabilities.
433
+ ISOSPEC_FORCE_INLINE void recalc(int idx)
434
+ {
435
+ for(; idx > 0; idx--)
436
+ {
437
+ partialLProbs[idx] = partialLProbs[idx+1] + marginalResults[idx]->get_lProb(counter[idx]);
438
+ partialMasses[idx] = partialMasses[idx+1] + marginalResults[idx]->get_mass(counter[idx]);
439
+ partialProbs[idx] = partialProbs[idx+1] * marginalResults[idx]->get_prob(counter[idx]);
440
+ }
441
+ partialLProbs_second_val = *partialLProbs_second;
442
+ partialLProbs[0] = *partialLProbs_second + marginalResults[0]->get_lProb(counter[0]);
443
+ lcfmsv = Lcutoff - partialLProbs_second_val;
444
+ }
445
+
446
+ ISOSPEC_FORCE_INLINE void short_recalc(int idx) // Like recalc(), but refreshes only the partial log-probabilities; partialMasses and partialProbs are left untouched.
447
+ {
448
+ for(; idx > 0; idx--)
449
+ partialLProbs[idx] = partialLProbs[idx+1] + marginalResults[idx]->get_lProb(counter[idx]);
450
+ partialLProbs_second_val = *partialLProbs_second;
451
+ partialLProbs[0] = *partialLProbs_second + marginalResults[0]->get_lProb(counter[0]);
452
+ lcfmsv = Lcutoff - partialLProbs_second_val;
453
+ }
454
+ };
455
+
456
+
457
+
458
+
459
+ template<typename MarginalType>
460
+ class ISOSPEC_EXPORT_SYMBOL IsoLayeredGeneratorTemplate : public IsoGenerator // Generator that walks configurations layer by layer: advanceToNextConfiguration() exhausts the current layer, then calls nextLayer(-2.0).
461
+ {
462
+ private:
463
+ int* counter; /*!< An array storing the position of an isotopologue in terms of the subisotopologues ordered by decreasing probability. */
464
+ double* maxConfsLPSum;
465
+ double currentLThreshold, lastLThreshold;
466
+ MarginalType** marginalResults;
467
+ MarginalType** marginalResultsUnsorted;
468
+ int* marginalOrder;
469
+
470
+ const double* lProbs_ptr;
471
+ const double* lProbs_ptr_start;
472
+ const double** resetPositions;
473
+ double* partialLProbs_second;
474
+ double partialLProbs_second_val, lcfmsv, last_lcfmsv; // lcfmsv / last_lcfmsv are the current / last thresholds minus partialLProbs_second_val (see recalc())
475
+ bool marginalsNeedSorting;
476
+
477
+
478
+ public:
479
+ IsoLayeredGeneratorTemplate(const IsoLayeredGeneratorTemplate& other) = delete;
480
+ IsoLayeredGeneratorTemplate& operator=(const IsoLayeredGeneratorTemplate& other) = delete;
481
+
482
+ inline void get_conf_signature(int* space) const override final // Note: although const, this writes counter[0] through the pointer member before copying.
483
+ {
484
+ counter[0] = lProbs_ptr - lProbs_ptr_start;
485
+ if(marginalOrder != nullptr)
486
+ {
487
+ for(int ii = 0; ii < dimNumber; ii++)
488
+ {
489
+ int jj = marginalOrder[ii];
490
+ memcpy(space, marginalResultsUnsorted[ii]->get_conf(counter[jj]), isotopeNumbers[ii]*sizeof(int));
491
+ space += isotopeNumbers[ii];
492
+ }
493
+ }
494
+ else
495
+ {
496
+ for(int ii = 0; ii < dimNumber; ii++)
497
+ {
498
+ memcpy(space, marginalResultsUnsorted[ii]->get_conf(counter[ii]), isotopeNumbers[ii]*sizeof(int));
499
+ space += isotopeNumbers[ii];
500
+ }
501
+ }
502
+ };
503
+
504
+ inline double get_currentLThreshold() const { return currentLThreshold; }
505
+
506
+ IsoLayeredGeneratorTemplate(Iso&& iso, int _tabSize = 1000, int _hashSize = 1000, bool reorder_marginals = true, double t_prob_hint = 0.99); // NOLINT(runtime/explicit) - constructor deliberately left to be used as a conversion
507
+
508
+ ~IsoLayeredGeneratorTemplate();
509
+
510
+ ISOSPEC_FORCE_INLINE bool advanceToNextConfiguration() override final
511
+ {
512
+ do
513
+ {
514
+ if(advanceToNextConfigurationWithinLayer())
515
+ return true;
516
+ } while(IsoLayeredGeneratorTemplate<MarginalType>::nextLayer(-2.0)); // current layer exhausted: try to open the next one
517
+ return false;
518
+ }
519
+
520
+ ISOSPEC_FORCE_INLINE bool advanceToNextConfigurationWithinLayer()
521
+ {
522
+ do{
523
+ lProbs_ptr++;
524
+
525
+ if(ISOSPEC_LIKELY(*lProbs_ptr >= lcfmsv))
526
+ return true;
527
+ }
528
+ while(carry()); // NOLINT(whitespace/empty_loop_body) - cpplint bug, that's not an empty loop body, that's a do{...}while(...) construct
529
+ return false;
530
+ }
531
+
532
+ ISOSPEC_FORCE_INLINE double lprob() const override final { return partialLProbs_second_val + (*(lProbs_ptr)); };
533
+ ISOSPEC_FORCE_INLINE double mass() const override final { return partialMasses[1] + marginalResults[0]->get_mass(lProbs_ptr - lProbs_ptr_start); };
534
+ ISOSPEC_FORCE_INLINE double prob() const override final { return partialProbs[1] * marginalResults[0]->get_prob(lProbs_ptr - lProbs_ptr_start); };
535
+
536
+ //! Block the subsequent search of isotopologues.
537
+ void terminate_search();
538
+
539
+
540
+ //! Recalculate the current partial log-probabilities, masses, and probabilities.
541
+ ISOSPEC_FORCE_INLINE void recalc(int idx)
542
+ {
543
+ for(; idx > 0; idx--)
544
+ {
545
+ partialLProbs[idx] = partialLProbs[idx+1] + marginalResults[idx]->get_lProb(counter[idx]);
546
+ partialMasses[idx] = partialMasses[idx+1] + marginalResults[idx]->get_mass(counter[idx]);
547
+ partialProbs[idx] = partialProbs[idx+1] * marginalResults[idx]->get_prob(counter[idx]);
548
+ }
549
+ partialLProbs_second_val = *partialLProbs_second;
550
+ partialLProbs[0] = partialLProbs_second_val + marginalResults[0]->get_lProb(counter[0]);
551
+ lcfmsv = currentLThreshold - partialLProbs_second_val;
552
+ last_lcfmsv = lastLThreshold - partialLProbs_second_val;
553
+ }
554
+
555
+ bool nextLayer(double offset);
556
+
557
+ void get_conf_by_indexes(int* space) const // Writes dimNumber original positions into space; throws std::runtime_error unless MarginalType is SingleAtomMarginal<true>. Also writes counter[0].
558
+ {
559
+ if constexpr (std::is_same<MarginalType, SingleAtomMarginal<true>>::value)
560
+ {
561
+ counter[0] = lProbs_ptr - lProbs_ptr_start;
562
+ if(marginalOrder != nullptr)
563
+ {
564
+ for(int ii = 0; ii < dimNumber; ii++)
565
+ {
566
+ int jj = marginalOrder[ii];
567
+ space[ii] = marginalResultsUnsorted[ii]->get_original_position(counter[jj]);
568
+ }
569
+ }
570
+ else
571
+ {
572
+ for(int ii = 0; ii < dimNumber; ii++)
573
+ space[ii] = marginalResultsUnsorted[ii]->get_original_position(counter[ii]);
574
+ }
575
+ }
576
+ else
577
+ throw std::runtime_error("IsoLayeredGeneratorTemplate::get_conf_by_indexes() called on a non-SingleAtomMarginal generator. This is not supported yet.");
578
+ }
579
+
580
+ private:
581
+ bool carry();
582
+ };
583
+ using IsoLayeredGenerator = IsoLayeredGeneratorTemplate<LayeredMarginal>; // The layered generator instantiated with LayeredMarginal marginals.
584
+
585
+ template<typename IsoType>
586
+ class IsoStochasticGeneratorTemplate : public IsoGenerator // Wraps an inner generator (member ILG); note that prob() here returns the count for the current configuration, not a probability.
587
+ {
588
+ IsoType ILG;
589
+ size_t to_sample_left;
590
+ const double precision;
591
+ const double beta_bias;
592
+ double confs_prob; // running sum of ILG.prob() over the configurations visited so far
593
+ double chasing_prob;
594
+ size_t current_count; // value reported by count() for the current configuration
595
+ std::mt19937& rdvariate_gen; /*!< The random number generator used to generate random numbers. */
596
+
597
+ public:
598
+ IsoStochasticGeneratorTemplate(Iso&& iso, size_t no_molecules, double precision = 0.9999, double beta_bias = 5.0, std::mt19937& rdvariate_gen = random_gen);
599
+
600
+ ISOSPEC_FORCE_INLINE size_t count() const { return current_count; }
601
+
602
+ ISOSPEC_FORCE_INLINE double mass() const override final { return ILG.mass(); }
603
+
604
+ ISOSPEC_FORCE_INLINE double prob() const override final { return static_cast<double>(count()); } // returns the count converted to double
605
+
606
+ ISOSPEC_FORCE_INLINE double lprob() const override final { return log(prob()); } // logarithm of the count returned by prob()
607
+
608
+ ISOSPEC_FORCE_INLINE void get_conf_signature(int* space) const override final { ILG.get_conf_signature(space); }
609
+
610
+ ISOSPEC_FORCE_INLINE bool advanceToNextConfiguration() override final
611
+ {
612
+ /* This function will be used mainly in very small, tight loops, therefore it makes sense to
613
+ * aggressively inline it, despite its seemingly large body.
614
+ */
615
+ while(true)
616
+ {
617
+ double curr_conf_prob_left, current_prob;
618
+
619
+ if(to_sample_left <= 0) // to_sample_left is a size_t, so this is equivalent to == 0
620
+ return false;
621
+
622
+ if(confs_prob < chasing_prob)
623
+ {
624
+ // Beta was last
625
+ current_count = 1;
626
+ to_sample_left--;
627
+ if(!ILG.advanceToNextConfiguration())
628
+ return false;
629
+ current_prob = ILG.prob();
630
+ confs_prob += current_prob;
631
+ while(confs_prob <= chasing_prob)
632
+ {
633
+ if(!ILG.advanceToNextConfiguration())
634
+ return false;
635
+ current_prob = ILG.prob();
636
+ confs_prob += current_prob;
637
+ }
638
+ if(to_sample_left <= 0)
639
+ return true;
640
+ curr_conf_prob_left = confs_prob - chasing_prob;
641
+ }
642
+ else
643
+ {
644
+ // Binomial was last
645
+ current_count = 0;
646
+ if(!ILG.advanceToNextConfiguration())
647
+ return false;
648
+ current_prob = ILG.prob();
649
+ confs_prob += current_prob;
650
+ curr_conf_prob_left = current_prob;
651
+ }
652
+
653
+ double prob_left_to_1 = precision - chasing_prob;
654
+ double expected_confs = curr_conf_prob_left * to_sample_left / prob_left_to_1;
655
+
656
+ if(expected_confs <= beta_bias)
657
+ {
658
+ // Beta mode: we keep making beta jumps until we leave the current configuration
659
+ chasing_prob += rdvariate_beta_1_b(to_sample_left, rdvariate_gen) * prob_left_to_1;
660
+ while(chasing_prob <= confs_prob)
661
+ {
662
+ current_count++;
663
+ to_sample_left--;
664
+ if(to_sample_left == 0)
665
+ return true;
666
+ prob_left_to_1 = precision - chasing_prob;
667
+ chasing_prob += rdvariate_beta_1_b(to_sample_left, rdvariate_gen) * prob_left_to_1;
668
+ }
669
+ if(current_count > 0)
670
+ return true;
671
+ }
672
+ else
673
+ {
674
+ // Binomial mode: a single binomial step
675
+ size_t rbin = rdvariate_binom(to_sample_left, curr_conf_prob_left/prob_left_to_1, rdvariate_gen);
676
+ current_count += rbin;
677
+ to_sample_left -= rbin;
678
+ chasing_prob = confs_prob;
679
+ if(current_count > 0)
680
+ return true;
681
+ }
682
+ }; // the trailing ';' is an empty statement; the loop above only exits via return
683
+ }
684
+
685
+ ISOSPEC_FORCE_INLINE void get_indexes(int* space) // forwards to ILG.get_conf_by_indexes()
686
+ {
687
+ ILG.get_conf_by_indexes(space);
688
+ }
689
+ };
690
+
691
+ using IsoStochasticGenerator = IsoStochasticGeneratorTemplate<IsoLayeredGenerator>; // The stochastic generator wrapping an IsoLayeredGenerator.
692
+
693
+ } // namespace IsoSpec