isotree 0.2.2 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (151) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +8 -1
  3. data/LICENSE.txt +2 -2
  4. data/README.md +32 -14
  5. data/ext/isotree/ext.cpp +144 -31
  6. data/ext/isotree/extconf.rb +7 -7
  7. data/lib/isotree/isolation_forest.rb +110 -30
  8. data/lib/isotree/version.rb +1 -1
  9. data/vendor/isotree/LICENSE +1 -1
  10. data/vendor/isotree/README.md +165 -27
  11. data/vendor/isotree/include/isotree.hpp +2111 -0
  12. data/vendor/isotree/include/isotree_oop.hpp +394 -0
  13. data/vendor/isotree/inst/COPYRIGHTS +62 -0
  14. data/vendor/isotree/src/RcppExports.cpp +525 -52
  15. data/vendor/isotree/src/Rwrapper.cpp +1931 -268
  16. data/vendor/isotree/src/c_interface.cpp +953 -0
  17. data/vendor/isotree/src/crit.hpp +4232 -0
  18. data/vendor/isotree/src/dist.hpp +1886 -0
  19. data/vendor/isotree/src/exp_depth_table.hpp +134 -0
  20. data/vendor/isotree/src/extended.hpp +1444 -0
  21. data/vendor/isotree/src/external_facing_generic.hpp +399 -0
  22. data/vendor/isotree/src/fit_model.hpp +2401 -0
  23. data/vendor/isotree/src/{dealloc.cpp → headers_joined.hpp} +38 -22
  24. data/vendor/isotree/src/helpers_iforest.hpp +813 -0
  25. data/vendor/isotree/src/{impute.cpp → impute.hpp} +353 -122
  26. data/vendor/isotree/src/indexer.cpp +515 -0
  27. data/vendor/isotree/src/instantiate_template_headers.cpp +118 -0
  28. data/vendor/isotree/src/instantiate_template_headers.hpp +240 -0
  29. data/vendor/isotree/src/isoforest.hpp +1659 -0
  30. data/vendor/isotree/src/isotree.hpp +1804 -392
  31. data/vendor/isotree/src/isotree_exportable.hpp +99 -0
  32. data/vendor/isotree/src/merge_models.cpp +159 -16
  33. data/vendor/isotree/src/mult.hpp +1321 -0
  34. data/vendor/isotree/src/oop_interface.cpp +842 -0
  35. data/vendor/isotree/src/oop_interface.hpp +278 -0
  36. data/vendor/isotree/src/other_helpers.hpp +219 -0
  37. data/vendor/isotree/src/predict.hpp +1932 -0
  38. data/vendor/isotree/src/python_helpers.hpp +134 -0
  39. data/vendor/isotree/src/ref_indexer.hpp +154 -0
  40. data/vendor/isotree/src/robinmap/LICENSE +21 -0
  41. data/vendor/isotree/src/robinmap/README.md +483 -0
  42. data/vendor/isotree/src/robinmap/include/tsl/robin_growth_policy.h +406 -0
  43. data/vendor/isotree/src/robinmap/include/tsl/robin_hash.h +1620 -0
  44. data/vendor/isotree/src/robinmap/include/tsl/robin_map.h +807 -0
  45. data/vendor/isotree/src/robinmap/include/tsl/robin_set.h +660 -0
  46. data/vendor/isotree/src/serialize.cpp +4300 -139
  47. data/vendor/isotree/src/sql.cpp +141 -59
  48. data/vendor/isotree/src/subset_models.cpp +174 -0
  49. data/vendor/isotree/src/utils.hpp +3808 -0
  50. data/vendor/isotree/src/xoshiro.hpp +467 -0
  51. data/vendor/isotree/src/ziggurat.hpp +405 -0
  52. metadata +38 -104
  53. data/vendor/cereal/LICENSE +0 -24
  54. data/vendor/cereal/README.md +0 -85
  55. data/vendor/cereal/include/cereal/access.hpp +0 -351
  56. data/vendor/cereal/include/cereal/archives/adapters.hpp +0 -163
  57. data/vendor/cereal/include/cereal/archives/binary.hpp +0 -169
  58. data/vendor/cereal/include/cereal/archives/json.hpp +0 -1019
  59. data/vendor/cereal/include/cereal/archives/portable_binary.hpp +0 -334
  60. data/vendor/cereal/include/cereal/archives/xml.hpp +0 -956
  61. data/vendor/cereal/include/cereal/cereal.hpp +0 -1089
  62. data/vendor/cereal/include/cereal/details/helpers.hpp +0 -422
  63. data/vendor/cereal/include/cereal/details/polymorphic_impl.hpp +0 -796
  64. data/vendor/cereal/include/cereal/details/polymorphic_impl_fwd.hpp +0 -65
  65. data/vendor/cereal/include/cereal/details/static_object.hpp +0 -127
  66. data/vendor/cereal/include/cereal/details/traits.hpp +0 -1411
  67. data/vendor/cereal/include/cereal/details/util.hpp +0 -84
  68. data/vendor/cereal/include/cereal/external/base64.hpp +0 -134
  69. data/vendor/cereal/include/cereal/external/rapidjson/allocators.h +0 -284
  70. data/vendor/cereal/include/cereal/external/rapidjson/cursorstreamwrapper.h +0 -78
  71. data/vendor/cereal/include/cereal/external/rapidjson/document.h +0 -2652
  72. data/vendor/cereal/include/cereal/external/rapidjson/encodedstream.h +0 -299
  73. data/vendor/cereal/include/cereal/external/rapidjson/encodings.h +0 -716
  74. data/vendor/cereal/include/cereal/external/rapidjson/error/en.h +0 -74
  75. data/vendor/cereal/include/cereal/external/rapidjson/error/error.h +0 -161
  76. data/vendor/cereal/include/cereal/external/rapidjson/filereadstream.h +0 -99
  77. data/vendor/cereal/include/cereal/external/rapidjson/filewritestream.h +0 -104
  78. data/vendor/cereal/include/cereal/external/rapidjson/fwd.h +0 -151
  79. data/vendor/cereal/include/cereal/external/rapidjson/internal/biginteger.h +0 -290
  80. data/vendor/cereal/include/cereal/external/rapidjson/internal/diyfp.h +0 -271
  81. data/vendor/cereal/include/cereal/external/rapidjson/internal/dtoa.h +0 -245
  82. data/vendor/cereal/include/cereal/external/rapidjson/internal/ieee754.h +0 -78
  83. data/vendor/cereal/include/cereal/external/rapidjson/internal/itoa.h +0 -308
  84. data/vendor/cereal/include/cereal/external/rapidjson/internal/meta.h +0 -186
  85. data/vendor/cereal/include/cereal/external/rapidjson/internal/pow10.h +0 -55
  86. data/vendor/cereal/include/cereal/external/rapidjson/internal/regex.h +0 -740
  87. data/vendor/cereal/include/cereal/external/rapidjson/internal/stack.h +0 -232
  88. data/vendor/cereal/include/cereal/external/rapidjson/internal/strfunc.h +0 -69
  89. data/vendor/cereal/include/cereal/external/rapidjson/internal/strtod.h +0 -290
  90. data/vendor/cereal/include/cereal/external/rapidjson/internal/swap.h +0 -46
  91. data/vendor/cereal/include/cereal/external/rapidjson/istreamwrapper.h +0 -128
  92. data/vendor/cereal/include/cereal/external/rapidjson/memorybuffer.h +0 -70
  93. data/vendor/cereal/include/cereal/external/rapidjson/memorystream.h +0 -71
  94. data/vendor/cereal/include/cereal/external/rapidjson/msinttypes/inttypes.h +0 -316
  95. data/vendor/cereal/include/cereal/external/rapidjson/msinttypes/stdint.h +0 -300
  96. data/vendor/cereal/include/cereal/external/rapidjson/ostreamwrapper.h +0 -81
  97. data/vendor/cereal/include/cereal/external/rapidjson/pointer.h +0 -1414
  98. data/vendor/cereal/include/cereal/external/rapidjson/prettywriter.h +0 -277
  99. data/vendor/cereal/include/cereal/external/rapidjson/rapidjson.h +0 -656
  100. data/vendor/cereal/include/cereal/external/rapidjson/reader.h +0 -2230
  101. data/vendor/cereal/include/cereal/external/rapidjson/schema.h +0 -2497
  102. data/vendor/cereal/include/cereal/external/rapidjson/stream.h +0 -223
  103. data/vendor/cereal/include/cereal/external/rapidjson/stringbuffer.h +0 -121
  104. data/vendor/cereal/include/cereal/external/rapidjson/writer.h +0 -709
  105. data/vendor/cereal/include/cereal/external/rapidxml/license.txt +0 -52
  106. data/vendor/cereal/include/cereal/external/rapidxml/manual.html +0 -406
  107. data/vendor/cereal/include/cereal/external/rapidxml/rapidxml.hpp +0 -2624
  108. data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_iterators.hpp +0 -175
  109. data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_print.hpp +0 -428
  110. data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_utils.hpp +0 -123
  111. data/vendor/cereal/include/cereal/macros.hpp +0 -154
  112. data/vendor/cereal/include/cereal/specialize.hpp +0 -139
  113. data/vendor/cereal/include/cereal/types/array.hpp +0 -79
  114. data/vendor/cereal/include/cereal/types/atomic.hpp +0 -55
  115. data/vendor/cereal/include/cereal/types/base_class.hpp +0 -203
  116. data/vendor/cereal/include/cereal/types/bitset.hpp +0 -176
  117. data/vendor/cereal/include/cereal/types/boost_variant.hpp +0 -164
  118. data/vendor/cereal/include/cereal/types/chrono.hpp +0 -72
  119. data/vendor/cereal/include/cereal/types/common.hpp +0 -129
  120. data/vendor/cereal/include/cereal/types/complex.hpp +0 -56
  121. data/vendor/cereal/include/cereal/types/concepts/pair_associative_container.hpp +0 -73
  122. data/vendor/cereal/include/cereal/types/deque.hpp +0 -62
  123. data/vendor/cereal/include/cereal/types/forward_list.hpp +0 -68
  124. data/vendor/cereal/include/cereal/types/functional.hpp +0 -43
  125. data/vendor/cereal/include/cereal/types/list.hpp +0 -62
  126. data/vendor/cereal/include/cereal/types/map.hpp +0 -36
  127. data/vendor/cereal/include/cereal/types/memory.hpp +0 -425
  128. data/vendor/cereal/include/cereal/types/optional.hpp +0 -66
  129. data/vendor/cereal/include/cereal/types/polymorphic.hpp +0 -483
  130. data/vendor/cereal/include/cereal/types/queue.hpp +0 -132
  131. data/vendor/cereal/include/cereal/types/set.hpp +0 -103
  132. data/vendor/cereal/include/cereal/types/stack.hpp +0 -76
  133. data/vendor/cereal/include/cereal/types/string.hpp +0 -61
  134. data/vendor/cereal/include/cereal/types/tuple.hpp +0 -123
  135. data/vendor/cereal/include/cereal/types/unordered_map.hpp +0 -36
  136. data/vendor/cereal/include/cereal/types/unordered_set.hpp +0 -99
  137. data/vendor/cereal/include/cereal/types/utility.hpp +0 -47
  138. data/vendor/cereal/include/cereal/types/valarray.hpp +0 -89
  139. data/vendor/cereal/include/cereal/types/variant.hpp +0 -109
  140. data/vendor/cereal/include/cereal/types/vector.hpp +0 -112
  141. data/vendor/cereal/include/cereal/version.hpp +0 -52
  142. data/vendor/isotree/src/Makevars +0 -4
  143. data/vendor/isotree/src/crit.cpp +0 -912
  144. data/vendor/isotree/src/dist.cpp +0 -749
  145. data/vendor/isotree/src/extended.cpp +0 -790
  146. data/vendor/isotree/src/fit_model.cpp +0 -1090
  147. data/vendor/isotree/src/helpers_iforest.cpp +0 -324
  148. data/vendor/isotree/src/isoforest.cpp +0 -771
  149. data/vendor/isotree/src/mult.cpp +0 -607
  150. data/vendor/isotree/src/predict.cpp +0 -853
  151. data/vendor/isotree/src/utils.cpp +0 -1566
@@ -1,1566 +0,0 @@
1
- /* Isolation forests and variations thereof, with adjustments for incorporation
2
- * of categorical variables and missing values.
3
- * Written for C++11 standard and aimed at being used in R and Python.
4
- *
5
- * This library is based on the following works:
6
- * [1] Liu, Fei Tony, Kai Ming Ting, and Zhi-Hua Zhou.
7
- * "Isolation forest."
8
- * 2008 Eighth IEEE International Conference on Data Mining. IEEE, 2008.
9
- * [2] Liu, Fei Tony, Kai Ming Ting, and Zhi-Hua Zhou.
10
- * "Isolation-based anomaly detection."
11
- * ACM Transactions on Knowledge Discovery from Data (TKDD) 6.1 (2012): 3.
12
- * [3] Hariri, Sahand, Matias Carrasco Kind, and Robert J. Brunner.
13
- * "Extended Isolation Forest."
14
- * arXiv preprint arXiv:1811.02141 (2018).
15
- * [4] Liu, Fei Tony, Kai Ming Ting, and Zhi-Hua Zhou.
16
- * "On detecting clustered anomalies using SCiForest."
17
- * Joint European Conference on Machine Learning and Knowledge Discovery in Databases. Springer, Berlin, Heidelberg, 2010.
18
- * [5] https://sourceforge.net/projects/iforest/
19
- * [6] https://math.stackexchange.com/questions/3388518/expected-number-of-paths-required-to-separate-elements-in-a-binary-tree
20
- * [7] Quinlan, J. Ross. C4. 5: programs for machine learning. Elsevier, 2014.
21
- * [8] Cortes, David. "Distance approximation using Isolation Forests." arXiv preprint arXiv:1910.12362 (2019).
22
- * [9] Cortes, David. "Imputing missing values with unsupervised random trees." arXiv preprint arXiv:1911.06646 (2019).
23
- *
24
- * BSD 2-Clause License
25
- * Copyright (c) 2020, David Cortes
26
- * All rights reserved.
27
- * Redistribution and use in source and binary forms, with or without
28
- * modification, are permitted provided that the following conditions are met:
29
- * * Redistributions of source code must retain the above copyright notice, this
30
- * list of conditions and the following disclaimer.
31
- * * Redistributions in binary form must reproduce the above copyright notice,
32
- * this list of conditions and the following disclaimer in the documentation
33
- * and/or other materials provided with the distribution.
34
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
35
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
36
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
37
- * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
38
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
39
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
40
- * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
41
- * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
42
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
43
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
44
- */
45
- #include "isotree.hpp"
46
-
47
- /* ceil(log2(x)) done with bit-wise operations ensures perfect precision (and it's faster too)
48
- https://stackoverflow.com/questions/2589096/find-most-significant-bit-left-most-that-is-set-in-a-bit-array
49
- https://stackoverflow.com/questions/11376288/fast-computing-of-log2-for-64-bit-integers */
50
/* log2ceil(x): integer ceil(log2(x)).
   The 32/64-bit variants first round (x - 1) up to an all-ones bit pattern and
   then locate its highest set bit through a De Bruijn multiplication + lookup.
   Inputs of 0 and 1 are answered directly with 0: without the guard, 'x - 1'
   is zero (or wraps around for x == 0) and the table lookup yields a
   meaningless value (e.g. 64 for x == 1 in the 64-bit variant). */
#if SIZE_MAX == UINT32_MAX /* 32-bit systems */
static const int MultiplyDeBruijnBitPosition[32] =
{
    0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30,
    8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31
};
size_t log2ceil( size_t v )
{
    if (v <= 1)
        return 0;

    v--;
    v |= v >> 1; // first round down to one less than a power of 2
    v |= v >> 2;
    v |= v >> 4;
    v |= v >> 8;
    v |= v >> 16;

    return MultiplyDeBruijnBitPosition[( uint32_t )( v * 0x07C4ACDDU ) >> 27] + 1;
}
#elif SIZE_MAX == UINT64_MAX /* 64-bit systems */
static const uint64_t tab64[64] = {
    63,  0, 58,  1, 59, 47, 53,  2,
    60, 39, 48, 27, 54, 33, 42,  3,
    61, 51, 37, 40, 49, 18, 28, 20,
    55, 30, 34, 11, 43, 14, 22,  4,
    62, 57, 46, 52, 38, 26, 32, 41,
    50, 36, 17, 19, 29, 10, 13, 21,
    56, 45, 25, 31, 35, 16,  9, 12,
    44, 24, 15,  8, 23,  7,  6,  5};

size_t log2ceil(size_t value)
{
    if (value <= 1)
        return 0;

    value--;
    value |= value >> 1;
    value |= value >> 2;
    value |= value >> 4;
    value |= value >> 8;
    value |= value >> 16;
    value |= value >> 32;
    /* 'value - (value >> 1)' isolates the highest set bit */
    return tab64[((uint64_t)((value - (value >> 1))*0x07EDD5E59A4E28C2)) >> 58] + 1;
}
#else /* other architectures - might not be entirely precise, and will be slower */
size_t log2ceil(size_t x)
{
    if (x <= 1)
        return 0;
    return (size_t)(ceill(log2l((long double) x)));
}
#endif
93
-
94
- /* http://fredrik-j.blogspot.com/2009/02/how-not-to-compute-harmonic-numbers.html
95
- https://en.wikipedia.org/wiki/Harmonic_number */
96
#define THRESHOLD_EXACT_H 256 /* above this will get approximated */

/* Sums 1/k over the integers k in [a, b) by splitting the range in halves.
   'a' and 'b' are expected to hold integer values. An empty range (b <= a)
   sums to zero; without that guard the recursion would never terminate. */
double harmonic_recursive(double a, double b)
{
    if (b <= a) return 0.;
    if (b == a + 1) return 1 / a;
    double m = floor((a + b) / 2);
    return harmonic_recursive(a, m) + harmonic_recursive(m, b);
}

/* n-th harmonic number: summed term by term up to THRESHOLD_EXACT_H, and
   approximated as log(n) + Euler-Mascheroni constant above it.
   harmonic(0) is the empty sum, i.e. zero. */
double harmonic(size_t n)
{
    if (n == 0)
        return 0.;
    if (n > THRESHOLD_EXACT_H)
        return logl((long double)n) + (long double)0.5772156649;
    else
        return harmonic_recursive((double)1, (double)(n + 1));
}

/* https://stats.stackexchange.com/questions/423542/isolation-forest-and-average-expected-depth-formula
   https://math.stackexchange.com/questions/3333220/expected-average-depth-in-random-binary-tree-constructed-top-to-bottom */
/* Expected average depth for an integer sample size. Sizes up to 9 come from
   a table of exact fractions, larger ones from 2 * (H(n) - 1). A size of zero
   gives zero, in agreement with the 'long double' overload. */
double expected_avg_depth(size_t sample_size)
{
    switch(sample_size)
    {
        case 0: return 0.;
        case 1: return 0.;
        case 2: return 1.;
        case 3: return 5.0/3.0;
        case 4: return 13.0/6.0;
        case 5: return 77.0/30.0;
        case 6: return 29.0/10.0;
        case 7: return 223.0/70.0;
        case 8: return 481.0/140.0;
        case 9: return 4609.0/1260.0;
        default:
        {
            return 2 * (harmonic(sample_size) - 1);
        }
    }
}

/* Same quantity for a fractional sample size: values up to THRESHOLD_EXACT_H
   are rounded to the nearest integer and delegated to the overload above,
   larger ones use the closed-form approximation 2*log(n) - 1.4227843351. */
double expected_avg_depth(long double approx_sample_size)
{
    if (approx_sample_size < 1.5)
        return 0;
    else if (approx_sample_size < 2.5)
        return 1;
    else if (approx_sample_size <= THRESHOLD_EXACT_H)
        return expected_avg_depth((size_t) roundl(approx_sample_size));
    else
        return 2 * logl(approx_sample_size) - (long double)1.4227843351;
}
145
-
146
- /* https://math.stackexchange.com/questions/3388518/expected-number-of-paths-required-to-separate-elements-in-a-binary-tree */
147
#define THRESHOLD_EXACT_S 87670 /* difference is <5e-4 */

/* Continues the separation-depth recurrence from a known value 'curr' at size
   'n_curr' up to size 'n_final'. Sizes of 1360 and above are answered from a
   table of pre-computed constants instead of iterating. */
double expected_separation_depth_hotstart(double curr, size_t n_curr, size_t n_final)
{
    /* Note on the chosen precision: when calling it on smaller sample sizes,
       the standard error of the separation depth will be larger, thus it's less
       critical to get it right down to the smallest possible precision, while for
       larger samples the standard error of the separation depth will be smaller */
    if (n_final >= THRESHOLD_EXACT_S) return 3;
    if (n_final >= 40774) return 2.999;
    if (n_final >= 18844) return 2.998;
    if (n_final >= 11956) return 2.997;
    if (n_final >= 8643)  return 2.996;
    if (n_final >= 6713)  return 2.995;
    if (n_final >= 4229)  return 2.9925;
    if (n_final >= 3040)  return 2.99;
    if (n_final >= 2724)  return 2.989;
    if (n_final >= 1902)  return 2.985;
    if (n_final >= 1360)  return 2.98;

    for (size_t i = n_curr + 1; i <= n_final; i++)
        curr += (-curr * (double)i + 3. * (double)i - 4.) / ((double)i * ((double)(i-1)));
    return curr;
}

/* Expected separation depth for an integer size 'n': tabulated up to 10,
   constant 3 from THRESHOLD_EXACT_S onwards, and obtained through the
   recurrence (started from the value at 10) in between. */
double expected_separation_depth(size_t n)
{
    switch(n)
    {
        case 0: return 0.;
        case 1: return 0.;
        case 2: return 1.;
        case 3: return 1. + (1./3.);
        case 4: return 1. + (1./3.) + (2./9.);
        case 5: return 1.71666666667;
        case 6: return 1.84;
        case 7: return 1.93809524;
        case 8: return 2.01836735;
        case 9: return 2.08551587;
        case 10: return 2.14268078;
        default:
        {
            if (n >= THRESHOLD_EXACT_S)
                return 3;
            else
                return expected_separation_depth_hotstart((double)2.14268078, (size_t)10, n);
        }
    }
}

/* Fractional size: linear interpolation between the values at floor(n) and
   floor(n) + 1, the latter being one recurrence step from the former.
   - Sizes <= 1 return zero directly (the recurrence step would divide by zero
     there, and a negative size cannot be cast to 'size_t').
   - The interpolation adds 'diff' times the *difference* between both ends,
     so the result always lies between the two neighbouring integer values. */
double expected_separation_depth(long double n)
{
    if (n >= THRESHOLD_EXACT_S)
        return 3;
    if (n <= 1)
        return 0.;

    long double n_low = floorl(n);
    double s_l = expected_separation_depth((size_t) n_low);
    double diff = n - n_low;
    if (diff <= 0)
        return s_l;

    long double u = n_low + 1;
    double s_u = s_l + (-s_l * u + 3. * u - 4.) / (u * (u - 1.));
    return s_l + diff * (s_u - s_l);
}
222
-
223
/* Position of the pair (i, j), with i < j, inside a condensed array holding
   the ncomb = n*(n-1)/2 entries of the upper triangle of an n x n matrix */
#define ix_comb(i, j, n, ncomb)  ( ((ncomb)  + ((j) - (i))) - 1 - (((n) - (i)) * ((n) - (i) - 1)) / 2 )

/* For every pair of positions in ix_arr[st..end] (both ends inclusive), adds
   to the condensed-matrix entry of the corresponding pair of indices either
   1 (when exp_remainder <= 1) or exp_remainder (otherwise). */
void increase_comb_counter(size_t ix_arr[], size_t st, size_t end, size_t n, double counter[], double exp_remainder)
{
    const size_t ncomb = (n * (n - 1)) / 2;
    const double increment = (exp_remainder <= 1) ? 1. : exp_remainder;

    for (size_t pos_a = st; pos_a < end; pos_a++)
    {
        for (size_t pos_b = pos_a + 1; pos_b <= end; pos_b++)
        {
            /* the smaller index always goes first in the condensed layout */
            const size_t lower  = std::min(ix_arr[pos_a], ix_arr[pos_b]);
            const size_t higher = std::max(ix_arr[pos_a], ix_arr[pos_b]);
            counter[ix_comb(lower, higher, n, ncomb)] += increment;
        }
    }
}
250
-
251
- void increase_comb_counter(size_t ix_arr[], size_t st, size_t end, size_t n,
252
- double *restrict counter, double *restrict weights, double exp_remainder)
253
- {
254
- size_t i, j;
255
- size_t ncomb = (n * (n - 1)) / 2;
256
- if (exp_remainder <= 1)
257
- for (size_t el1 = st; el1 < end; el1++)
258
- {
259
- for (size_t el2 = el1 + 1; el2 <= end; el2++)
260
- {
261
- i = std::min(ix_arr[el1], ix_arr[el2]);
262
- j = std::max(ix_arr[el1], ix_arr[el2]);
263
- // counter[i * (n - (i+1)/2) + j - i - 1] += weights[i] * weights[j]; /* beaware integer division */
264
- counter[ix_comb(i, j, n, ncomb)] += weights[i] * weights[j];
265
- }
266
- }
267
- else
268
- for (size_t el1 = st; el1 < end; el1++)
269
- {
270
- for (size_t el2 = el1 + 1; el2 <= end; el2++)
271
- {
272
- i = std::min(ix_arr[el1], ix_arr[el2]);
273
- j = std::max(ix_arr[el1], ix_arr[el2]);
274
- counter[ix_comb(i, j, n, ncomb)] += weights[i] * weights[j] * exp_remainder;
275
- }
276
- }
277
- }
278
-
279
- /* Note to self: don't try to merge this into a template with the one above, as the other one has the 'restrict' qualifier */
280
/* Variant taking the weights as a hash map keyed by index. For every pair of
   positions in ix_arr[st..end] (both ends inclusive), adds the product of both
   weights to the pair's condensed-matrix entry, further multiplied by
   exp_remainder when that value is above 1.
   Weights are read through operator[], so an index that is absent from the
   map gets inserted with a weight of zero. */
void increase_comb_counter(size_t ix_arr[], size_t st, size_t end, size_t n,
                           double counter[], std::unordered_map<size_t, double> &weights, double exp_remainder)
{
    const size_t ncomb = (n * (n - 1)) / 2;
    const bool scale_by_remainder = !(exp_remainder <= 1);

    /* same formula as the 'ix_comb' macro: position of pair (i, j), i < j,
       within the condensed upper-triangular array */
    const auto pair_index = [n, ncomb](size_t i, size_t j) -> size_t
    {
        return ((ncomb + (j - i)) - 1) - ((n - i) * (n - i - 1)) / 2;
    };

    for (size_t pos_a = st; pos_a < end; pos_a++)
    {
        for (size_t pos_b = pos_a + 1; pos_b <= end; pos_b++)
        {
            const size_t lower  = std::min(ix_arr[pos_a], ix_arr[pos_b]);
            const size_t higher = std::max(ix_arr[pos_a], ix_arr[pos_b]);
            const double w_lower  = weights[lower];
            const double w_higher = weights[higher];
            if (scale_by_remainder)
                counter[pair_index(lower, higher)] += w_lower * w_higher * exp_remainder;
            else
                counter[pair_index(lower, higher)] += w_lower * w_higher;
        }
    }
}
307
-
308
/* Cross-group counter. The leading entries of ix_arr[st..end] whose value is
   below 'split_ix' form the first group and everything after them the second.
   For each (first, second) pair, adds 1 (when exp_remainder <= 1) or
   exp_remainder (otherwise) to
       counter[first * (n - split_ix) + second - split_ix]. */
void increase_comb_counter_in_groups(size_t ix_arr[], size_t st, size_t end, size_t split_ix, size_t n,
                                     double counter[], double exp_remainder)
{
    /* find where the first group stops */
    size_t second_start = st;
    while (second_start <= end && ix_arr[second_start] < split_ix)
        second_start++;

    const size_t row_len = n - split_ix;
    const double increment = (exp_remainder <= 1) ? 1. : exp_remainder;

    for (size_t pos_a = st; pos_a < second_start; pos_a++)
        for (size_t pos_b = second_start; pos_b <= end; pos_b++)
            counter[ix_arr[pos_a] * row_len + ix_arr[pos_b] - split_ix] += increment;
}
329
-
330
- void increase_comb_counter_in_groups(size_t ix_arr[], size_t st, size_t end, size_t split_ix, size_t n,
331
- double *restrict counter, double *restrict weights, double exp_remainder)
332
- {
333
- size_t n_group = 0;
334
- for (size_t ix = st; ix <= end; ix++)
335
- if (ix_arr[ix] < split_ix)
336
- n_group++;
337
- else
338
- break;
339
-
340
- n = n - split_ix;
341
-
342
- if (exp_remainder <= 1)
343
- for (size_t ix1 = st; ix1 < st + n_group; ix1++)
344
- for (size_t ix2 = st + n_group; ix2 <= end; ix2++)
345
- counter[ix_arr[ix1] * n + ix_arr[ix2] - split_ix]
346
- +=
347
- weights[ix_arr[ix1]] * weights[ix_arr[ix2]];
348
- else
349
- for (size_t ix1 = st; ix1 < st + n_group; ix1++)
350
- for (size_t ix2 = st + n_group; ix2 <= end; ix2++)
351
- counter[ix_arr[ix1] * n + ix_arr[ix2] - split_ix]
352
- +=
353
- weights[ix_arr[ix1]] * weights[ix_arr[ix2]] * exp_remainder;
354
- }
355
-
356
- void tmat_to_dense(double *restrict tmat, double *restrict dmat, size_t n, bool diag_to_one)
357
- {
358
- size_t ncomb = (n * (n - 1)) / 2;
359
- for (size_t i = 0; i < (n-1); i++)
360
- {
361
- for (size_t j = i + 1; j < n; j++)
362
- {
363
- // dmat[i + j * n] = dmat[j + i * n] = tmat[i * (n - (i+1)/2) + j - i - 1];
364
- dmat[i + j * n] = dmat[j + i * n] = tmat[ix_comb(i, j, n, ncomb)];
365
- }
366
- }
367
- if (diag_to_one)
368
- for (size_t i = 0; i < n; i++)
369
- dmat[i + i * n] = 1;
370
- else
371
- for (size_t i = 0; i < n; i++)
372
- dmat[i + i * n] = 0;
373
- }
374
-
375
- /* Note: do NOT divide by (n-1) as in some situations it will still need to calculate
376
- the standard deviation with 1-2 observations only (e.g. when using the extended model
377
- and some column has many rows but only 2 non-missing values, or when using the non-pooled
378
- std criterion) */
379
#define SD_MIN 1e-12

/* Standard deviation (dividing by 'cnt', not 'cnt - 1') from a count, a sum
   and a sum of squares. Returns zero for fewer than two observations; the
   variance is floored at SD_MIN before taking the square root. */
double calc_sd_raw(size_t cnt, long double sum, long double sum_sq)
{
    if (cnt <= 1)
        return 0.;

    const long double cnt_l = (long double)cnt;
    return sqrtl(fmax(SD_MIN, (sum_sq - ((sum * sum) / cnt_l)) / cnt_l ));
}

/* Same calculation as above, with the floor applied and the result returned
   in 'long double' precision. */
long double calc_sd_raw_l(size_t cnt, long double sum, long double sum_sq)
{
    if (cnt <= 1)
        return 0.;

    const long double cnt_l = (long double)cnt;
    return sqrtl(fmaxl(SD_MIN, (sum_sq - ((sum * sum) / cnt_l)) / cnt_l ));
}
395
-
396
- void build_btree_sampler(std::vector<double> &btree_weights, double *restrict sample_weights,
397
- size_t nrows, size_t &log2_n, size_t &btree_offset)
398
- {
399
- /* build a perfectly-balanced binary search tree in which each node will
400
- hold the sum of the weights of its children */
401
- log2_n = log2ceil(nrows);
402
- if (!btree_weights.size())
403
- btree_weights.resize(pow2(log2_n + 1), 0);
404
- else
405
- btree_weights.assign(btree_weights.size(), 0);
406
- btree_offset = pow2(log2_n) - 1;
407
-
408
- std::copy(sample_weights, sample_weights + nrows, btree_weights.begin() + btree_offset);
409
- for (size_t ix = btree_weights.size() - 1; ix > 0; ix--)
410
- btree_weights[ix_parent(ix)] += btree_weights[ix];
411
-
412
- if (is_na_or_inf(btree_weights[0]))
413
- {
414
- fprintf(stderr, "Numeric precision error with sample weights, will not use them.\n");
415
- log2_n = 0;
416
- btree_weights.clear();
417
- btree_weights.shrink_to_fit();
418
- }
419
- }
420
-
421
- void sample_random_rows(std::vector<size_t> &ix_arr, size_t nrows, bool with_replacement,
422
- RNG_engine &rnd_generator, std::vector<size_t> &ix_all,
423
- double sample_weights[], std::vector<double> &btree_weights,
424
- size_t log2_n, size_t btree_offset, std::vector<bool> &is_repeated)
425
- {
426
- size_t ntake = ix_arr.size();
427
-
428
- /* if with replacement, just generate random uniform numbers */
429
- if (with_replacement)
430
- {
431
- if (sample_weights == NULL)
432
- {
433
- std::uniform_int_distribution<size_t> runif(0, nrows - 1);
434
- for (size_t &ix : ix_arr)
435
- ix = runif(rnd_generator);
436
- }
437
-
438
- else
439
- {
440
- std::discrete_distribution<size_t> runif(sample_weights, sample_weights + nrows);
441
- for (size_t &ix : ix_arr)
442
- ix = runif(rnd_generator);
443
- }
444
- }
445
-
446
- /* if all the elements are needed, don't bother with any sampling */
447
- else if (ntake == nrows)
448
- {
449
- std::iota(ix_arr.begin(), ix_arr.end(), (size_t)0);
450
- }
451
-
452
-
453
- /* if there are sample weights, use binary trees to keep track and update weight
454
- https://stackoverflow.com/questions/57599509/c-random-non-repeated-integers-with-weights */
455
- else if (sample_weights != NULL)
456
- {
457
- double rnd_subrange, w_left;
458
- double curr_subrange;
459
- size_t curr_ix;
460
- for (size_t &ix : ix_arr)
461
- {
462
- /* go down the tree by drawing a random number and
463
- checking if it falls in the left or right ranges */
464
- curr_ix = 0;
465
- curr_subrange = btree_weights[0];
466
- for (size_t lev = 0; lev < log2_n; lev++)
467
- {
468
- rnd_subrange = std::uniform_real_distribution<double>(0, curr_subrange)(rnd_generator);
469
- w_left = btree_weights[ix_child(curr_ix)];
470
- curr_ix = ix_child(curr_ix) + (rnd_subrange >= w_left);
471
- curr_subrange = btree_weights[curr_ix];
472
- }
473
-
474
- /* finally, determine element to choose in this iteration */
475
- ix = curr_ix - btree_offset;
476
-
477
- /* now remove the weight of the chosen element */
478
- btree_weights[curr_ix] = 0;
479
- for (size_t lev = 0; lev < log2_n; lev++)
480
- {
481
- curr_ix = ix_parent(curr_ix);
482
- btree_weights[curr_ix] = btree_weights[ix_child(curr_ix)]
483
- + btree_weights[ix_child(curr_ix) + 1];
484
- }
485
- }
486
- }
487
-
488
- /* if no sample weights and not with replacement (most common case expected),
489
- then use different algorithms depending on the sampled fraction */
490
- else
491
- {
492
-
493
- /* if sampling a larger fraction, fill an array enumerating the rows, shuffle, and take first N */
494
- if (ntake >= (nrows / 2))
495
- {
496
-
497
- if (!ix_all.size())
498
- ix_all.resize(nrows);
499
-
500
- /* in order for random seeds to always be reproducible, don't re-use previous shuffles */
501
- std::iota(ix_all.begin(), ix_all.end(), (size_t)0);
502
-
503
- /* If the number of sampled elements is large, do a full shuffle, enjoy simd-instructs when copying over */
504
- if (ntake >= ((nrows * 3)/4))
505
- {
506
- std::shuffle(ix_all.begin(), ix_all.end(), rnd_generator);
507
- ix_arr.assign(ix_all.begin(), ix_all.begin() + ntake);
508
- }
509
-
510
- /* otherwise, do only a partial shuffle (use Yates algorithm) and copy elements along the way */
511
- else
512
- {
513
- size_t chosen;
514
- for (size_t i = nrows - 1; i >= nrows - ntake; i--)
515
- {
516
- chosen = std::uniform_int_distribution<size_t>(0, i)(rnd_generator);
517
- ix_arr[nrows - i - 1] = ix_all[chosen];
518
- ix_all[chosen] = ix_all[i];
519
- }
520
- }
521
-
522
- }
523
-
524
- /* If the sample size is small, use Floyd's random sampling algorithm
525
- https://stackoverflow.com/questions/2394246/algorithm-to-select-a-single-random-combination-of-values */
526
- else
527
- {
528
-
529
- size_t candidate;
530
-
531
- /* if the sample size is relatively large, use a temporary boolean vector */
532
- if (((long double)ntake / (long double)nrows) > (1. / 20.))
533
- {
534
-
535
- if (!is_repeated.size())
536
- is_repeated.resize(nrows, false);
537
- else
538
- is_repeated.assign(is_repeated.size(), false);
539
-
540
- for (size_t rnd_ix = nrows - ntake; rnd_ix < nrows; rnd_ix++)
541
- {
542
- candidate = std::uniform_int_distribution<size_t>(0, rnd_ix)(rnd_generator);
543
- if (is_repeated[candidate])
544
- {
545
- ix_arr[ntake - (nrows - rnd_ix)] = rnd_ix;
546
- is_repeated[rnd_ix] = true;
547
- }
548
-
549
- else
550
- {
551
- ix_arr[ntake - (nrows - rnd_ix)] = candidate;
552
- is_repeated[candidate] = true;
553
- }
554
- }
555
-
556
- }
557
-
558
- /* if the sample size is very small, use an unordered set */
559
- else
560
- {
561
-
562
- std::unordered_set<size_t> repeated_set;
563
- for (size_t rnd_ix = nrows - ntake; rnd_ix < nrows; rnd_ix++)
564
- {
565
- candidate = std::uniform_int_distribution<size_t>(0, rnd_ix)(rnd_generator);
566
- if (repeated_set.find(candidate) == repeated_set.end()) /* TODO: switch to C++20 'contains' */
567
- {
568
- ix_arr[ntake - (nrows - rnd_ix)] = candidate;
569
- repeated_set.insert(candidate);
570
- }
571
-
572
- else
573
- {
574
- ix_arr[ntake - (nrows - rnd_ix)] = rnd_ix;
575
- repeated_set.insert(rnd_ix);
576
- }
577
- }
578
-
579
- }
580
-
581
- }
582
-
583
- }
584
- }
585
-
586
- /* https://stackoverflow.com/questions/57599509/c-random-non-repeated-integers-with-weights */
587
- void weighted_shuffle(size_t *restrict outp, size_t n, double *restrict weights, double *restrict buffer_arr, RNG_engine &rnd_generator)
588
- {
589
- /* determine smallest power of two that is larger than N */
590
- size_t tree_levels = log2ceil(n);
591
-
592
- /* initialize vector with place-holders for perfectly-balanced tree */
593
- std::fill(buffer_arr, buffer_arr + pow2(tree_levels + 1), (double)0);
594
-
595
- /* compute sums for the tree leaves at each node */
596
- size_t offset = pow2(tree_levels) - 1;
597
- for (size_t ix = 0; ix < n; ix++) {
598
- buffer_arr[ix + offset] = weights[ix];
599
- }
600
- for (size_t ix = pow2(tree_levels+1) - 1; ix > 0; ix--) {
601
- buffer_arr[ix_parent(ix)] += buffer_arr[ix];
602
- }
603
-
604
- /* sample according to uniform distribution */
605
- double rnd_subrange, w_left;
606
- double curr_subrange;
607
- int curr_ix;
608
-
609
- for (size_t el = 0; el < n; el++)
610
- {
611
- /* go down the tree by drawing a random number and
612
- checking if it falls in the left or right sub-ranges */
613
- curr_ix = 0;
614
- curr_subrange = buffer_arr[0];
615
- for (size_t lev = 0; lev < tree_levels; lev++)
616
- {
617
- rnd_subrange = std::uniform_real_distribution<double>(0., curr_subrange)(rnd_generator);
618
- w_left = buffer_arr[ix_child(curr_ix)];
619
- curr_ix = ix_child(curr_ix) + (rnd_subrange >= w_left);
620
- curr_subrange = buffer_arr[curr_ix];
621
- }
622
-
623
- /* finally, add element from this iteration */
624
- outp[el] = curr_ix - offset;
625
-
626
- /* now remove the weight of the chosen element */
627
- buffer_arr[curr_ix] = 0;
628
- for (size_t lev = 0; lev < tree_levels; lev++)
629
- {
630
- curr_ix = ix_parent(curr_ix);
631
- buffer_arr[curr_ix] = buffer_arr[ix_child(curr_ix)]
632
- + buffer_arr[ix_child(curr_ix) + 1];
633
- }
634
- }
635
-
636
- }
637
-
638
/* For hyperplane intersections.
   Partitions ix_arr[st..end] (inclusive) so that the entries whose value is
   <= 'split_point' come first. Note that 'x' is indexed by position relative to
   'st' (x[0] belongs to ix_arr[st]), not by the row number stored in 'ix_arr'.
   Returns the position of the first entry that was not moved to the left. */
size_t divide_subset_split(size_t ix_arr[], double x[], size_t st, size_t end, double split_point)
{
    const size_t first = st;
    size_t n_left = 0;
    for (size_t pos = first; pos <= end; ++pos)
    {
        if (!(x[pos - first] <= split_point))
            continue;
        std::swap(ix_arr[first + n_left], ix_arr[pos]);
        ++n_left;
    }
    return first + n_left;
}
655
-
656
- /* For numerical columns */
657
- void divide_subset_split(size_t ix_arr[], double x[], size_t st, size_t end, double split_point,
658
- MissingAction missing_action, size_t &st_NA, size_t &end_NA, size_t &split_ix)
659
- {
660
- size_t temp;
661
-
662
- /* if NAs are not to be bothered with, just need to do a single pass */
663
- if (missing_action == Fail)
664
- {
665
- /* move to the left if it's l.e. split point */
666
- for (size_t row = st; row <= end; row++)
667
- {
668
- if (x[ix_arr[row]] <= split_point)
669
- {
670
- temp = ix_arr[st];
671
- ix_arr[st] = ix_arr[row];
672
- ix_arr[row] = temp;
673
- st++;
674
- }
675
- }
676
- split_ix = st;
677
- }
678
-
679
- /* otherwise, first put to the left all l.e. and not NA, then all NAs to the end of the left */
680
- else
681
- {
682
- for (size_t row = st; row <= end; row++)
683
- {
684
- if (!isnan(x[ix_arr[row]]) && x[ix_arr[row]] <= split_point)
685
- {
686
- temp = ix_arr[st];
687
- ix_arr[st] = ix_arr[row];
688
- ix_arr[row] = temp;
689
- st++;
690
- }
691
- }
692
- st_NA = st;
693
-
694
- for (size_t row = st; row <= end; row++)
695
- {
696
- if (isnan(x[ix_arr[row]]))
697
- {
698
- temp = ix_arr[st];
699
- ix_arr[st] = ix_arr[row];
700
- ix_arr[row] = temp;
701
- st++;
702
- }
703
- }
704
- end_NA = st;
705
- }
706
- }
707
-
708
- /* For sparse numeric columns */
709
- void divide_subset_split(size_t ix_arr[], size_t st, size_t end, size_t col_num,
710
- double Xc[], sparse_ix Xc_ind[], sparse_ix Xc_indptr[], double split_point,
711
- MissingAction missing_action, size_t &st_NA, size_t &end_NA, size_t &split_ix)
712
- {
713
- /* TODO: this is a mess, needs refactoring */
714
- /* TODO: when moving zeros, would be better to instead move by '>' (opposite as in here) */
715
- if (Xc_indptr[col_num] == Xc_indptr[col_num + 1])
716
- {
717
- if (missing_action == Fail)
718
- {
719
- split_ix = (0 <= split_point)? (end+1) : st;
720
- }
721
-
722
- else
723
- {
724
- st_NA = (0 <= split_point)? (end+1) : st;
725
- end_NA = (0 <= split_point)? (end+1) : st;
726
- }
727
-
728
- }
729
-
730
- size_t st_col = Xc_indptr[col_num];
731
- size_t end_col = Xc_indptr[col_num + 1] - 1;
732
- size_t curr_pos = st_col;
733
- size_t ind_end_col = Xc_ind[end_col];
734
- size_t temp;
735
- bool move_zeros = 0 <= split_point;
736
- size_t *ptr_st = std::lower_bound(ix_arr + st, ix_arr + end + 1, Xc_ind[st_col]);
737
-
738
- if (move_zeros && ptr_st > ix_arr + st)
739
- st = ptr_st - ix_arr;
740
-
741
- if (missing_action == Fail)
742
- {
743
- if (move_zeros)
744
- {
745
- for (size_t *row = ptr_st;
746
- row != ix_arr + end + 1;
747
- )
748
- {
749
- if (curr_pos >= end_col + 1)
750
- {
751
- for (size_t *r = row; r <= ix_arr + end; r++)
752
- {
753
- temp = ix_arr[st];
754
- ix_arr[st] = *r;
755
- *r = temp;
756
- st++;
757
- }
758
- break;
759
- }
760
-
761
- if (Xc_ind[curr_pos] == *row)
762
- {
763
- if (Xc[curr_pos] <= split_point)
764
- {
765
- temp = ix_arr[st];
766
- ix_arr[st] = *row;
767
- *row = temp;
768
- st++;
769
- }
770
- if (curr_pos == end_col && row < ix_arr + end)
771
- for (size_t *r = row + 1; r <= ix_arr + end; r++)
772
- {
773
- temp = ix_arr[st];
774
- ix_arr[st] = *r;
775
- *r = temp;
776
- st++;
777
- }
778
- if (row == ix_arr + end || curr_pos == end_col) break;
779
- curr_pos = std::lower_bound(Xc_ind + curr_pos + 1, Xc_ind + end_col + 1, *(++row)) - Xc_ind;
780
- }
781
-
782
- else
783
- {
784
- if (Xc_ind[curr_pos] > *row)
785
- {
786
- while (row <= ix_arr + end && Xc_ind[curr_pos] > *row)
787
- {
788
- temp = ix_arr[st];
789
- ix_arr[st] = *row;
790
- *row = temp;
791
- st++; row++;
792
- }
793
- }
794
-
795
- else
796
- curr_pos = std::lower_bound(Xc_ind + curr_pos + 1, Xc_ind + end_col + 1, *row) - Xc_ind;
797
- }
798
- }
799
- }
800
-
801
- else /* don't move zeros */
802
- {
803
- for (size_t *row = ptr_st;
804
- row != ix_arr + end + 1 && curr_pos != end_col + 1 && ind_end_col >= *row;
805
- )
806
- {
807
- if (Xc_ind[curr_pos] == *row)
808
- {
809
- if (Xc[curr_pos] <= split_point)
810
- {
811
- temp = ix_arr[st];
812
- ix_arr[st] = *row;
813
- *row = temp;
814
- st++;
815
- }
816
- if (row == ix_arr + end || curr_pos == end_col) break;
817
- curr_pos = std::lower_bound(Xc_ind + curr_pos + 1, Xc_ind + end_col + 1, *(++row)) - Xc_ind;
818
- }
819
-
820
- else
821
- {
822
- if (Xc_ind[curr_pos] > *row)
823
- row = std::lower_bound(row + 1, ix_arr + end + 1, Xc_ind[curr_pos]);
824
- else
825
- curr_pos = std::lower_bound(Xc_ind + curr_pos + 1, Xc_ind + end_col + 1, *row) - Xc_ind;
826
- }
827
- }
828
- }
829
-
830
- split_ix = st;
831
- }
832
-
833
- else /* can have NAs */
834
- {
835
-
836
- bool has_NAs = false;
837
- if (move_zeros)
838
- {
839
- for (size_t *row = ptr_st;
840
- row != ix_arr + end + 1;
841
- )
842
- {
843
- if (curr_pos >= end_col + 1)
844
- {
845
- for (size_t *r = row; r <= ix_arr + end; r++)
846
- {
847
- temp = ix_arr[st];
848
- ix_arr[st] = *r;
849
- *r = temp;
850
- st++;
851
- }
852
- break;
853
- }
854
-
855
- if (Xc_ind[curr_pos] == *row)
856
- {
857
- if (isnan(Xc[curr_pos]))
858
- has_NAs = true;
859
- else if (Xc[curr_pos] <= split_point)
860
- {
861
- temp = ix_arr[st];
862
- ix_arr[st] = *row;
863
- *row = temp;
864
- st++;
865
- }
866
- if (curr_pos == end_col && row < ix_arr + end)
867
- for (size_t *r = row + 1; r <= ix_arr + end; r++)
868
- {
869
- temp = ix_arr[st];
870
- ix_arr[st] = *r;
871
- *r = temp;
872
- st++;
873
- }
874
- if (row == ix_arr + end || curr_pos == end_col) break;
875
- curr_pos = std::lower_bound(Xc_ind + curr_pos + 1, Xc_ind + end_col + 1, *(++row)) - Xc_ind;
876
- }
877
-
878
- else
879
- {
880
- if (Xc_ind[curr_pos] > *row)
881
- {
882
- while (row <= ix_arr + end && Xc_ind[curr_pos] > *row)
883
- {
884
- temp = ix_arr[st];
885
- ix_arr[st] = *row;
886
- *row = temp;
887
- st++; row++;
888
- }
889
- }
890
-
891
- else
892
- {
893
- curr_pos = std::lower_bound(Xc_ind + curr_pos + 1, Xc_ind + end_col + 1, *row) - Xc_ind;
894
- }
895
- }
896
- }
897
- }
898
-
899
- else /* don't move zeros */
900
- {
901
- for (size_t *row = ptr_st;
902
- row != ix_arr + end + 1 && curr_pos != end_col + 1 && ind_end_col >= *row;
903
- )
904
- {
905
- if (Xc_ind[curr_pos] == *row)
906
- {
907
- if (isnan(Xc[curr_pos])) has_NAs = true;
908
- if (Xc[curr_pos] <= split_point && !isnan(Xc[curr_pos]))
909
- {
910
- temp = ix_arr[st];
911
- ix_arr[st] = *row;
912
- *row = temp;
913
- st++;
914
- }
915
- if (row == ix_arr + end || curr_pos == end_col) break;
916
- curr_pos = std::lower_bound(Xc_ind + curr_pos + 1, Xc_ind + end_col + 1, *(++row)) - Xc_ind;
917
- }
918
-
919
- else
920
- {
921
- if (Xc_ind[curr_pos] > *row)
922
- row = std::lower_bound(row + 1, ix_arr + end + 1, Xc_ind[curr_pos]);
923
- else
924
- curr_pos = std::lower_bound(Xc_ind + curr_pos + 1, Xc_ind + end_col + 1, *row) - Xc_ind;
925
- }
926
- }
927
- }
928
-
929
-
930
- st_NA = st;
931
- if (has_NAs)
932
- {
933
- curr_pos = st_col;
934
- std::sort(ix_arr + st, ix_arr + end + 1);
935
- for (size_t *row = ix_arr + st;
936
- row != ix_arr + end + 1 && curr_pos != end_col + 1 && ind_end_col >= *row;
937
- )
938
- {
939
- if (Xc_ind[curr_pos] == *row)
940
- {
941
- if (isnan(Xc[curr_pos]))
942
- {
943
- temp = ix_arr[st];
944
- ix_arr[st] = *row;
945
- *row = temp;
946
- st++;
947
- }
948
- if (row == ix_arr + end || curr_pos == end_col) break;
949
- curr_pos = std::lower_bound(Xc_ind + curr_pos + 1, Xc_ind + end_col + 1, *(++row)) - Xc_ind;
950
- }
951
-
952
- else
953
- {
954
- if (Xc_ind[curr_pos] > *row)
955
- row = std::lower_bound(row + 1, ix_arr + end + 1, Xc_ind[curr_pos]);
956
- else
957
- curr_pos = std::lower_bound(Xc_ind + curr_pos + 1, Xc_ind + end_col + 1, *row) - Xc_ind;
958
- }
959
- }
960
- }
961
- end_NA = st;
962
-
963
- }
964
-
965
- }
966
-
967
- /* For categorical columns split by subset */
968
- void divide_subset_split(size_t ix_arr[], int x[], size_t st, size_t end, char split_categ[],
969
- MissingAction missing_action, size_t &st_NA, size_t &end_NA, size_t &split_ix)
970
- {
971
- size_t temp;
972
-
973
- /* if NAs are not to be bothered with, just need to do a single pass */
974
- if (missing_action == Fail)
975
- {
976
- /* move to the left if it's l.e. than the split point */
977
- for (size_t row = st; row <= end; row++)
978
- {
979
- if (split_categ[ x[ix_arr[row]] ] == 1)
980
- {
981
- temp = ix_arr[st];
982
- ix_arr[st] = ix_arr[row];
983
- ix_arr[row] = temp;
984
- st++;
985
- }
986
- }
987
- split_ix = st;
988
- }
989
-
990
- /* otherwise, first put to the left all l.e. and not NA, then all NAs to the end of the left */
991
- else
992
- {
993
- for (size_t row = st; row <= end; row++)
994
- {
995
- if (x[ix_arr[row]] >= 0 && split_categ[ x[ix_arr[row]] ] == 1)
996
- {
997
- temp = ix_arr[st];
998
- ix_arr[st] = ix_arr[row];
999
- ix_arr[row] = temp;
1000
- st++;
1001
- }
1002
- }
1003
- st_NA = st;
1004
-
1005
- for (size_t row = st; row <= end; row++)
1006
- {
1007
- if (x[ix_arr[row]] < 0)
1008
- {
1009
- temp = ix_arr[st];
1010
- ix_arr[st] = ix_arr[row];
1011
- ix_arr[row] = temp;
1012
- st++;
1013
- }
1014
- }
1015
- end_NA = st;
1016
- }
1017
- }
1018
-
1019
- /* For categorical columns split by subset, used at prediction time (with similarity) */
1020
- void divide_subset_split(size_t ix_arr[], int x[], size_t st, size_t end, char split_categ[],
1021
- int ncat, MissingAction missing_action, NewCategAction new_cat_action,
1022
- bool move_new_to_left, size_t &st_NA, size_t &end_NA, size_t &split_ix)
1023
- {
1024
- size_t temp;
1025
-
1026
- /* if NAs are not to be bothered with, just need to do a single pass */
1027
- if (missing_action == Fail && new_cat_action != Weighted)
1028
- {
1029
- if (new_cat_action == Smallest && move_new_to_left)
1030
- {
1031
- for (size_t row = st; row <= end; row++)
1032
- {
1033
- if (split_categ[ x[ix_arr[row]] ] == 1 || x[ix_arr[row]] >= ncat)
1034
- {
1035
- temp = ix_arr[st];
1036
- ix_arr[st] = ix_arr[row];
1037
- ix_arr[row] = temp;
1038
- st++;
1039
- }
1040
- }
1041
- }
1042
-
1043
- else
1044
- {
1045
- for (size_t row = st; row <= end; row++)
1046
- {
1047
- if (split_categ[ x[ix_arr[row]] ] == 1)
1048
- {
1049
- temp = ix_arr[st];
1050
- ix_arr[st] = ix_arr[row];
1051
- ix_arr[row] = temp;
1052
- st++;
1053
- }
1054
- }
1055
- }
1056
-
1057
- split_ix = st;
1058
- }
1059
-
1060
- /* otherwise, first put to the left all l.e. and not NA, then all NAs to the end of the left */
1061
- else
1062
- {
1063
- for (size_t row = st; row <= end; row++)
1064
- {
1065
- if (x[ix_arr[row]] >= 0 && split_categ[ x[ix_arr[row]] ] == 1)
1066
- {
1067
- temp = ix_arr[st];
1068
- ix_arr[st] = ix_arr[row];
1069
- ix_arr[row] = temp;
1070
- st++;
1071
- }
1072
- }
1073
- st_NA = st;
1074
-
1075
- if (new_cat_action == Weighted)
1076
- {
1077
- for (size_t row = st; row <= end; row++)
1078
- {
1079
- if (x[ix_arr[row]] < 0 || split_categ[ x[ix_arr[row]] ] == (-1))
1080
- {
1081
- temp = ix_arr[st];
1082
- ix_arr[st] = ix_arr[row];
1083
- ix_arr[row] = temp;
1084
- st++;
1085
- }
1086
- }
1087
- }
1088
-
1089
- else
1090
- {
1091
- for (size_t row = st; row <= end; row++)
1092
- {
1093
- if (x[ix_arr[row]] < 0)
1094
- {
1095
- temp = ix_arr[st];
1096
- ix_arr[st] = ix_arr[row];
1097
- ix_arr[row] = temp;
1098
- st++;
1099
- }
1100
- }
1101
- }
1102
-
1103
- end_NA = st;
1104
- }
1105
- }
1106
-
1107
- /* For categoricals split on a single category */
1108
- void divide_subset_split(size_t ix_arr[], int x[], size_t st, size_t end, int split_categ,
1109
- MissingAction missing_action, size_t &st_NA, size_t &end_NA, size_t &split_ix)
1110
- {
1111
- size_t temp;
1112
-
1113
- /* if NAs are not to be bothered with, just need to do a single pass */
1114
- if (missing_action == Fail)
1115
- {
1116
- /* move to the left if it's l.e. than the split point */
1117
- for (size_t row = st; row <= end; row++)
1118
- {
1119
- if (x[ix_arr[row]] == split_categ)
1120
- {
1121
- temp = ix_arr[st];
1122
- ix_arr[st] = ix_arr[row];
1123
- ix_arr[row] = temp;
1124
- st++;
1125
- }
1126
- }
1127
- split_ix = st;
1128
- }
1129
-
1130
- /* otherwise, first put to the left all l.e. and not NA, then all NAs to the end of the left */
1131
- else
1132
- {
1133
- for (size_t row = st; row <= end; row++)
1134
- {
1135
- if (x[ix_arr[row]] == split_categ)
1136
- {
1137
- temp = ix_arr[st];
1138
- ix_arr[st] = ix_arr[row];
1139
- ix_arr[row] = temp;
1140
- st++;
1141
- }
1142
- }
1143
- st_NA = st;
1144
-
1145
- for (size_t row = st; row <= end; row++)
1146
- {
1147
- if (x[ix_arr[row]] < 0)
1148
- {
1149
- temp = ix_arr[st];
1150
- ix_arr[st] = ix_arr[row];
1151
- ix_arr[row] = temp;
1152
- st++;
1153
- }
1154
- }
1155
- end_NA = st;
1156
- }
1157
- }
1158
-
1159
- /* For categoricals split on sub-set that turned out to have 2 categories only (prediction-time) */
1160
- void divide_subset_split(size_t ix_arr[], int x[], size_t st, size_t end,
1161
- MissingAction missing_action, NewCategAction new_cat_action,
1162
- bool move_new_to_left, size_t &st_NA, size_t &end_NA, size_t &split_ix)
1163
- {
1164
- size_t temp;
1165
-
1166
- /* if NAs are not to be bothered with, just need to do a single pass */
1167
- if (missing_action == Fail)
1168
- {
1169
- /* move to the left if it's l.e. than the split point */
1170
- if (new_cat_action == Smallest && move_new_to_left)
1171
- {
1172
- for (size_t row = st; row <= end; row++)
1173
- {
1174
- if (x[ix_arr[row]] == 0 || x[ix_arr[row]] > 1)
1175
- {
1176
- temp = ix_arr[st];
1177
- ix_arr[st] = ix_arr[row];
1178
- ix_arr[row] = temp;
1179
- st++;
1180
- }
1181
- }
1182
- }
1183
-
1184
- else
1185
- {
1186
- for (size_t row = st; row <= end; row++)
1187
- {
1188
- if (x[ix_arr[row]] == 0)
1189
- {
1190
- temp = ix_arr[st];
1191
- ix_arr[st] = ix_arr[row];
1192
- ix_arr[row] = temp;
1193
- st++;
1194
- }
1195
- }
1196
- }
1197
- split_ix = st;
1198
- }
1199
-
1200
- /* otherwise, first put to the left all l.e. and not NA, then all NAs to the end of the left */
1201
- else
1202
- {
1203
- if (new_cat_action == Smallest && move_new_to_left)
1204
- {
1205
- for (size_t row = st; row <= end; row++)
1206
- {
1207
- if (x[ix_arr[row]] == 0 || x[ix_arr[row]] > 1)
1208
- {
1209
- temp = ix_arr[st];
1210
- ix_arr[st] = ix_arr[row];
1211
- ix_arr[row] = temp;
1212
- st++;
1213
- }
1214
- }
1215
- st_NA = st;
1216
-
1217
- for (size_t row = st; row <= end; row++)
1218
- {
1219
- if (x[ix_arr[row]] < 0)
1220
- {
1221
- temp = ix_arr[st];
1222
- ix_arr[st] = ix_arr[row];
1223
- ix_arr[row] = temp;
1224
- st++;
1225
- }
1226
- }
1227
- end_NA = st;
1228
- }
1229
-
1230
- else
1231
- {
1232
- for (size_t row = st; row <= end; row++)
1233
- {
1234
- if (x[ix_arr[row]] == 0)
1235
- {
1236
- temp = ix_arr[st];
1237
- ix_arr[st] = ix_arr[row];
1238
- ix_arr[row] = temp;
1239
- st++;
1240
- }
1241
- }
1242
- st_NA = st;
1243
-
1244
- for (size_t row = st; row <= end; row++)
1245
- {
1246
- if (x[ix_arr[row]] < 0)
1247
- {
1248
- temp = ix_arr[st];
1249
- ix_arr[st] = ix_arr[row];
1250
- ix_arr[row] = temp;
1251
- st++;
1252
- }
1253
- }
1254
- end_NA = st;
1255
- }
1256
- }
1257
- }
1258
-
1259
- /* for regular numeric columns */
1260
- void get_range(size_t ix_arr[], double x[], size_t st, size_t end,
1261
- MissingAction missing_action, double &xmin, double &xmax, bool &unsplittable)
1262
- {
1263
- xmin = HUGE_VAL;
1264
- xmax = -HUGE_VAL;
1265
-
1266
- if (missing_action == Fail)
1267
- {
1268
- for (size_t row = st; row <= end; row++)
1269
- {
1270
- xmin = (x[ix_arr[row]] < xmin)? x[ix_arr[row]] : xmin;
1271
- xmax = (x[ix_arr[row]] > xmax)? x[ix_arr[row]] : xmax;
1272
- }
1273
- }
1274
-
1275
-
1276
- else
1277
- {
1278
- for (size_t row = st; row <= end; row++)
1279
- {
1280
- xmin = fmin(xmin, x[ix_arr[row]]);
1281
- xmax = fmax(xmax, x[ix_arr[row]]);
1282
- }
1283
- }
1284
-
1285
- unsplittable = (xmin == xmax) || (xmin == HUGE_VAL && xmax == -HUGE_VAL);
1286
- }
1287
-
1288
- /* for sparse inputs */
1289
- void get_range(size_t ix_arr[], size_t st, size_t end, size_t col_num,
1290
- double Xc[], sparse_ix Xc_ind[], sparse_ix Xc_indptr[],
1291
- MissingAction missing_action, double &xmin, double &xmax, bool &unsplittable)
1292
- {
1293
- /* ix_arr must already be sorted beforehand */
1294
- xmin = HUGE_VAL;
1295
- xmax = -HUGE_VAL;
1296
-
1297
- size_t st_col = Xc_indptr[col_num];
1298
- size_t end_col = Xc_indptr[col_num + 1];
1299
- size_t nnz_col = end_col - st_col;
1300
- end_col--;
1301
- size_t curr_pos = st_col;
1302
-
1303
- if (!nnz_col ||
1304
- Xc_ind[st_col] > ix_arr[end] ||
1305
- ix_arr[st] > Xc_ind[end_col]
1306
- )
1307
- {
1308
- unsplittable = true;
1309
- return;
1310
- }
1311
-
1312
- if (nnz_col < end - st + 1 ||
1313
- Xc_ind[st_col] > ix_arr[st] ||
1314
- Xc_ind[end_col] < ix_arr[end]
1315
- )
1316
- {
1317
- xmin = 0;
1318
- xmax = 0;
1319
- }
1320
-
1321
- size_t ind_end_col = Xc_ind[end_col];
1322
- size_t nmatches = 0;
1323
-
1324
- if (missing_action == Fail)
1325
- {
1326
- for (size_t *row = std::lower_bound(ix_arr + st, ix_arr + end + 1, Xc_ind[st_col]);
1327
- row != ix_arr + end + 1 && curr_pos != end_col + 1 && ind_end_col >= *row;
1328
- )
1329
- {
1330
- if (Xc_ind[curr_pos] == *row)
1331
- {
1332
- nmatches++;
1333
- xmin = (Xc[curr_pos] < xmin)? Xc[curr_pos] : xmin;
1334
- xmax = (Xc[curr_pos] > xmax)? Xc[curr_pos] : xmax;
1335
- if (row == ix_arr + end || curr_pos == end_col) break;
1336
- curr_pos = std::lower_bound(Xc_ind + curr_pos, Xc_ind + end_col + 1, *(++row)) - Xc_ind;
1337
- }
1338
-
1339
- else
1340
- {
1341
- if (Xc_ind[curr_pos] > *row)
1342
- row = std::lower_bound(row + 1, ix_arr + end + 1, Xc_ind[curr_pos]);
1343
- else
1344
- curr_pos = std::lower_bound(Xc_ind + curr_pos + 1, Xc_ind + end_col + 1, *row) - Xc_ind;
1345
- }
1346
- }
1347
- }
1348
-
1349
- else /* can have NAs */
1350
- {
1351
- for (size_t *row = std::lower_bound(ix_arr + st, ix_arr + end + 1, Xc_ind[st_col]);
1352
- row != ix_arr + end + 1 && curr_pos != end_col + 1 && ind_end_col >= *row;
1353
- )
1354
- {
1355
- if (Xc_ind[curr_pos] == *row)
1356
- {
1357
- nmatches++;
1358
- xmin = fmin(xmin, Xc[curr_pos]);
1359
- xmax = fmax(xmax, Xc[curr_pos]);
1360
- if (row == ix_arr + end || curr_pos == end_col) break;
1361
- curr_pos = std::lower_bound(Xc_ind + curr_pos, Xc_ind + end_col + 1, *(++row)) - Xc_ind;
1362
- }
1363
-
1364
- else
1365
- {
1366
- if (Xc_ind[curr_pos] > *row)
1367
- row = std::lower_bound(row + 1, ix_arr + end + 1, Xc_ind[curr_pos]);
1368
- else
1369
- curr_pos = std::lower_bound(Xc_ind + curr_pos + 1, Xc_ind + end_col + 1, *row) - Xc_ind;
1370
- }
1371
- }
1372
-
1373
- }
1374
-
1375
- if (nmatches < (end - st + 1))
1376
- {
1377
- xmin = fmin(xmin, 0);
1378
- xmax = fmax(xmax, 0);
1379
- }
1380
- unsplittable = (xmin == xmax) || (xmin == HUGE_VAL && xmax == -HUGE_VAL);
1381
-
1382
- }
1383
-
1384
-
1385
- void get_categs(size_t ix_arr[], int x[], size_t st, size_t end, int ncat,
1386
- MissingAction missing_action, char categs[], size_t &npresent, bool &unsplittable)
1387
- {
1388
- std::fill(categs, categs + ncat, -1);
1389
- npresent = 0;
1390
- for (size_t row = st; row <= end; row++)
1391
- if (x[ix_arr[row]] >= 0)
1392
- categs[x[ix_arr[row]]] = 1;
1393
-
1394
- npresent = std::accumulate(categs,
1395
- categs + ncat,
1396
- (size_t)0,
1397
- [](const size_t a, const char b){return a + (b > 0);}
1398
- );
1399
-
1400
- unsplittable = npresent < 2;
1401
- }
1402
-
1403
/* Sums the weights of the rows ix_arr[st..end] (inclusive).
   The weights are taken from 'weights_arr' when it is non-empty, else from
   'weights_map' when that one is non-empty (looked up with operator[], so keys that
   are absent get inserted with a zero weight). Returns -HUGE_VAL when 'curr_depth'
   is zero or when both weight containers are empty. */
long double calculate_sum_weights(std::vector<size_t> &ix_arr, size_t st, size_t end, size_t curr_depth,
                                  std::vector<double> &weights_arr, std::unordered_map<size_t, double> &weights_map)
{
    if (curr_depth == 0)
        return -HUGE_VAL;

    const bool from_array = weights_arr.size() != 0;
    if (!from_array && weights_map.size() == 0)
        return -HUGE_VAL;

    long double total = 0;
    const auto stop = ix_arr.begin() + end + 1;
    if (from_array)
    {
        for (auto it = ix_arr.begin() + st; it != stop; ++it)
            total = total + weights_arr[*it];
    }
    else
    {
        for (auto it = ix_arr.begin() + st; it != stop; ++it)
            total = total + weights_map[*it];
    }
    return total;
}
1419
-
1420
- size_t move_NAs_to_front(size_t ix_arr[], size_t st, size_t end, double x[])
1421
- {
1422
- size_t st_non_na = st;
1423
- size_t temp;
1424
-
1425
- for (size_t row = st; row <= end; row++)
1426
- {
1427
- if (is_na_or_inf(x[ix_arr[row]]))
1428
- {
1429
- temp = ix_arr[st_non_na];
1430
- ix_arr[st_non_na] = ix_arr[row];
1431
- ix_arr[row] = temp;
1432
- st_non_na++;
1433
- }
1434
- }
1435
-
1436
- return st_non_na;
1437
- }
1438
-
1439
- size_t move_NAs_to_front(size_t ix_arr[], size_t st, size_t end, size_t col_num, double Xc[], sparse_ix Xc_ind[], sparse_ix Xc_indptr[])
1440
- {
1441
- size_t st_non_na = st;
1442
- size_t temp;
1443
-
1444
- size_t st_col = Xc_indptr[col_num];
1445
- size_t end_col = Xc_indptr[col_num + 1] - 1;
1446
- size_t curr_pos = st_col;
1447
- size_t ind_end_col = Xc_ind[end_col];
1448
- std::sort(ix_arr + st, ix_arr + end + 1);
1449
- size_t *ptr_st = std::lower_bound(ix_arr + st, ix_arr + end + 1, Xc_ind[st_col]);
1450
-
1451
- for (size_t *row = ptr_st;
1452
- row != ix_arr + end + 1 && curr_pos != end_col + 1 && ind_end_col >= *row;
1453
- )
1454
- {
1455
- if (Xc_ind[curr_pos] == *row)
1456
- {
1457
- if (is_na_or_inf(Xc[curr_pos]))
1458
- {
1459
- temp = ix_arr[st_non_na];
1460
- ix_arr[st_non_na] = *row;
1461
- *row = temp;
1462
- st_non_na++;
1463
- }
1464
-
1465
- if (row == ix_arr + end || curr_pos == end_col) break;
1466
- curr_pos = std::lower_bound(Xc_ind + curr_pos + 1, Xc_ind + end_col + 1, *(++row)) - Xc_ind;
1467
- }
1468
-
1469
- else
1470
- {
1471
- if (Xc_ind[curr_pos] > *row)
1472
- row = std::lower_bound(row + 1, ix_arr + end + 1, Xc_ind[curr_pos]);
1473
- else
1474
- curr_pos = std::lower_bound(Xc_ind + curr_pos + 1, Xc_ind + end_col + 1, *row) - Xc_ind;
1475
- }
1476
- }
1477
-
1478
- return st_non_na;
1479
- }
1480
-
1481
/* Categorical version: moves to the front of ix_arr[st..end] (inclusive) the rows
   whose category x[ix_arr[.]] is negative, and returns the position of the first
   row that was not moved. */
size_t move_NAs_to_front(size_t ix_arr[], size_t st, size_t end, int x[])
{
    size_t next_slot = st;

    for (size_t pos = st; pos <= end; ++pos)
    {
        if (x[ix_arr[pos]] >= 0)
            continue;
        std::swap(ix_arr[next_slot], ix_arr[pos]);
        ++next_slot;
    }

    return next_slot;
}
1499
-
1500
- size_t center_NAs(size_t *restrict ix_arr, size_t st_left, size_t st, size_t curr_pos)
1501
- {
1502
- size_t temp;
1503
- for (size_t row = st_left; row < st; row++)
1504
- {
1505
- temp = ix_arr[--curr_pos];
1506
- ix_arr[curr_pos] = ix_arr[row];
1507
- ix_arr[row] = temp;
1508
- }
1509
-
1510
- return curr_pos;
1511
- }
1512
-
1513
- void todense(size_t ix_arr[], size_t st, size_t end,
1514
- size_t col_num, double *restrict Xc, sparse_ix Xc_ind[], sparse_ix Xc_indptr[],
1515
- double *restrict buffer_arr)
1516
- {
1517
- std::fill(buffer_arr, buffer_arr + (end - st + 1), (double)0);
1518
-
1519
- size_t st_col = Xc_indptr[col_num];
1520
- size_t end_col = Xc_indptr[col_num + 1] - 1;
1521
- size_t curr_pos = st_col;
1522
- size_t ind_end_col = Xc_ind[end_col];
1523
- size_t *ptr_st = std::lower_bound(ix_arr + st, ix_arr + end + 1, Xc_ind[st_col]);
1524
-
1525
- for (size_t *row = ptr_st;
1526
- row != ix_arr + end + 1 && curr_pos != end_col + 1 && ind_end_col >= *row;
1527
- )
1528
- {
1529
- if (Xc_ind[curr_pos] == *row)
1530
- {
1531
- buffer_arr[row - (ix_arr + st)] = Xc[curr_pos];
1532
- if (row == ix_arr + end || curr_pos == end_col) break;
1533
- curr_pos = std::lower_bound(Xc_ind + curr_pos + 1, Xc_ind + end_col + 1, *(++row)) - Xc_ind;
1534
- }
1535
-
1536
- else
1537
- {
1538
- if (Xc_ind[curr_pos] > *row)
1539
- row = std::lower_bound(row + 1, ix_arr + end + 1, Xc_ind[curr_pos]);
1540
- else
1541
- curr_pos = std::lower_bound(Xc_ind + curr_pos + 1, Xc_ind + end_col + 1, *row) - Xc_ind;
1542
- }
1543
- }
1544
- }
1545
-
1546
- /* Function to handle interrupt signals */
1547
- void set_interrup_global_variable(int s)
1548
- {
1549
- fprintf(stderr, "Error: procedure was interrupted\n");
1550
- #pragma omp critical
1551
- {
1552
- interrupt_switch = true;
1553
- }
1554
- }
1555
-
1556
/* Return the #def'd constants from standard header. This is in order to determine if the return
   value from the 'fit_model' function is a success or failure within Cython, which does not
   allow importing #def'd macro values.
   This one returns the value of EXIT_SUCCESS. */
int return_EXIT_SUCCESS()
{
    const int success_code = EXIT_SUCCESS;
    return success_code;
}
1563
/* Returns the value of the EXIT_FAILURE macro from the standard header, so that it
   can be obtained through a function call. */
int return_EXIT_FAILURE()
{
    const int failure_code = EXIT_FAILURE;
    return failure_code;
}