isotree 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (151) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +8 -1
  3. data/LICENSE.txt +2 -2
  4. data/README.md +32 -14
  5. data/ext/isotree/ext.cpp +144 -31
  6. data/ext/isotree/extconf.rb +7 -7
  7. data/lib/isotree/isolation_forest.rb +110 -30
  8. data/lib/isotree/version.rb +1 -1
  9. data/vendor/isotree/LICENSE +1 -1
  10. data/vendor/isotree/README.md +165 -27
  11. data/vendor/isotree/include/isotree.hpp +2111 -0
  12. data/vendor/isotree/include/isotree_oop.hpp +394 -0
  13. data/vendor/isotree/inst/COPYRIGHTS +62 -0
  14. data/vendor/isotree/src/RcppExports.cpp +525 -52
  15. data/vendor/isotree/src/Rwrapper.cpp +1931 -268
  16. data/vendor/isotree/src/c_interface.cpp +953 -0
  17. data/vendor/isotree/src/crit.hpp +4232 -0
  18. data/vendor/isotree/src/dist.hpp +1886 -0
  19. data/vendor/isotree/src/exp_depth_table.hpp +134 -0
  20. data/vendor/isotree/src/extended.hpp +1444 -0
  21. data/vendor/isotree/src/external_facing_generic.hpp +399 -0
  22. data/vendor/isotree/src/fit_model.hpp +2401 -0
  23. data/vendor/isotree/src/{dealloc.cpp → headers_joined.hpp} +38 -22
  24. data/vendor/isotree/src/helpers_iforest.hpp +813 -0
  25. data/vendor/isotree/src/{impute.cpp → impute.hpp} +353 -122
  26. data/vendor/isotree/src/indexer.cpp +515 -0
  27. data/vendor/isotree/src/instantiate_template_headers.cpp +118 -0
  28. data/vendor/isotree/src/instantiate_template_headers.hpp +240 -0
  29. data/vendor/isotree/src/isoforest.hpp +1659 -0
  30. data/vendor/isotree/src/isotree.hpp +1804 -392
  31. data/vendor/isotree/src/isotree_exportable.hpp +99 -0
  32. data/vendor/isotree/src/merge_models.cpp +159 -16
  33. data/vendor/isotree/src/mult.hpp +1321 -0
  34. data/vendor/isotree/src/oop_interface.cpp +842 -0
  35. data/vendor/isotree/src/oop_interface.hpp +278 -0
  36. data/vendor/isotree/src/other_helpers.hpp +219 -0
  37. data/vendor/isotree/src/predict.hpp +1932 -0
  38. data/vendor/isotree/src/python_helpers.hpp +134 -0
  39. data/vendor/isotree/src/ref_indexer.hpp +154 -0
  40. data/vendor/isotree/src/robinmap/LICENSE +21 -0
  41. data/vendor/isotree/src/robinmap/README.md +483 -0
  42. data/vendor/isotree/src/robinmap/include/tsl/robin_growth_policy.h +406 -0
  43. data/vendor/isotree/src/robinmap/include/tsl/robin_hash.h +1620 -0
  44. data/vendor/isotree/src/robinmap/include/tsl/robin_map.h +807 -0
  45. data/vendor/isotree/src/robinmap/include/tsl/robin_set.h +660 -0
  46. data/vendor/isotree/src/serialize.cpp +4300 -139
  47. data/vendor/isotree/src/sql.cpp +141 -59
  48. data/vendor/isotree/src/subset_models.cpp +174 -0
  49. data/vendor/isotree/src/utils.hpp +3808 -0
  50. data/vendor/isotree/src/xoshiro.hpp +467 -0
  51. data/vendor/isotree/src/ziggurat.hpp +405 -0
  52. metadata +38 -104
  53. data/vendor/cereal/LICENSE +0 -24
  54. data/vendor/cereal/README.md +0 -85
  55. data/vendor/cereal/include/cereal/access.hpp +0 -351
  56. data/vendor/cereal/include/cereal/archives/adapters.hpp +0 -163
  57. data/vendor/cereal/include/cereal/archives/binary.hpp +0 -169
  58. data/vendor/cereal/include/cereal/archives/json.hpp +0 -1019
  59. data/vendor/cereal/include/cereal/archives/portable_binary.hpp +0 -334
  60. data/vendor/cereal/include/cereal/archives/xml.hpp +0 -956
  61. data/vendor/cereal/include/cereal/cereal.hpp +0 -1089
  62. data/vendor/cereal/include/cereal/details/helpers.hpp +0 -422
  63. data/vendor/cereal/include/cereal/details/polymorphic_impl.hpp +0 -796
  64. data/vendor/cereal/include/cereal/details/polymorphic_impl_fwd.hpp +0 -65
  65. data/vendor/cereal/include/cereal/details/static_object.hpp +0 -127
  66. data/vendor/cereal/include/cereal/details/traits.hpp +0 -1411
  67. data/vendor/cereal/include/cereal/details/util.hpp +0 -84
  68. data/vendor/cereal/include/cereal/external/base64.hpp +0 -134
  69. data/vendor/cereal/include/cereal/external/rapidjson/allocators.h +0 -284
  70. data/vendor/cereal/include/cereal/external/rapidjson/cursorstreamwrapper.h +0 -78
  71. data/vendor/cereal/include/cereal/external/rapidjson/document.h +0 -2652
  72. data/vendor/cereal/include/cereal/external/rapidjson/encodedstream.h +0 -299
  73. data/vendor/cereal/include/cereal/external/rapidjson/encodings.h +0 -716
  74. data/vendor/cereal/include/cereal/external/rapidjson/error/en.h +0 -74
  75. data/vendor/cereal/include/cereal/external/rapidjson/error/error.h +0 -161
  76. data/vendor/cereal/include/cereal/external/rapidjson/filereadstream.h +0 -99
  77. data/vendor/cereal/include/cereal/external/rapidjson/filewritestream.h +0 -104
  78. data/vendor/cereal/include/cereal/external/rapidjson/fwd.h +0 -151
  79. data/vendor/cereal/include/cereal/external/rapidjson/internal/biginteger.h +0 -290
  80. data/vendor/cereal/include/cereal/external/rapidjson/internal/diyfp.h +0 -271
  81. data/vendor/cereal/include/cereal/external/rapidjson/internal/dtoa.h +0 -245
  82. data/vendor/cereal/include/cereal/external/rapidjson/internal/ieee754.h +0 -78
  83. data/vendor/cereal/include/cereal/external/rapidjson/internal/itoa.h +0 -308
  84. data/vendor/cereal/include/cereal/external/rapidjson/internal/meta.h +0 -186
  85. data/vendor/cereal/include/cereal/external/rapidjson/internal/pow10.h +0 -55
  86. data/vendor/cereal/include/cereal/external/rapidjson/internal/regex.h +0 -740
  87. data/vendor/cereal/include/cereal/external/rapidjson/internal/stack.h +0 -232
  88. data/vendor/cereal/include/cereal/external/rapidjson/internal/strfunc.h +0 -69
  89. data/vendor/cereal/include/cereal/external/rapidjson/internal/strtod.h +0 -290
  90. data/vendor/cereal/include/cereal/external/rapidjson/internal/swap.h +0 -46
  91. data/vendor/cereal/include/cereal/external/rapidjson/istreamwrapper.h +0 -128
  92. data/vendor/cereal/include/cereal/external/rapidjson/memorybuffer.h +0 -70
  93. data/vendor/cereal/include/cereal/external/rapidjson/memorystream.h +0 -71
  94. data/vendor/cereal/include/cereal/external/rapidjson/msinttypes/inttypes.h +0 -316
  95. data/vendor/cereal/include/cereal/external/rapidjson/msinttypes/stdint.h +0 -300
  96. data/vendor/cereal/include/cereal/external/rapidjson/ostreamwrapper.h +0 -81
  97. data/vendor/cereal/include/cereal/external/rapidjson/pointer.h +0 -1414
  98. data/vendor/cereal/include/cereal/external/rapidjson/prettywriter.h +0 -277
  99. data/vendor/cereal/include/cereal/external/rapidjson/rapidjson.h +0 -656
  100. data/vendor/cereal/include/cereal/external/rapidjson/reader.h +0 -2230
  101. data/vendor/cereal/include/cereal/external/rapidjson/schema.h +0 -2497
  102. data/vendor/cereal/include/cereal/external/rapidjson/stream.h +0 -223
  103. data/vendor/cereal/include/cereal/external/rapidjson/stringbuffer.h +0 -121
  104. data/vendor/cereal/include/cereal/external/rapidjson/writer.h +0 -709
  105. data/vendor/cereal/include/cereal/external/rapidxml/license.txt +0 -52
  106. data/vendor/cereal/include/cereal/external/rapidxml/manual.html +0 -406
  107. data/vendor/cereal/include/cereal/external/rapidxml/rapidxml.hpp +0 -2624
  108. data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_iterators.hpp +0 -175
  109. data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_print.hpp +0 -428
  110. data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_utils.hpp +0 -123
  111. data/vendor/cereal/include/cereal/macros.hpp +0 -154
  112. data/vendor/cereal/include/cereal/specialize.hpp +0 -139
  113. data/vendor/cereal/include/cereal/types/array.hpp +0 -79
  114. data/vendor/cereal/include/cereal/types/atomic.hpp +0 -55
  115. data/vendor/cereal/include/cereal/types/base_class.hpp +0 -203
  116. data/vendor/cereal/include/cereal/types/bitset.hpp +0 -176
  117. data/vendor/cereal/include/cereal/types/boost_variant.hpp +0 -164
  118. data/vendor/cereal/include/cereal/types/chrono.hpp +0 -72
  119. data/vendor/cereal/include/cereal/types/common.hpp +0 -129
  120. data/vendor/cereal/include/cereal/types/complex.hpp +0 -56
  121. data/vendor/cereal/include/cereal/types/concepts/pair_associative_container.hpp +0 -73
  122. data/vendor/cereal/include/cereal/types/deque.hpp +0 -62
  123. data/vendor/cereal/include/cereal/types/forward_list.hpp +0 -68
  124. data/vendor/cereal/include/cereal/types/functional.hpp +0 -43
  125. data/vendor/cereal/include/cereal/types/list.hpp +0 -62
  126. data/vendor/cereal/include/cereal/types/map.hpp +0 -36
  127. data/vendor/cereal/include/cereal/types/memory.hpp +0 -425
  128. data/vendor/cereal/include/cereal/types/optional.hpp +0 -66
  129. data/vendor/cereal/include/cereal/types/polymorphic.hpp +0 -483
  130. data/vendor/cereal/include/cereal/types/queue.hpp +0 -132
  131. data/vendor/cereal/include/cereal/types/set.hpp +0 -103
  132. data/vendor/cereal/include/cereal/types/stack.hpp +0 -76
  133. data/vendor/cereal/include/cereal/types/string.hpp +0 -61
  134. data/vendor/cereal/include/cereal/types/tuple.hpp +0 -123
  135. data/vendor/cereal/include/cereal/types/unordered_map.hpp +0 -36
  136. data/vendor/cereal/include/cereal/types/unordered_set.hpp +0 -99
  137. data/vendor/cereal/include/cereal/types/utility.hpp +0 -47
  138. data/vendor/cereal/include/cereal/types/valarray.hpp +0 -89
  139. data/vendor/cereal/include/cereal/types/variant.hpp +0 -109
  140. data/vendor/cereal/include/cereal/types/vector.hpp +0 -112
  141. data/vendor/cereal/include/cereal/version.hpp +0 -52
  142. data/vendor/isotree/src/Makevars +0 -4
  143. data/vendor/isotree/src/crit.cpp +0 -912
  144. data/vendor/isotree/src/dist.cpp +0 -749
  145. data/vendor/isotree/src/extended.cpp +0 -790
  146. data/vendor/isotree/src/fit_model.cpp +0 -1090
  147. data/vendor/isotree/src/helpers_iforest.cpp +0 -324
  148. data/vendor/isotree/src/isoforest.cpp +0 -771
  149. data/vendor/isotree/src/mult.cpp +0 -607
  150. data/vendor/isotree/src/predict.cpp +0 -853
  151. data/vendor/isotree/src/utils.cpp +0 -1566
@@ -18,11 +18,29 @@
18
18
  * [5] https://sourceforge.net/projects/iforest/
19
19
  * [6] https://math.stackexchange.com/questions/3388518/expected-number-of-paths-required-to-separate-elements-in-a-binary-tree
20
20
  * [7] Quinlan, J. Ross. C4. 5: programs for machine learning. Elsevier, 2014.
21
- * [8] Cortes, David. "Distance approximation using Isolation Forests." arXiv preprint arXiv:1910.12362 (2019).
22
- * [9] Cortes, David. "Imputing missing values with unsupervised random trees." arXiv preprint arXiv:1911.06646 (2019).
21
+ * [8] Cortes, David.
22
+ * "Distance approximation using Isolation Forests."
23
+ * arXiv preprint arXiv:1910.12362 (2019).
24
+ * [9] Cortes, David.
25
+ * "Imputing missing values with unsupervised random trees."
26
+ * arXiv preprint arXiv:1911.06646 (2019).
27
+ * [10] https://math.stackexchange.com/questions/3333220/expected-average-depth-in-random-binary-tree-constructed-top-to-bottom
28
+ * [11] Cortes, David.
29
+ * "Revisiting randomized choices in isolation forests."
30
+ * arXiv preprint arXiv:2110.13402 (2021).
31
+ * [12] Guha, Sudipto, et al.
32
+ * "Robust random cut forest based anomaly detection on streams."
33
+ * International conference on machine learning. PMLR, 2016.
34
+ * [13] Cortes, David.
35
+ * "Isolation forests: looking beyond tree depth."
36
+ * arXiv preprint arXiv:2111.11639 (2021).
37
+ * [14] Ting, Kai Ming, Yue Zhu, and Zhi-Hua Zhou.
38
+ * "Isolation kernel and its effect on SVM"
39
+ * Proceedings of the 24th ACM SIGKDD
40
+ * International Conference on Knowledge Discovery & Data Mining. 2018.
23
41
  *
24
42
  * BSD 2-Clause License
25
- * Copyright (c) 2020, David Cortes
43
+ * Copyright (c) 2019-2022, David Cortes
26
44
  * All rights reserved.
27
45
  * Redistribution and use in source and binary forms, with or without
28
46
  * modification, are permitted provided that the following conditions are met:
@@ -44,219 +62,4362 @@
44
62
  */
45
63
  #include "isotree.hpp"
46
64
 
47
- #ifdef _ENABLE_CEREAL
65
+ /* TODO: add option to serialize as JSON file */
48
66
 
67
+ using std::uint8_t;
68
+ using std::int8_t;
69
+ using std::uint16_t;
70
+ using std::int16_t;
71
+ using std::uint32_t;
72
+ using std::int32_t;
73
+ using std::uint64_t;
74
+ using std::int64_t;
49
75
 
50
- template <class T>
51
- void serialize_obj(T &obj, std::ostream &output)
76
/* Selection of the file-positioning primitives used in this file:
     fseek_  - seek function
     ftell_  - tell function
     fpos_t_ - integer type holding a file offset
   64-bit variants are picked on 64-bit Windows builds when the compiler offers them.
   https://stackoverflow.com/questions/16696297/ftell-at-a-position-past-2gb */
/* TODO: do CLANG and ICC have similar functionality? */
#if defined(_MSC_VER) && (SIZE_MAX >= UINT64_MAX)
    /* MSVC, 64-bit target */
    #include <stdio.h>
    #define fseek_ _fseeki64
    #define ftell_ _ftelli64
    #define fpos_t_ __int64
#elif (defined(_WIN32) || defined(_WIN64)) && (SIZE_MAX >= UINT64_MAX) && (defined(__GNUG__) || defined(__GNUC__))
    /* GNU-compatible compiler on 64-bit Windows */
    #ifndef _FILE_OFFSET_BITS
    #define _FILE_OFFSET_BITS 64
    #endif
    #include <stdio.h>
    #define fseek_ fseeko
    #define ftell_ ftello
    #define fpos_t_ off_t
#else
    /* Everything else: standard C functions with a 'long' offset */
    using std::feof;
    using std::fwrite;
    using std::fread;
    using std::fopen;
    using std::fclose;
    using std::ftell;
    using std::fseek;
    #define fseek_ fseek
    #define ftell_ ftell
    #define fpos_t_ long /* <- might overflow with large files */
#endif
116
+
117
/* Compile-time flags describing the basic types of the machine compiling this file. */

/* 'double' has a 53-bit mantissa in radix 2 */
#if defined(DBL_MANT_DIG) && (DBL_MANT_DIG == 53) && (FLT_RADIX == 2)
#   define HAS_IEEE_DOUBLE
#endif

/* Width of 'int': exactly one of HAS_INT16 / HAS_INT32 / HAS_INT64 / HAS_INT_OTHER is defined */
#if (INT_MAX == INT16_MAX)
#   define HAS_INT16
#elif (INT_MAX == INT32_MAX)
#   define HAS_INT32
#elif (INT_MAX == INT64_MAX)
#   define HAS_INT64
#else
#   define HAS_INT_OTHER
#endif

/* Width of 'size_t': exactly one of HAS_SIZE32 / HAS_SIZE64 / HAS_SIZE_OTHER is defined */
#if (SIZE_MAX == UINT32_MAX)
#   define HAS_SIZE32
#elif (SIZE_MAX == UINT64_MAX)
#   define HAS_SIZE64
#else
#   define HAS_SIZE_OTHER
#endif
138
+
139
/* Fixed-length tags used by the serialization code. Both strings are SIZE_WATERMARK
   characters long, not counting the terminating null.
   NOTE(review): presumably written at the head of serialized data to tell a complete
   model from an incomplete one - confirm against the writer functions. */
const char *watermark = "isotree_model";
const char *incomplete_watermark = "incomplete___";
static const size_t SIZE_WATERMARK = sizeof("isotree_model") - 1;
142
/* Codes describing the platform: layout of 'double', width of a basic type, and byte order.
   The numeric values are spelled out explicitly and must not change. */
enum DoubleTypeStructure {
    IsNormalDouble = 1,
    IsAbnormalDouble = 2
};
enum PlatformSize {
    Is16Bit = 1,
    Is32Bit = 2,
    Is64Bit = 3,
    IsOther = 4
};
enum PlatformEndianness {
    PlatformLittleEndian = 1,
    PlatformBigEndian = 2
};
145
/* Codes identifying which kind of object a serialized payload holds.
   Listed in numeric order; the values themselves are fixed. */
enum ModelTypes {
    IsoForestModel     = 1,
    ExtIsoForestModel  = 2,
    ImputerModel       = 3,
    AllObjectsCombined = 4,
    IndexerModel       = 5
};
152
/* Codes telling what, if anything, follows a serialized object.
   Listed in numeric order; the values themselves are fixed. */
enum EndingIndicator {
    EndsHere                                                = 0,
    HasSingleVarModelNext                                   = 1,
    HasExtModelNext                                         = 2,
    HasImputerNext                                          = 3,
    HasSingleVarModelPlusImputerNext                        = 4,
    HasExtModelPlusImputerNext                              = 5,
    HasSingleVarModelPlusMetadataNext                       = 6,
    HasExtModelPlusMetadataNext                             = 7,
    HasSingleVarModelPlusImputerPlusMetadataNext            = 8,
    HasExtModelPlusImputerPlusMetadataNext                  = 9,
    HasMoreTreesNext                                        = 10,
    HasIndexerNext                                          = 11,
    HasSingleVarModelPlusIndexerNext                        = 12,
    HasSingleVarModelPlusImputerPlusIndexerNext             = 13,
    HasExtModelPlusIndexerNext                              = 14,
    HasExtModelPlusImputerPlusIndexerNext                   = 15,
    HasSingleVarModelPlusIndexerPlusMetadataNext            = 16,
    HasExtModelPlusIndexerPlusMetadataNext                  = 17,
    HasSingleVarModelPlusImputerPlusIndexerPlusMetadataNext = 18,
    HasExtModelPlusImputerPlusIndexerPlusMetadataNext       = 19
};
174
+
175
/* In-place byte-order reversal of the 2, 4 or 8 bytes starting at 'bytes'.
   Three implementations are provided, chosen by compiler:
     - MSVC: '_byteswap_*' intrinsics (with a manual fallback when the C type
       they operate on does not have the expected width),
     - GCC >= 5 outside of Windows: '__builtin_bswap*',
     - anything else: plain element swaps. */
#if defined(_MSC_VER)
#include <stdlib.h>
void swap16b(char *bytes) noexcept
{
    if (std::numeric_limits<unsigned short>::max() != UINT16_MAX) {
        std::swap(bytes[0], bytes[1]);
        return;
    }

    unsigned short word;
    memcpy(&word, bytes, sizeof(word));
    word = _byteswap_ushort(word);
    memcpy(bytes, &word, sizeof(word));
}
void swap32b(char *bytes) noexcept
{
    if (std::numeric_limits<unsigned long>::max() != UINT32_MAX) {
        std::swap(bytes[0], bytes[3]);
        std::swap(bytes[1], bytes[2]);
        return;
    }

    unsigned long word;
    memcpy(&word, bytes, sizeof(word));
    word = _byteswap_ulong(word);
    memcpy(bytes, &word, sizeof(word));
}
void swap64b(char *bytes) noexcept
{
    unsigned __int64 word;
    memcpy(&word, bytes, sizeof(word));
    word = _byteswap_uint64(word);
    memcpy(bytes, &word, sizeof(word));
}
#elif defined(__GNUC__) && (__GNUC__ >= 5) && !defined(_WIN32)
void swap16b(char *bytes) noexcept
{
    uint16_t word;
    memcpy(&word, bytes, sizeof(word));
    word = __builtin_bswap16(word);
    memcpy(bytes, &word, sizeof(word));
}
void swap32b(char *bytes) noexcept
{
    uint32_t word;
    memcpy(&word, bytes, sizeof(word));
    word = __builtin_bswap32(word);
    memcpy(bytes, &word, sizeof(word));
}
void swap64b(char *bytes) noexcept
{
    uint64_t word;
    memcpy(&word, bytes, sizeof(word));
    word = __builtin_bswap64(word);
    memcpy(bytes, &word, sizeof(word));
}
#else
void swap16b(char *bytes) noexcept
{
    std::swap(bytes[0], bytes[1]);
}
void swap32b(char *bytes) noexcept
{
    for (int lo = 0, hi = 3; lo < hi; lo++, hi--)
        std::swap(bytes[lo], bytes[hi]);
}
void swap64b(char *bytes) noexcept
{
    for (int lo = 0, hi = 7; lo < hi; lo++, hi--)
        std::swap(bytes[lo], bytes[hi]);
}
#endif
251
/* Floating point overloads: reverse the byte order of the value in-place.
   When HAS_IEEE_DOUBLE is defined the fixed-width helpers are used (4 bytes for
   'float', 8 bytes for 'double'); otherwise all 'sizeof' bytes are reversed. */
void endian_swap(float &bytes) noexcept
{
    char *const raw = (char*)&bytes;
    #ifndef HAS_IEEE_DOUBLE
    std::reverse(raw, raw + sizeof(float));
    #else
    swap32b(raw);
    #endif
}
void endian_swap(double &bytes) noexcept
{
    char *const raw = (char*)&bytes;
    #ifndef HAS_IEEE_DOUBLE
    std::reverse(raw, raw + sizeof(double));
    #else
    swap64b(raw);
    #endif
}
267
+ void endian_swap(uint8_t &bytes) noexcept
268
+ {
269
+ return;
270
+ }
271
+ void endian_swap(uint16_t &bytes) noexcept
272
+ {
273
+ swap16b((char*)&bytes);
274
+ }
275
+ void endian_swap(uint32_t &bytes) noexcept
276
+ {
277
+ swap32b((char*)&bytes);
278
+ }
279
+ void endian_swap(uint64_t &bytes) noexcept
280
+ {
281
+ swap64b((char*)&bytes);
282
+ }
283
+ void endian_swap(int8_t &bytes) noexcept
284
+ {
285
+ return;
286
+ }
287
+ void endian_swap(int16_t &bytes) noexcept
288
+ {
289
+ swap16b((char*)&bytes);
290
+ }
291
+ void endian_swap(int32_t &bytes) noexcept
292
+ {
293
+ swap32b((char*)&bytes);
294
+ }
295
+ void endian_swap(int64_t &bytes) noexcept
296
+ {
297
+ swap64b((char*)&bytes);
298
+ }
299
+ /* Note: on macOS, some compilers will take 'size_t' as different from 'uint64_t',
300
+ hence it needs a separate one. However, on other compilers and platforms this
301
+ leads to a duplicated function definition, and thus needs this separation
302
+ in names (otherwise, compilers such as GCC will not compile it). */
303
+ void endian_swap_size_t(char *bytes) noexcept
304
+ {
305
+ #if (SIZE_MAX == UINT32_MAX)
306
+ swap32b(bytes);
307
+ #elif (SIZE_MAX == UINT64_MAX)
308
+ swap64b(bytes);
309
+ #else
310
+ std::reverse(bytes, bytes + sizeof(size_t));
311
+ #endif
312
+ }
313
+ void endian_swap_int(char *bytes) noexcept
52
314
  {
53
- cereal::BinaryOutputArchive archive(output);
54
- archive(obj);
315
+ #if (INT_MAX == INT16_MAX)
316
+ swap16b(bytes);
317
+ #elif (INT_MAX == INT32_MAX)
318
+ swap32b(bytes);
319
+ #elif (SIZE_MAX == INT64_MAX)
320
+ swap64b(bytes);
321
+ #else
322
+ std::reverse(bytes, bytes + sizeof(int));
323
+ #endif
55
324
  }
56
325
  template <class T>
57
- std::string serialize_obj(T &obj)
326
+ void endian_swap(T &bytes) noexcept
327
+ {
328
+ std::reverse((char*)&bytes, (char*)&bytes + sizeof(T));
329
+ }
330
+
331
/* Reverses, in-place, the byte order of each of the 'n_els' elements starting at 'ptr'.
   On compilers that do not define __GNUC__, 'size_t' and 'int' elements are routed to
   their dedicated helpers; everything else goes through the 'endian_swap' overloads. */
template <class dtype>
void swap_endianness(dtype *ptr, size_t n_els) noexcept
{
    dtype *const past_end = ptr + n_els;

    #ifndef __GNUC__
    if (std::is_same<dtype, size_t>::value)
    {
        for (dtype *el = ptr; el != past_end; ++el)
            endian_swap_size_t((char*)el);
        return;
    }

    if (std::is_same<dtype, int>::value)
    {
        for (dtype *el = ptr; el != past_end; ++el)
            endian_swap_int((char*)el);
        return;
    }
    #endif

    for (dtype *el = ptr; el != past_end; ++el)
        endian_swap(*el);
}
66
- template <class T, class I>
67
- void deserialize_obj(T &output, I &serialized)
353
+
354
/* Memory-buffer overloads: the position to come back to later is simply
   the current value of the cursor pointer, which is returned unchanged. */
const char* set_return_position(const char *in) noexcept
{
    const char *const bookmark = in;
    return bookmark;
}

char* set_return_position(char *in) noexcept
{
    char *const bookmark = in;
    return bookmark;
}
89
363
 
364
+ fpos_t_ set_return_position(FILE *in)
365
+ {
366
+ return ftell_(in);
367
+ }
90
368
 
91
- /* Serialization and de-serialization functions using Cereal
92
- *
93
- * Parameters
94
- * ==========
95
- * - model (in)
96
- * A model object to serialize, after being fitted through function 'fit_iforest'.
97
- * - imputer (in)
98
- * An imputer object to serialize, after being fitted through function 'fit_iforest'
99
- * with 'build_imputer=true'.
100
- * - output_obj (out)
101
- * An already-allocated object into which a serialized object of the same class will
102
- * be de-serialized. The contents of this object will be overwritten. Should be initialized
103
- * through the default constructor (e.g. 'new ExtIsoForest' or 'ExtIsoForest()').
104
- * - output (out)
105
- * An output stream (any type will do) in which to save/persist/serialize the
106
- * model or imputer object using the cereal library. In the functions that do not
107
- * take this parameter, it will be returned as a string containing the raw bytes.
108
- * - serialized (in)
109
- * The input stream which contains the serialized/saved/persisted model or imputer object,
110
- * which will be de-serialized into 'output'.
111
- * - output_file_path
112
- * File name into which to write the serialized model or imputer object as raw bytes.
113
- * Note that, on Windows, passing non-ASCII characters will fail, and in such case,
114
- * you might instead want to use instead the versions that take 'wchar_t', which are
115
- * only available in the MSVC compiler (it uses 'std::ofstream' internally, which as
116
- * of C++20, is not required by the standard to accept 'wchar_t' in its constructor).
117
- * Be aware that it will only write raw bytes, thus metadata such as CPU endianness
118
- * will be lost. If you need to transfer files berween e.g. an x86 computer and a SPARC
119
- * server, you'll have to use other methods.
120
- * This functionality is intended for being easily wrapper into scripting languages
121
- * without having to copy the contents to to some intermediate language.
122
- * - input_file_path
123
- * File name from which to read a serialized model or imputer object as raw bytes.
124
- * See the description for 'output_file_path' for more details.
125
- * - move_str
126
- * Whether to move ('std::move') the contents of the string passed as input in order
127
- * to speed things up and avoid making a redundant copy of the raw bytes. If passing
128
- * 'true', the input string will be rendered empty afterwards.
129
- */
130
- void serialize_isoforest(IsoForest &model, std::ostream &output)
369
/* Type returned by 'std::istream::tellg'; also used for output stream positions */
#define pos_type_istream decltype(std::declval<std::istream>().tellg())

/* Input stream overload: returns the current read position of the stream */
pos_type_istream set_return_position(std::istream &in)
{
    const pos_type_istream read_cursor = in.tellg();
    return read_cursor;
}

/* Output stream overload: returns the current write position of the stream */
pos_type_istream set_return_position(std::ostream &in)
{
    const pos_type_istream write_cursor = in.tellp();
    return write_cursor;
}
134
- void serialize_isoforest(IsoForest &model, const char *output_file_path)
380
+
381
/* Memory-buffer overloads: rewinds the cursor 'in' by overwriting it
   with a position that was saved earlier. */
void return_to_position(const char *&in, const char *saved_position) noexcept
{
    const char *const target = saved_position;
    in = target;
}

void return_to_position(char *&in, char *saved_position) noexcept
{
    char *const target = saved_position;
    in = target;
}
139
- std::string serialize_isoforest(IsoForest &model)
390
+
391
+ void return_to_position(FILE *&in, fpos_t_ saved_position)
140
392
  {
141
- return serialize_obj(model);
393
+ fseek_(in, saved_position, SEEK_SET);
142
394
  }
143
- void deserialize_isoforest(IsoForest &output_obj, std::istream &serialized)
395
+
396
+ void return_to_position(std::istream &in, pos_type_istream saved_position)
144
397
  {
145
- deserialize_obj(output_obj, serialized);
398
+ in.seekg(saved_position);
146
399
  }
147
- void deserialize_isoforest(IsoForest &output_obj, const char *input_file_path)
400
+
401
+ void return_to_position(std::ostream &in, pos_type_istream saved_position)
148
402
  {
149
- std::ifstream serialized(input_file_path);
150
- deserialize_obj(output_obj, serialized);
403
+ in.seekp(saved_position);
151
404
  }
152
- void deserialize_isoforest(IsoForest &output_obj, std::string &serialized, bool move_str)
405
+
406
+
407
/* Reports whether this build was compiled with WCHAR_T_FUNS defined:
   'true' when the macro is defined, 'false' otherwise. */
bool has_wchar_t_file_serializers() noexcept
{
    #if defined(WCHAR_T_FUNS)
    const bool compiled_in = true;
    #else
    const bool compiled_in = false;
    #endif
    return compiled_in;
}
156
415
 
416
/* Throws 'std::runtime_error' with the message "Error <errno> <description>\n",
   where the description comes from 'strerror'.
   'errno' is read once, up front: the string concatenations below allocate memory,
   and library calls are allowed to modify 'errno', so reading it twice could pair
   a number with the description of a different error. */
void throw_errno()
{
    const int error_code = errno;
    throw std::runtime_error("Error " + std::to_string(error_code) + " " + strerror(error_code) + "\n");
}
157
420
 
421
+ void throw_ferror(FILE *file)
422
+ {
423
+ if (!errno) fflush(file);
424
+ throw_errno();
425
+ }
158
426
 
159
- void serialize_ext_isoforest(ExtIsoForest &model, std::ostream &output)
427
/* Always throws 'std::runtime_error', reporting that the input ended
   before all of the expected data could be read. */
void throw_feoferr()
{
    const char *const message = "Error: file ended unexpectedly.\n";
    throw std::runtime_error(message);
}
163
- void serialize_ext_isoforest(ExtIsoForest &model, const char *output_file_path)
431
+
432
/* Converts 'n_els' values that were read as 'saved_type' into 'dtype'.
 *
 * - ptr_write_ (out): destination array with room for 'n_els' elements of 'dtype'.
 * - buffer (in): raw storage whose first 'n_els * sizeof(saved_type)' bytes hold the
 *   values to convert.
 * - n_els: number of elements to convert.
 *
 * When 'dtype' is not wider than 'saved_type' and has a smaller maximum, every value
 * is range-checked first and 'std::runtime_error' is thrown if one does not fit.
 * The check covers both ends of the range when both types are signed: previously only
 * the maximum was tested, so values below the minimum of 'dtype' were silently
 * truncated by the cast. */
template <class dtype, class saved_type>
void convert_dtype(void *ptr_write_, std::vector<char> &buffer, size_t n_els)
{
    dtype *ptr_write = (dtype*)ptr_write_;
    saved_type *ptr_read = (saved_type*)buffer.data();

    if ((sizeof(dtype) <= sizeof(saved_type)) &&
        (saved_type)std::numeric_limits<dtype>::max() < std::numeric_limits<saved_type>::max())
    {
        const saved_type maxval = (saved_type) std::numeric_limits<dtype>::max();

        /* A lower bound is only meaningful when both types are signed; in every other
           case it is set to the lowest 'saved_type', which no value can be below. */
        const bool both_signed = std::numeric_limits<dtype>::is_signed &&
                                 std::numeric_limits<saved_type>::is_signed;
        const saved_type minval = both_signed?
                                    (saved_type) std::numeric_limits<dtype>::lowest()
                                    : std::numeric_limits<saved_type>::lowest();

        for (size_t el = 0; el < n_els; el++)
            if (unlikely(ptr_read[el] > maxval || ptr_read[el] < minval))
                throw std::runtime_error("Error: serialized model has values too large for the current machine's types.\n");
    }

    for (size_t el = 0; el < n_els; el++)
        ptr_write[el] = (dtype)ptr_read[el];
}
168
- std::string serialize_ext_isoforest(ExtIsoForest &model)
450
+
451
/* Memory-buffer writer: copies 'n_els' elements of 'dtype' from 'ptr' to the location
   'out' points at, then advances 'out' past the bytes written.
   Does nothing when 'n_els' is zero. */
template <class dtype>
void write_bytes(const void *ptr, const size_t n_els, char *&out) noexcept
{
    if (n_els != 0)
    {
        const size_t n_bytes = n_els * sizeof(dtype);
        memcpy(out, ptr, n_bytes);
        out += n_bytes;
    }
}
172
- void deserialize_ext_isoforest(ExtIsoForest &output_obj, std::istream &serialized)
458
+
459
+ template <class dtype>
460
+ void write_bytes(const void *ptr, const size_t n_els, std::ostream &out)
173
461
  {
174
- deserialize_obj(output_obj, serialized);
462
+ if (n_els == 0) return;
463
+ out.write((char*)ptr, n_els * sizeof(dtype));
464
+ if (unlikely(out.bad())) throw_errno();
175
465
  }
176
- void deserialize_ext_isoforest(ExtIsoForest &output_obj, const char *input_file_path)
466
+
467
+ template <class dtype>
468
+ void write_bytes(const void *ptr, const size_t n_els, FILE *&out)
177
469
  {
178
- std::ifstream serialized(input_file_path);
179
- deserialize_obj(output_obj, serialized);
470
+ if (n_els == 0) return;
471
+ size_t n_written = fwrite(ptr, sizeof(dtype), n_els, out);
472
+ if (n_written != n_els || ferror(out)) throw_ferror(out);
180
473
  }
181
- void deserialize_ext_isoforest(ExtIsoForest &output_obj, std::string &serialized, bool move_str)
474
+
475
/* Memory-buffer reader (read-only cursor): copies 'n_els' elements of 'dtype' from the
   location 'in' points at into 'ptr', then advances 'in' past the bytes consumed.
   Does nothing when 'n_els' is zero. */
template <class dtype>
void read_bytes(void *ptr, const size_t n_els, const char *&in) noexcept
{
    if (n_els != 0)
    {
        const size_t n_bytes = n_els * sizeof(dtype);
        memcpy(ptr, in, n_bytes);
        in += n_bytes;
    }
}
185
482
 
483
+ template <class dtype, class saved_type>
484
+ void read_bytes(void *ptr, const size_t n_els, const char *&in, std::vector<char> &buffer, const bool diff_endian)
485
+ {
486
+ if (std::is_same<dtype, saved_type>::value)
487
+ {
488
+ read_bytes<dtype>(ptr, n_els, in);
489
+ if (unlikely(diff_endian)) swap_endianness((dtype*)ptr, n_els);
490
+ return;
491
+ }
492
+ if (n_els == 0) return;
493
+ if (unlikely(buffer.size() < n_els * sizeof(saved_type)))
494
+ buffer.resize((size_t)2 * n_els * sizeof(saved_type));
495
+ memcpy(buffer.data(), in, n_els * sizeof(saved_type));
496
+ in += n_els * sizeof(saved_type);
186
497
 
498
+ if (unlikely(diff_endian)) swap_endianness((saved_type*)buffer.data(), n_els);
499
+ convert_dtype<dtype, saved_type>(ptr, buffer, n_els);
500
+ }
187
501
 
502
/* Memory-buffer reader (writable cursor): copies 'n_els' elements of 'dtype' from the
   location 'in' points at into 'ptr', then advances 'in' past the bytes consumed.
   Does nothing when 'n_els' is zero. */
template <class dtype>
void read_bytes(void *ptr, const size_t n_els, char *&in) noexcept
{
    if (n_els != 0)
    {
        const size_t n_bytes = n_els * sizeof(dtype);
        memcpy(ptr, in, n_bytes);
        in += n_bytes;
    }
}
188
509
 
189
- void serialize_imputer(Imputer &imputer, std::ostream &output)
510
+ template <class dtype, class saved_type>
511
+ void read_bytes(void *ptr, const size_t n_els, char *&in, std::vector<char> &buffer, const bool diff_endian)
190
512
  {
191
- serialize_obj(imputer, output);
513
+ if (std::is_same<dtype, saved_type>::value)
514
+ {
515
+ read_bytes<dtype>(ptr, n_els, in);
516
+ if (unlikely(diff_endian)) swap_endianness((dtype*)ptr, n_els);
517
+ return;
518
+ }
519
+ if (n_els == 0) return;
520
+ if (unlikely(buffer.size() < n_els * sizeof(saved_type)))
521
+ buffer.resize((size_t)2 * n_els * sizeof(saved_type));
522
+ memcpy(buffer.data(), in, n_els * sizeof(saved_type));
523
+ in += n_els * sizeof(saved_type);
524
+
525
+ if (unlikely(diff_endian)) swap_endianness((saved_type*)buffer.data(), n_els);
526
+ convert_dtype<dtype, saved_type>(ptr, buffer, n_els);
192
527
  }
193
- void serialize_imputer(Imputer &imputer, const char *output_file_path)
528
+
529
+ template <class dtype>
530
+ void read_bytes(void *ptr, const size_t n_els, std::istream &in)
194
531
  {
195
- std::ofstream output(output_file_path);
196
- serialize_obj(imputer, output);
532
+ if (n_els == 0) return;
533
+ in.read((char*)ptr, n_els * sizeof(dtype));
534
+ if (unlikely(in.bad())) throw_errno();
197
535
  }
198
- std::string serialize_imputer(Imputer &imputer)
536
+
537
+ template <class dtype, class saved_type>
538
+ void read_bytes(void *ptr, const size_t n_els, std::istream &in, std::vector<char> &buffer, const bool diff_endian)
199
539
  {
200
- return serialize_obj(imputer);
540
+ if (std::is_same<dtype, saved_type>::value)
541
+ {
542
+ read_bytes<dtype>(ptr, n_els, in);
543
+ if (unlikely(diff_endian)) swap_endianness((dtype*)ptr, n_els);
544
+ return;
545
+ }
546
+ if (n_els == 0) return;
547
+ if (unlikely(buffer.size() < n_els * sizeof(saved_type)))
548
+ buffer.resize((size_t)2 * n_els * sizeof(saved_type));
549
+ in.read((char*)buffer.data(), n_els * sizeof(saved_type));
550
+ if (unlikely(in.bad())) throw_errno();
551
+
552
+ if (unlikely(diff_endian)) swap_endianness((saved_type*)buffer.data(), n_els);
553
+ convert_dtype<dtype, saved_type>(ptr, buffer, n_els);
201
554
  }
202
- void deserialize_imputer(Imputer &output_obj, std::istream &serialized)
555
+
556
+ template <class dtype>
557
+ void read_bytes(void *ptr, const size_t n_els, FILE *&in)
203
558
  {
204
- deserialize_obj(output_obj, serialized);
559
+ if (n_els == 0) return;
560
+ if (unlikely(feof(in))) throw_feoferr();
561
+ size_t n_read = fread(ptr, sizeof(dtype), n_els, in);
562
+ if (unlikely(n_read != n_els || ferror(in))) throw_ferror(in);
205
563
  }
206
- void deserialize_imputer(Imputer &output_obj, const char *input_file_path)
564
+
565
+ template <class dtype, class saved_type>
566
+ void read_bytes(void *ptr, const size_t n_els, FILE *&in, std::vector<char> &buffer, const bool diff_endian)
207
567
  {
208
- std::ifstream serialized(input_file_path);
209
- deserialize_obj(output_obj, serialized);
568
+ if (std::is_same<dtype, saved_type>::value)
569
+ {
570
+ read_bytes<dtype>(ptr, n_els, in);
571
+ if (unlikely(diff_endian)) swap_endianness((dtype*)ptr, n_els);
572
+ return;
573
+ }
574
+ if (n_els == 0) return;
575
+ if (unlikely(feof(in))) throw_feoferr();
576
+ if (unlikely(buffer.size() < n_els * sizeof(saved_type)))
577
+ buffer.resize((size_t)2 * n_els * sizeof(saved_type));
578
+ size_t n_read = fread(buffer.data(), sizeof(saved_type), n_els, in);
579
+ if (unlikely(n_read != n_els || ferror(in))) throw_ferror(in);
580
+
581
+ if (unlikely(diff_endian)) swap_endianness((saved_type*)buffer.data(), n_els);
582
+ convert_dtype<dtype, saved_type>(ptr, buffer, n_els);
210
583
  }
211
- void deserialize_imputer(Imputer &output_obj, std::string &serialized, bool move_str)
584
+
585
/* Replaces the contents of 'vec' with 'n_els' elements of type 'dtype' taken from
   the in-memory cursor 'in' (or empties it when 'n_els' is zero), releases any
   excess capacity, and advances the cursor past the consumed bytes. */
template <class dtype>
void read_bytes(std::vector<dtype> &vec, const size_t n_els, const char *&in)
{
    if (n_els == 0)
    {
        vec.clear();
    }
    else
    {
        dtype *const first = (dtype*)in;
        vec.assign(first, first + n_els);
    }
    vec.shrink_to_fit();
    in += n_els * sizeof(dtype);
}
215
595
 
596
+ template <class dtype, class saved_type>
597
+ void read_bytes(std::vector<dtype> &vec, const size_t n_els, const char *&in, std::vector<char> &buffer, const bool diff_endian)
598
+ {
599
+ if (std::is_same<dtype, saved_type>::value)
600
+ {
601
+ read_bytes<dtype>(vec, n_els, in);
602
+ if (unlikely(diff_endian)) swap_endianness(vec.data(), n_els);
603
+ return;
604
+ }
605
+ if (n_els) {
606
+ if (unlikely(buffer.size() < n_els * sizeof(saved_type)))
607
+ buffer.resize((size_t)2 * n_els * sizeof(saved_type));
608
+ read_bytes<saved_type>(buffer.data(), n_els, in);
609
+ vec.resize(n_els);
610
+ vec.shrink_to_fit();
611
+
612
+ if (unlikely(diff_endian)) swap_endianness((saved_type*)buffer.data(), n_els);
613
+ convert_dtype<dtype, saved_type>(vec.data(), buffer, n_els);
614
+ }
615
+
616
+ else {
617
+ vec.clear();
618
+ vec.shrink_to_fit();
619
+ }
216
620
 
217
- #ifdef _MSC_VER
218
- void serialize_isoforest(IsoForest &model, const wchar_t *output_file_path)
621
+ in += n_els * sizeof(saved_type);
622
+ }
623
+
624
+ template <class dtype>
625
+ void read_bytes(std::vector<dtype> &vec, const size_t n_els, std::istream &in)
219
626
  {
220
- std::ofstream output(output_file_path);
221
- serialize_obj(model, output);
627
+ vec.resize(n_els);
628
+ vec.shrink_to_fit();
629
+
630
+ if (n_els) {
631
+ in.read((char*)vec.data(), n_els * sizeof(dtype));
632
+ if (unlikely(in.bad())) throw_errno();
633
+ }
222
634
  }
223
- void deserialize_isoforest(IsoForest &output_obj, const wchar_t *input_file_path)
635
+
636
+ template <class dtype, class saved_type>
637
+ void read_bytes(std::vector<dtype> &vec, const size_t n_els, std::istream &in, std::vector<char> &buffer, const bool diff_endian)
224
638
  {
225
- std::ifstream serialized(input_file_path);
226
- deserialize_obj(output_obj, serialized);
639
+ if (std::is_same<dtype, saved_type>::value)
640
+ {
641
+ read_bytes<dtype>(vec, n_els, in);
642
+ if (unlikely(diff_endian)) swap_endianness(vec.data(), n_els);
643
+ return;
644
+ }
645
+ vec.resize(n_els);
646
+ vec.shrink_to_fit();
647
+
648
+ if (n_els) {
649
+ if (unlikely(buffer.size() < n_els * sizeof(saved_type)))
650
+ buffer.resize((size_t)2 * n_els * sizeof(saved_type));
651
+ in.read(buffer.data(), n_els * sizeof(saved_type));
652
+ if (unlikely(in.bad())) throw_errno();
653
+
654
+ if (unlikely(diff_endian)) swap_endianness((saved_type*)buffer.data(), n_els);
655
+ convert_dtype<dtype, saved_type>(vec.data(), buffer, n_els);
656
+ }
227
657
  }
228
- void serialize_ext_isoforest(ExtIsoForest &model, const wchar_t *output_file_path)
658
+
659
+ template <class dtype>
660
+ void read_bytes(std::vector<dtype> &vec, const size_t n_els, FILE *&in)
229
661
  {
230
- std::ofstream output(output_file_path);
231
- serialize_obj(model, output);
662
+ vec.resize(n_els);
663
+ vec.shrink_to_fit();
664
+
665
+ if (n_els) {
666
+ if (unlikely(feof(in))) throw_feoferr();
667
+ size_t n_read = fread(vec.data(), sizeof(dtype), n_els, in);
668
+ if (unlikely(n_read != n_els || ferror(in))) throw_ferror(in);
669
+ }
232
670
  }
233
- void deserialize_ext_isoforest(ExtIsoForest &output_obj, const wchar_t *input_file_path)
671
+
672
+ template <class dtype, class saved_type>
673
+ void read_bytes(std::vector<dtype> &vec, const size_t n_els, FILE *&in, std::vector<char> &buffer, const bool diff_endian)
234
674
  {
235
- std::ifstream serialized(input_file_path);
236
- deserialize_obj(output_obj, serialized);
675
+ if (std::is_same<dtype, saved_type>::value)
676
+ {
677
+ read_bytes<dtype>(vec, n_els, in);
678
+ if (unlikely(diff_endian)) swap_endianness(vec.data(), n_els);
679
+ return;
680
+ }
681
+ vec.resize(n_els);
682
+ vec.shrink_to_fit();
683
+
684
+ if (n_els) {
685
+ if (unlikely(feof(in))) throw_feoferr();
686
+ if (unlikely(buffer.size() < n_els * sizeof(saved_type)))
687
+ buffer.resize((size_t)2 * n_els * sizeof(saved_type));
688
+
689
+ size_t n_read = fread(buffer.data(), sizeof(saved_type), n_els, in);
690
+ if (unlikely(n_read != n_els || ferror(in))) throw_ferror(in);
691
+
692
+ if (unlikely(diff_endian)) swap_endianness((saved_type*)buffer.data(), n_els);
693
+ convert_dtype<dtype, saved_type>(vec.data(), buffer, n_els);
694
+ }
237
695
  }
238
- void serialize_imputer(Imputer &imputer, const wchar_t *output_file_path)
696
+
697
+ size_t get_size_node(const IsoTree &node) noexcept
239
698
  {
240
- std::ofstream output(output_file_path);
241
- serialize_obj(imputer, output);
699
+ size_t n_bytes = 0;
700
+ n_bytes += sizeof(uint8_t);
701
+ n_bytes += sizeof(int);
702
+ n_bytes += sizeof(double) * 6;
703
+ n_bytes += sizeof(size_t) * 4;
704
+ n_bytes += sizeof(signed char) * node.cat_split.size();
705
+ return n_bytes;
242
706
  }
243
- void deserialize_imputer(Imputer &output_obj, const wchar_t *input_file_path)
707
+
708
+ template <class otype>
709
+ void serialize_node(const IsoTree &node, otype &out)
244
710
  {
245
- std::ifstream serialized(input_file_path);
246
- deserialize_obj(output_obj, serialized);
711
+ if (interrupt_switch) return;
712
+
713
+ uint8_t data_en = (uint8_t)node.col_type;
714
+ write_bytes<uint8_t>((void*)&data_en, (size_t)1, out);
715
+
716
+ write_bytes<int>((void*)&node.chosen_cat, (size_t)1, out);
717
+
718
+ double data_doubles[] = {
719
+ node.num_split,
720
+ node.pct_tree_left,
721
+ node.score,
722
+ node.range_low,
723
+ node.range_high,
724
+ node.remainder
725
+ };
726
+ write_bytes<double>((void*)data_doubles, (size_t)6, out);
727
+
728
+ size_t data_sizets[] = {
729
+ node.col_num,
730
+ node.tree_left,
731
+ node.tree_right,
732
+ node.cat_split.size()
733
+ };
734
+ write_bytes<size_t>((void*)data_sizets, (size_t)4, out);
735
+
736
+ if (node.cat_split.size())
737
+ write_bytes<signed char>((void*)node.cat_split.data(), node.cat_split.size(), out);
247
738
  }
248
- bool has_msvc()
739
+
740
+ template <class itype>
741
+ void deserialize_node(IsoTree &node, itype &in)
249
742
  {
250
- return true;
743
+ if (interrupt_switch) return;
744
+
745
+ uint8_t data_en;
746
+ read_bytes<uint8_t>((void*)&data_en, (size_t)1, in);
747
+ node.col_type = (ColType)data_en;
748
+
749
+ read_bytes<int>((void*)&node.chosen_cat, (size_t)1, in);
750
+
751
+ double data_doubles[6];
752
+ read_bytes<double>((void*)data_doubles, (size_t)6, in);
753
+ node.num_split = data_doubles[0];
754
+ node.pct_tree_left = data_doubles[1];
755
+ node.score = data_doubles[2];
756
+ node.range_low = data_doubles[3];
757
+ node.range_high = data_doubles[4];
758
+ node.remainder = data_doubles[5];
759
+
760
+ size_t data_sizets[4];
761
+ read_bytes<size_t>((void*)data_sizets, (size_t)4, in);
762
+ node.col_num = data_sizets[0];
763
+ node.tree_left = data_sizets[1];
764
+ node.tree_right = data_sizets[2];
765
+ read_bytes<signed char>(node.cat_split, data_sizets[3], in);
251
766
  }
252
767
 
253
- #else
254
- bool has_msvc()
768
+ template <class itype, class saved_int_t, class saved_size_t>
769
+ void deserialize_node(IsoTree &node, itype &in, std::vector<char> &buffer, const bool diff_endian)
255
770
  {
256
- return false;
771
+ if (interrupt_switch) return;
772
+
773
+ uint8_t data_en;
774
+ read_bytes<uint8_t>((void*)&data_en, (size_t)1, in);
775
+ node.col_type = (ColType)data_en;
776
+
777
+ read_bytes<int, saved_int_t>((void*)&node.chosen_cat, (size_t)1, in, buffer, diff_endian);
778
+
779
+ double data_doubles[6];
780
+ read_bytes<double, double>((void*)data_doubles, (size_t)6, in, buffer, diff_endian);
781
+ node.num_split = data_doubles[0];
782
+ node.pct_tree_left = data_doubles[1];
783
+ node.score = data_doubles[2];
784
+ node.range_low = data_doubles[3];
785
+ node.range_high = data_doubles[4];
786
+ node.remainder = data_doubles[5];
787
+
788
+ size_t data_sizets[4];
789
+ read_bytes<size_t, saved_size_t>((void*)data_sizets, (size_t)4, in, buffer, diff_endian);
790
+ node.col_num = data_sizets[0];
791
+ node.tree_left = data_sizets[1];
792
+ node.tree_right = data_sizets[2];
793
+ read_bytes<signed char, signed char>(node.cat_split, data_sizets[3], in, buffer, diff_endian);
794
+ }
795
+
796
+ size_t get_size_node(const IsoHPlane &node) noexcept
797
+ {
798
+ size_t n_bytes = 0;
799
+ n_bytes += sizeof(double) * 5;
800
+ n_bytes += sizeof(size_t) * 10;
801
+ n_bytes += sizeof(size_t) * node.col_num.size();
802
+ if (node.col_type.size()) {
803
+ n_bytes += sizeof(uint8_t)*node.col_type.size();
804
+ }
805
+ n_bytes += sizeof(double)*node.coef.size();
806
+ n_bytes += sizeof(double)*node.mean.size();
807
+ if (node.cat_coef.size()) {
808
+ for (const auto &vec : node.cat_coef) {
809
+ n_bytes += sizeof(size_t);
810
+ n_bytes += sizeof(double) * vec.size();
811
+ }
812
+ }
813
+ n_bytes += sizeof(int)*node.chosen_cat.size();
814
+ n_bytes += sizeof(double)*node.fill_val.size();
815
+ n_bytes += sizeof(double)*node.fill_new.size();
816
+ return n_bytes;
817
+ }
818
+
819
+ template <class otype>
820
+ void serialize_node(const IsoHPlane &node, otype &out, std::vector<uint8_t> &buffer)
821
+ {
822
+ if (interrupt_switch) return;
823
+
824
+ double data_doubles[] = {
825
+ node.split_point,
826
+ node.score,
827
+ node.range_low,
828
+ node.range_high,
829
+ node.remainder
830
+ };
831
+ write_bytes<double>((void*)data_doubles, (size_t)5, out);
832
+
833
+ size_t data_sizets[] = {
834
+ node.hplane_left,
835
+ node.hplane_right,
836
+ node.col_num.size(),
837
+ node.col_type.size(),
838
+ node.coef.size(),
839
+ node.mean.size(),
840
+ node.cat_coef.size(),
841
+ node.chosen_cat.size(),
842
+ node.fill_val.size(),
843
+ node.fill_new.size()
844
+ };
845
+ write_bytes<size_t>((void*)data_sizets, (size_t)10, out);
846
+
847
+ write_bytes<size_t>((void*)node.col_num.data(), node.col_num.size(), out);
848
+
849
+ if (node.col_type.size()) {
850
+ if (buffer.size() < node.col_type.size())
851
+ buffer.resize((size_t)2 * node.col_type.size());
852
+ for (size_t ix = 0; ix < node.col_type.size(); ix++)
853
+ buffer[ix] = (uint8_t)node.col_type[ix];
854
+ write_bytes<uint8_t>((void*)buffer.data(), node.col_type.size(), out);
855
+ }
856
+
857
+ write_bytes<double>((void*)node.coef.data(), node.coef.size(), out);
858
+
859
+ write_bytes<double>((void*)node.mean.data(), node.mean.size(), out);
860
+
861
+ if (node.cat_coef.size()) {
862
+ size_t veclen;
863
+ for (const auto &vec : node.cat_coef) {
864
+ veclen = vec.size();
865
+ write_bytes<size_t>((void*)&veclen, (size_t)1, out);
866
+ write_bytes<double>((void*)vec.data(), vec.size(), out);
867
+ }
868
+ }
869
+
870
+ write_bytes<int>((void*)node.chosen_cat.data(), node.chosen_cat.size(), out);
871
+
872
+ write_bytes<double>((void*)node.fill_val.data(), node.fill_val.size(), out);
873
+
874
+ write_bytes<double>((void*)node.fill_new.data(), node.fill_new.size(), out);
875
+ }
876
+
877
+ template <class itype>
878
+ void deserialize_node(IsoHPlane &node, itype &in, std::vector<uint8_t> &buffer)
879
+ {
880
+ if (interrupt_switch) return;
881
+
882
+ double data_doubles[5];
883
+ read_bytes<double>((void*)data_doubles, (size_t)5, in);
884
+ node.split_point = data_doubles[0];
885
+ node.score = data_doubles[1];
886
+ node.range_low = data_doubles[2];
887
+ node.range_high = data_doubles[3];
888
+ node.remainder = data_doubles[4];
889
+
890
+ size_t data_sizets[10];
891
+ read_bytes<size_t>((void*)data_sizets, (size_t)10, in);
892
+
893
+ node.hplane_left = data_sizets[0];
894
+ node.hplane_right = data_sizets[1];
895
+
896
+ read_bytes<size_t>(node.col_num, data_sizets[2], in);
897
+
898
+ if (data_sizets[3]) {
899
+ node.col_type.resize(data_sizets[3]);
900
+ node.col_type.shrink_to_fit();
901
+ if (buffer.size() < data_sizets[3])
902
+ buffer.resize((size_t)2 * data_sizets[3]);
903
+ read_bytes<uint8_t>((void*)buffer.data(), data_sizets[3], in);
904
+ for (size_t ix = 0; ix < data_sizets[3]; ix++)
905
+ node.col_type[ix] = (ColType)buffer[ix];
906
+ }
907
+
908
+ read_bytes<double>(node.coef, data_sizets[4], in);
909
+
910
+ read_bytes<double>(node.mean, data_sizets[5], in);
911
+
912
+ if (data_sizets[6]) {
913
+ node.cat_coef.resize(data_sizets[6]);
914
+ node.cat_coef.shrink_to_fit();
915
+ size_t veclen;
916
+ for (auto &vec : node.cat_coef) {
917
+ read_bytes<size_t>((void*)&veclen, (size_t)1, in);
918
+ read_bytes<double>(vec, veclen, in);
919
+ }
920
+ }
921
+
922
+ read_bytes<int>(node.chosen_cat, data_sizets[7], in);
923
+
924
+ read_bytes<double>(node.fill_val, data_sizets[8], in);
925
+
926
+ read_bytes<double>(node.fill_new, data_sizets[9], in);
927
+ }
928
+
929
+ template <class itype, class saved_int_t, class saved_size_t>
930
+ void deserialize_node(IsoHPlane &node, itype &in, std::vector<uint8_t> &buffer, std::vector<char> &buffer2, const bool diff_endian)
931
+ {
932
+ if (interrupt_switch) return;
933
+
934
+ double data_doubles[5];
935
+ read_bytes<double, double>((void*)data_doubles, (size_t)5, in, buffer2, diff_endian);
936
+ node.split_point = data_doubles[0];
937
+ node.score = data_doubles[1];
938
+ node.range_low = data_doubles[2];
939
+ node.range_high = data_doubles[3];
940
+ node.remainder = data_doubles[4];
941
+
942
+ size_t data_sizets[10];
943
+ read_bytes<size_t, saved_size_t>((void*)data_sizets, (size_t)10, in, buffer2, diff_endian);
944
+
945
+ node.hplane_left = data_sizets[0];
946
+ node.hplane_right = data_sizets[1];
947
+
948
+ read_bytes<size_t, saved_size_t>(node.col_num, data_sizets[2], in, buffer2, diff_endian);
949
+
950
+ if (data_sizets[3]) {
951
+ node.col_type.resize(data_sizets[3]);
952
+ node.col_type.shrink_to_fit();
953
+ if (buffer.size() < data_sizets[3])
954
+ buffer.resize((size_t)2 * data_sizets[3]);
955
+ read_bytes<uint8_t>((void*)buffer.data(), data_sizets[3], in);
956
+ for (size_t ix = 0; ix < data_sizets[3]; ix++)
957
+ node.col_type[ix] = (ColType)buffer[ix];
958
+ }
959
+
960
+ read_bytes<double, double>(node.coef, data_sizets[4], in, buffer2, diff_endian);
961
+
962
+ read_bytes<double, double>(node.mean, data_sizets[5], in, buffer2, diff_endian);
963
+
964
+ if (data_sizets[6]) {
965
+ node.cat_coef.resize(data_sizets[6]);
966
+ node.cat_coef.shrink_to_fit();
967
+ size_t veclen;
968
+ for (auto &vec : node.cat_coef) {
969
+ read_bytes<size_t, saved_size_t>((void*)&veclen, (size_t)1, in, buffer2, diff_endian);
970
+ read_bytes<double, double>(vec, veclen, in, buffer2, diff_endian);
971
+ }
972
+ }
973
+
974
+ read_bytes<int, saved_int_t>(node.chosen_cat, data_sizets[7], in, buffer2, diff_endian);
975
+
976
+ read_bytes<double, double>(node.fill_val, data_sizets[8], in, buffer2, diff_endian);
977
+
978
+ read_bytes<double, double>(node.fill_new, data_sizets[9], in, buffer2, diff_endian);
979
+ }
980
+
981
+ size_t get_size_node(const ImputeNode &node) noexcept
982
+ {
983
+ size_t n_bytes = 0;
984
+ n_bytes += sizeof(size_t) * 5;
985
+ n_bytes += sizeof(double) * node.num_sum.size();
986
+ n_bytes += sizeof(double) * node.num_weight.size();
987
+ if (node.cat_sum.size()) {
988
+ for (const auto &v : node.cat_sum) {
989
+ n_bytes += sizeof(size_t);
990
+ n_bytes += sizeof(double) * v.size();
991
+ }
992
+ }
993
+ n_bytes += sizeof(double) * node.cat_weight.size();
994
+ return n_bytes;
257
995
  }
258
996
 
259
- #endif /* ifdef _MSC_VER */
997
+ template <class otype>
998
+ void serialize_node(const ImputeNode &node, otype &out)
999
+ {
1000
+ if (interrupt_switch) return;
1001
+
1002
+ size_t data_sizets[] = {
1003
+ node.parent,
1004
+ node.num_sum.size(),
1005
+ node.num_weight.size(),
1006
+ node.cat_sum.size(),
1007
+ node.cat_weight.size(),
1008
+ };
1009
+ write_bytes<size_t>((void*)data_sizets, (size_t)5, out);
1010
+
1011
+ write_bytes<double>((void*)node.num_sum.data(), node.num_sum.size(), out);
1012
+
1013
+ write_bytes<double>((void*)node.num_weight.data(), node.num_weight.size(), out);
1014
+
1015
+ if (node.cat_sum.size()) {
1016
+ size_t veclen;
1017
+ for (const auto &v : node.cat_sum) {
1018
+ veclen = v.size();
1019
+ write_bytes<size_t>((void*)&veclen, (size_t)1, out);
1020
+ write_bytes<double>((void*)v.data(), veclen, out);
1021
+ }
1022
+ }
1023
+
1024
+ write_bytes<double>((void*)node.cat_weight.data(), node.cat_weight.size(), out);
1025
+ }
1026
+
1027
+ template <class itype>
1028
+ void deserialize_node(ImputeNode &node, itype &in)
1029
+ {
1030
+ if (interrupt_switch) return;
1031
+
1032
+ size_t data_sizets[5];
1033
+ read_bytes<size_t>((void*)data_sizets, (size_t)5, in);
1034
+ node.parent = data_sizets[0];
1035
+
1036
+ read_bytes<double>(node.num_sum, data_sizets[1], in);
1037
+
1038
+ read_bytes<double>(node.num_weight, data_sizets[2], in);
1039
+
1040
+ node.cat_sum.resize(data_sizets[3]);
1041
+ if (data_sizets[3]) {
1042
+ size_t veclen;
1043
+ for (auto &v : node.cat_sum) {
1044
+ read_bytes<size_t>((void*)&veclen, (size_t)1, in);
1045
+ read_bytes<double>(v, veclen, in);
1046
+ }
1047
+ }
1048
+ node.cat_sum.shrink_to_fit();
1049
+
1050
+ read_bytes<double>(node.cat_weight, data_sizets[4], in);
1051
+ }
1052
+
1053
+ template <class itype, class saved_int_t, class saved_size_t>
1054
+ void deserialize_node(ImputeNode &node, itype &in, std::vector<char> &buffer, const bool diff_endian)
1055
+ {
1056
+ if (interrupt_switch) return;
1057
+
1058
+ size_t data_sizets[5];
1059
+ read_bytes<size_t, saved_size_t>((void*)data_sizets, (size_t)5, in, buffer, diff_endian);
1060
+ node.parent = data_sizets[0];
1061
+
1062
+ read_bytes<double, double>(node.num_sum, data_sizets[1], in, buffer, diff_endian);
1063
+
1064
+ read_bytes<double, double>(node.num_weight, data_sizets[2], in, buffer, diff_endian);
260
1065
 
1066
+ node.cat_sum.resize(data_sizets[3]);
1067
+ if (data_sizets[3]) {
1068
+ size_t veclen;
1069
+ for (auto &v : node.cat_sum) {
1070
+ read_bytes<size_t, saved_size_t>((void*)&veclen, (size_t)1, in, buffer, diff_endian);
1071
+ read_bytes<double, double>(v, veclen, in, buffer, diff_endian);
1072
+ }
1073
+ }
1074
+ node.cat_sum.shrink_to_fit();
1075
+
1076
+ read_bytes<double, double>(node.cat_weight, data_sizets[4], in, buffer, diff_endian);
1077
+ }
261
1078
 
262
- #endif /* _ENABLE_CEREAL */
1079
+ size_t get_size_node(const SingleTreeIndex &node) noexcept
1080
+ {
1081
+ size_t n_bytes = 0;
1082
+ n_bytes += sizeof(size_t);
1083
+ n_bytes += node.terminal_node_mappings.size() * sizeof(size_t);
1084
+ n_bytes += sizeof(size_t);
1085
+ n_bytes += node.node_distances.size() * sizeof(double);
1086
+ n_bytes += sizeof(size_t);
1087
+ n_bytes += node.node_depths.size() * sizeof(double);
1088
+ n_bytes += sizeof(size_t);
1089
+ n_bytes += node.reference_points.size() * sizeof(size_t);
1090
+ n_bytes += sizeof(size_t);
1091
+ n_bytes += node.reference_indptr.size() * sizeof(size_t);
1092
+ n_bytes += sizeof(size_t);
1093
+ n_bytes += node.reference_mapping.size() * sizeof(size_t);
1094
+ n_bytes += sizeof(size_t);
1095
+ return n_bytes;
1096
+ }
1097
+
1098
+ template <class otype>
1099
+ void serialize_node(const SingleTreeIndex &node, otype &out)
1100
+ {
1101
+ if (interrupt_switch) return;
1102
+
1103
+ size_t vec_size = node.terminal_node_mappings.size();
1104
+ write_bytes<size_t>((void*)&vec_size, (size_t)1, out);
1105
+ if (vec_size)
1106
+ write_bytes<size_t>((void*)node.terminal_node_mappings.data(), vec_size, out);
1107
+
1108
+ vec_size = node.node_distances.size();
1109
+ write_bytes<size_t>((void*)&vec_size, (size_t)1, out);
1110
+ if (vec_size)
1111
+ write_bytes<double>((void*)node.node_distances.data(), vec_size, out);
1112
+
1113
+ vec_size = node.node_depths.size();
1114
+ write_bytes<size_t>((void*)&vec_size, (size_t)1, out);
1115
+ if (vec_size)
1116
+ write_bytes<double>((void*)node.node_depths.data(), vec_size, out);
1117
+
1118
+ vec_size = node.reference_points.size();
1119
+ write_bytes<size_t>((void*)&vec_size, (size_t)1, out);
1120
+ if (vec_size)
1121
+ write_bytes<size_t>((void*)node.reference_points.data(), vec_size, out);
1122
+
1123
+ vec_size = node.reference_indptr.size();
1124
+ write_bytes<size_t>((void*)&vec_size, (size_t)1, out);
1125
+ if (vec_size)
1126
+ write_bytes<size_t>((void*)node.reference_indptr.data(), vec_size, out);
1127
+
1128
+ vec_size = node.reference_mapping.size();
1129
+ write_bytes<size_t>((void*)&vec_size, (size_t)1, out);
1130
+ if (vec_size)
1131
+ write_bytes<size_t>((void*)node.reference_mapping.data(), vec_size, out);
1132
+
1133
+ vec_size = node.n_terminal;
1134
+ write_bytes<size_t>((void*)&vec_size, (size_t)1, out);
1135
+ }
1136
+
1137
+ template <class itype>
1138
+ void deserialize_node(SingleTreeIndex &node, itype &in)
1139
+ {
1140
+ if (interrupt_switch) return;
1141
+
1142
+ size_t vec_size;
1143
+ read_bytes<size_t>((void*)&vec_size, (size_t)1, in);
1144
+ read_bytes<size_t>(node.terminal_node_mappings, vec_size, in);
1145
+
1146
+ read_bytes<size_t>((void*)&vec_size, (size_t)1, in);
1147
+ read_bytes<double>(node.node_distances, vec_size, in);
1148
+
1149
+ read_bytes<size_t>((void*)&vec_size, (size_t)1, in);
1150
+ read_bytes<double>(node.node_depths, vec_size, in);
1151
+
1152
+ read_bytes<size_t>((void*)&vec_size, (size_t)1, in);
1153
+ read_bytes<size_t>(node.reference_points, vec_size, in);
1154
+
1155
+ read_bytes<size_t>((void*)&vec_size, (size_t)1, in);
1156
+ read_bytes<size_t>(node.reference_indptr, vec_size, in);
1157
+
1158
+ read_bytes<size_t>((void*)&vec_size, (size_t)1, in);
1159
+ read_bytes<size_t>(node.reference_mapping, vec_size, in);
1160
+
1161
+ read_bytes<size_t>((void*)&node.n_terminal, (size_t)1, in);
1162
+ }
1163
+
1164
+ template <class itype, class saved_int_t, class saved_size_t>
1165
+ void deserialize_node(SingleTreeIndex &node, itype &in, std::vector<char> &buffer, const bool diff_endian)
1166
+ {
1167
+ if (interrupt_switch) return;
1168
+
1169
+ size_t vec_size;
1170
+ read_bytes<size_t, saved_size_t>((void*)&vec_size, (size_t)1, in, buffer, diff_endian);
1171
+ read_bytes<size_t, saved_size_t>(node.terminal_node_mappings, vec_size, in, buffer, diff_endian);
1172
+
1173
+ read_bytes<size_t, saved_size_t>((void*)&vec_size, (size_t)1, in, buffer, diff_endian);
1174
+ read_bytes<double, double>(node.node_distances, vec_size, in, buffer, diff_endian);
1175
+
1176
+ read_bytes<size_t, saved_size_t>((void*)&vec_size, (size_t)1, in, buffer, diff_endian);
1177
+ read_bytes<double, double>(node.node_depths, vec_size, in, buffer, diff_endian);
1178
+
1179
+ read_bytes<size_t, saved_size_t>((void*)&vec_size, (size_t)1, in, buffer, diff_endian);
1180
+ read_bytes<size_t, saved_size_t>(node.reference_points, vec_size, in, buffer, diff_endian);
1181
+
1182
+ read_bytes<size_t, saved_size_t>((void*)&vec_size, (size_t)1, in, buffer, diff_endian);
1183
+ read_bytes<size_t, saved_size_t>(node.reference_indptr, vec_size, in, buffer, diff_endian);
1184
+
1185
+ read_bytes<size_t, saved_size_t>((void*)&vec_size, (size_t)1, in, buffer, diff_endian);
1186
+ read_bytes<size_t, saved_size_t>(node.reference_mapping, vec_size, in, buffer, diff_endian);
1187
+
1188
+ read_bytes<size_t, saved_size_t>((void*)&node.n_terminal, (size_t)1, in, buffer, diff_endian);
1189
+ }
1190
+
1191
+ size_t get_size_model(const IsoForest &model) noexcept
1192
+ {
1193
+ size_t n_bytes = 0;
1194
+ n_bytes += sizeof(uint8_t) * 5;
1195
+ n_bytes += sizeof(double) * 2;
1196
+ n_bytes += sizeof(size_t) * 2;
1197
+ for (const auto &tree : model.trees) {
1198
+ n_bytes += sizeof(size_t);
1199
+ for (const auto &node : tree)
1200
+ n_bytes += get_size_node(node);
1201
+ }
1202
+ return n_bytes;
1203
+ }
1204
+
1205
+ template <class otype>
1206
+ void serialize_model(const IsoForest &model, otype &out)
1207
+ {
1208
+ if (interrupt_switch) return;
1209
+
1210
+ uint8_t data_en[] = {
1211
+ (uint8_t)model.new_cat_action,
1212
+ (uint8_t)model.cat_split_type,
1213
+ (uint8_t)model.missing_action,
1214
+ (uint8_t)model.has_range_penalty,
1215
+ (uint8_t)model.scoring_metric,
1216
+ };
1217
+ write_bytes<uint8_t>((void*)data_en, (size_t)5, out);
1218
+
1219
+ double data_doubles[] = {
1220
+ model.exp_avg_depth,
1221
+ model.exp_avg_sep
1222
+ };
1223
+ write_bytes<double>((void*)data_doubles, (size_t)2, out);
1224
+
1225
+ size_t data_sizets[] = {
1226
+ model.orig_sample_size,
1227
+ model.trees.size()
1228
+ };
1229
+ write_bytes<size_t>((void*)data_sizets, (size_t)2, out);
1230
+
1231
+ size_t veclen;
1232
+ for (const auto &tree : model.trees) {
1233
+ veclen = tree.size();
1234
+ write_bytes<size_t>((void*)&veclen, (size_t)1, out);
1235
+ for (const auto &node : tree)
1236
+ serialize_node(node, out);
1237
+ }
1238
+ }
1239
+
1240
+ template <class itype>
1241
+ void deserialize_model(IsoForest &model, itype &in)
1242
+ {
1243
+ if (interrupt_switch) return;
1244
+
1245
+ uint8_t data_en[5];
1246
+ read_bytes<uint8_t>((void*)data_en, (size_t)5, in);
1247
+ model.new_cat_action = (NewCategAction)data_en[0];
1248
+ model.cat_split_type = (CategSplit)data_en[1];
1249
+ model.missing_action = (MissingAction)data_en[2];
1250
+ model.has_range_penalty = (bool)data_en[3];
1251
+ model.scoring_metric = (ScoringMetric)data_en[4];
1252
+
1253
+ double data_doubles[2];
1254
+ read_bytes<double>((void*)data_doubles, (size_t)2, in);
1255
+ model.exp_avg_depth = data_doubles[0];
1256
+ model.exp_avg_sep = data_doubles[1];
1257
+
1258
+ size_t data_sizets[2];
1259
+ read_bytes<size_t>((void*)data_sizets, (size_t)2, in);
1260
+ model.orig_sample_size = data_sizets[0];
1261
+ model.trees.resize(data_sizets[1]);
1262
+ model.trees.shrink_to_fit();
1263
+
1264
+ size_t veclen;
1265
+ for (auto &tree : model.trees) {
1266
+ read_bytes<size_t>((void*)&veclen, (size_t)1, in);
1267
+ tree.resize(veclen);
1268
+ tree.shrink_to_fit();
1269
+ for (auto &node : tree)
1270
+ deserialize_node(node, in);
1271
+ }
1272
+ }
1273
+
1274
+ template <class itype, class saved_int_t, class saved_size_t>
1275
+ void deserialize_model(IsoForest &model, itype &in, std::vector<char> &buffer,
1276
+ const bool diff_endian, const bool lacks_range_penalty,
1277
+ const bool lacks_scoring_metric)
1278
+ {
1279
+ if (interrupt_switch) return;
1280
+
1281
+ if (lacks_range_penalty)
1282
+ {
1283
+ uint8_t data_en[3];
1284
+ read_bytes<uint8_t>((void*)data_en, (size_t)3, in);
1285
+ model.new_cat_action = (NewCategAction)data_en[0];
1286
+ model.cat_split_type = (CategSplit)data_en[1];
1287
+ model.missing_action = (MissingAction)data_en[2];
1288
+ }
1289
+
1290
+ else
1291
+ {
1292
+ uint8_t data_en[4];
1293
+ read_bytes<uint8_t>((void*)data_en, (size_t)4, in);
1294
+ model.new_cat_action = (NewCategAction)data_en[0];
1295
+ model.cat_split_type = (CategSplit)data_en[1];
1296
+ model.missing_action = (MissingAction)data_en[2];
1297
+ model.has_range_penalty = (bool)data_en[3];
1298
+ }
1299
+
1300
+ if (lacks_scoring_metric)
1301
+ {
1302
+ model.scoring_metric = Depth;
1303
+ }
1304
+
1305
+ else
1306
+ {
1307
+ uint8_t data_en;
1308
+ read_bytes<uint8_t>((void*)&data_en, (size_t)1, in);
1309
+ model.scoring_metric = (ScoringMetric)data_en;
1310
+ }
1311
+
1312
+ double data_doubles[2];
1313
+ read_bytes<double, double>((void*)data_doubles, (size_t)2, in, buffer, diff_endian);
1314
+ model.exp_avg_depth = data_doubles[0];
1315
+ model.exp_avg_sep = data_doubles[1];
1316
+
1317
+ size_t data_sizets[2];
1318
+ read_bytes<size_t, saved_size_t>((void*)data_sizets, (size_t)2, in, buffer, diff_endian);
1319
+ model.orig_sample_size = data_sizets[0];
1320
+ model.trees.resize(data_sizets[1]);
1321
+ model.trees.shrink_to_fit();
1322
+
1323
+ size_t veclen;
1324
+ for (auto &tree : model.trees) {
1325
+ read_bytes<size_t, saved_size_t>((void*)&veclen, (size_t)1, in, buffer, diff_endian);
1326
+ tree.resize(veclen);
1327
+ tree.shrink_to_fit();
1328
+ for (auto &node : tree)
1329
+ deserialize_node<itype, saved_int_t, saved_size_t>(node, in, buffer, diff_endian);
1330
+ }
1331
+ }
1332
+
1333
+ template <class otype>
1334
+ void serialize_additional_trees(const IsoForest &model, otype &out, size_t trees_prev)
1335
+ {
1336
+ size_t veclen;
1337
+ for (size_t ix = trees_prev; ix < model.trees.size(); ix++) {
1338
+ veclen = model.trees[ix].size();
1339
+ write_bytes<size_t>((void*)&veclen, (size_t)1, out);
1340
+ for (const auto &node : model.trees[ix])
1341
+ serialize_node(node, out);
1342
+ }
1343
+ }
1344
+
1345
+ size_t determine_serialized_size_additional_trees(const IsoForest &model, size_t old_ntrees) noexcept
1346
+ {
1347
+ size_t n_bytes = 0;
1348
+ for (size_t ix = 0; ix < model.trees.size(); ix++) {
1349
+ n_bytes += sizeof(size_t);
1350
+ for (const auto &node : model.trees[ix])
1351
+ n_bytes += get_size_node(node);
1352
+ }
1353
+ return n_bytes;
1354
+ }
1355
+
1356
+ size_t get_size_model(const ExtIsoForest &model) noexcept
1357
+ {
1358
+ size_t n_bytes = 0;
1359
+ n_bytes += sizeof(uint8_t) * 5;
1360
+ n_bytes += sizeof(double) * 2;
1361
+ n_bytes += sizeof(size_t) * 2;
1362
+ for (const auto &tree : model.hplanes) {
1363
+ n_bytes += sizeof(size_t);
1364
+ for (const auto &node : tree)
1365
+ n_bytes += get_size_node(node);
1366
+ }
1367
+ return n_bytes;
1368
+ }
1369
+
1370
+ template <class otype>
1371
+ void serialize_model(const ExtIsoForest &model, otype &out)
1372
+ {
1373
+ if (interrupt_switch) return;
1374
+
1375
+ uint8_t data_en[] = {
1376
+ (uint8_t)model.new_cat_action,
1377
+ (uint8_t)model.cat_split_type,
1378
+ (uint8_t)model.missing_action,
1379
+ (uint8_t)model.has_range_penalty,
1380
+ (uint8_t)model.scoring_metric
1381
+ };
1382
+ write_bytes<uint8_t>((void*)data_en, (size_t)5, out);
1383
+
1384
+ double data_doubles[] = {
1385
+ model.exp_avg_depth,
1386
+ model.exp_avg_sep
1387
+ };
1388
+ write_bytes<double>((void*)data_doubles, (size_t)2, out);
1389
+
1390
+ size_t data_sizets[] = {
1391
+ model.orig_sample_size,
1392
+ model.hplanes.size()
1393
+ };
1394
+ write_bytes<size_t>((void*)data_sizets, (size_t)2, out);
1395
+
1396
+ std::vector<uint8_t> buffer;
1397
+ size_t veclen;
1398
+ for (const auto &tree : model.hplanes) {
1399
+ veclen = tree.size();
1400
+ write_bytes<size_t>((void*)&veclen, (size_t)1, out);
1401
+ for (const auto &node : tree)
1402
+ serialize_node(node, out, buffer);
1403
+ }
1404
+ }
1405
+
1406
+ template <class itype>
1407
+ void deserialize_model(ExtIsoForest &model, itype &in)
1408
+ {
1409
+ if (interrupt_switch) return;
1410
+
1411
+ uint8_t data_en[5];
1412
+ read_bytes<uint8_t>((void*)data_en, (size_t)5, in);
1413
+ model.new_cat_action = (NewCategAction)data_en[0];
1414
+ model.cat_split_type = (CategSplit)data_en[1];
1415
+ model.missing_action = (MissingAction)data_en[2];
1416
+ model.has_range_penalty = (bool)data_en[3];
1417
+ model.scoring_metric = (ScoringMetric)data_en[4];
1418
+
1419
+ double data_doubles[2];
1420
+ read_bytes<double>((void*)data_doubles, (size_t)2, in);
1421
+ model.exp_avg_depth = data_doubles[0];
1422
+ model.exp_avg_sep = data_doubles[1];
1423
+
1424
+ size_t data_sizets[2];
1425
+ read_bytes<size_t>((void*)data_sizets, (size_t)2, in);
1426
+ model.orig_sample_size = data_sizets[0];
1427
+ model.hplanes.resize(data_sizets[1]);
1428
+ model.hplanes.shrink_to_fit();
1429
+
1430
+ size_t veclen;
1431
+ std::vector<uint8_t> buffer;
1432
+ for (auto &tree : model.hplanes) {
1433
+ read_bytes<size_t>((void*)&veclen, (size_t)1, in);
1434
+ tree.resize(veclen);
1435
+ tree.shrink_to_fit();
1436
+ for (auto &node : tree)
1437
+ deserialize_node(node, in, buffer);
1438
+ }
1439
+ }
1440
+
1441
+ template <class itype, class saved_int_t, class saved_size_t>
1442
+ void deserialize_model(ExtIsoForest &model, itype &in, std::vector<char> &buffer,
1443
+ const bool diff_endian, const bool lacks_range_penalty,
1444
+ const bool lacks_scoring_metric)
1445
+ {
1446
+ if (interrupt_switch) return;
1447
+
1448
+ if (lacks_range_penalty)
1449
+ {
1450
+ uint8_t data_en[3];
1451
+ read_bytes<uint8_t>((void*)data_en, (size_t)3, in);
1452
+ model.new_cat_action = (NewCategAction)data_en[0];
1453
+ model.cat_split_type = (CategSplit)data_en[1];
1454
+ model.missing_action = (MissingAction)data_en[2];
1455
+ }
1456
+
1457
+ else
1458
+ {
1459
+ uint8_t data_en[4];
1460
+ read_bytes<uint8_t>((void*)data_en, (size_t)4, in);
1461
+ model.new_cat_action = (NewCategAction)data_en[0];
1462
+ model.cat_split_type = (CategSplit)data_en[1];
1463
+ model.missing_action = (MissingAction)data_en[2];
1464
+ model.has_range_penalty = (bool)data_en[3];
1465
+ }
1466
+
1467
+ if (lacks_scoring_metric)
1468
+ {
1469
+ model.scoring_metric = Depth;
1470
+ }
1471
+
1472
+ else
1473
+ {
1474
+ uint8_t data_en;
1475
+ read_bytes<uint8_t>((void*)&data_en, (size_t)1, in);
1476
+ model.scoring_metric = (ScoringMetric)data_en;
1477
+ }
1478
+
1479
+ double data_doubles[2];
1480
+ read_bytes<double, double>((void*)data_doubles, (size_t)2, in, buffer, diff_endian);
1481
+ model.exp_avg_depth = data_doubles[0];
1482
+ model.exp_avg_sep = data_doubles[1];
1483
+
1484
+ size_t data_sizets[2];
1485
+ read_bytes<size_t, saved_size_t>((void*)data_sizets, (size_t)2, in, buffer, diff_endian);
1486
+ model.orig_sample_size = data_sizets[0];
1487
+ model.hplanes.resize(data_sizets[1]);
1488
+ model.hplanes.shrink_to_fit();
1489
+
1490
+ size_t veclen;
1491
+ std::vector<uint8_t> buffer_;
1492
+ for (auto &tree : model.hplanes) {
1493
+ read_bytes<size_t, saved_size_t>((void*)&veclen, (size_t)1, in, buffer, diff_endian);
1494
+ tree.resize(veclen);
1495
+ tree.shrink_to_fit();
1496
+ for (auto &node : tree)
1497
+ deserialize_node<itype, saved_int_t, saved_size_t>(node, in, buffer_, buffer, diff_endian);
1498
+ }
1499
+ }
1500
+
1501
+ template <class otype>
1502
+ void serialize_additional_trees(const ExtIsoForest &model, otype &out, size_t trees_prev)
1503
+ {
1504
+ if (interrupt_switch) return;
1505
+
1506
+ std::vector<uint8_t> buffer;
1507
+ size_t veclen;
1508
+ for (size_t ix = trees_prev; ix < model.hplanes.size(); ix++) {
1509
+ veclen = model.hplanes[ix].size();
1510
+ write_bytes<size_t>((void*)&veclen, (size_t)1, out);
1511
+ for (const auto &node : model.hplanes[ix])
1512
+ serialize_node(node, out, buffer);
1513
+ }
1514
+ }
1515
+
1516
+ size_t determine_serialized_size_additional_trees(const ExtIsoForest &model, size_t old_ntrees) noexcept
1517
+ {
1518
+ size_t n_bytes = 0;
1519
+ for (size_t ix = 0; ix < model.hplanes.size(); ix++) {
1520
+ n_bytes += sizeof(size_t);
1521
+ for (const auto &node : model.hplanes[ix])
1522
+ n_bytes += get_size_node(node);
1523
+ }
1524
+ return n_bytes;
1525
+ }
1526
+
1527
+ size_t get_size_model(const Imputer &model) noexcept
1528
+ {
1529
+ size_t n_bytes = 0;
1530
+ n_bytes += sizeof(size_t) * 6;
1531
+ n_bytes += sizeof(int) * model.ncat.size();
1532
+ n_bytes += sizeof(double) * model.col_means.size();
1533
+ n_bytes += sizeof(int) * model.col_modes.size();
1534
+ for (const auto &tree : model.imputer_tree) {
1535
+ n_bytes += sizeof(size_t);
1536
+ for (const auto &node : tree)
1537
+ n_bytes += get_size_node(node);
1538
+ }
1539
+ return n_bytes;
1540
+ }
1541
+
1542
+ template <class otype>
1543
+ void serialize_model(const Imputer &model, otype &out)
1544
+ {
1545
+ if (interrupt_switch) return;
1546
+
1547
+ size_t data_sizets[] = {
1548
+ model.ncols_numeric,
1549
+ model.ncols_categ,
1550
+ model.ncat.size(),
1551
+ model.imputer_tree.size(),
1552
+ model.col_means.size(),
1553
+ model.col_modes.size()
1554
+ };
1555
+ write_bytes<size_t>((void*)data_sizets, (size_t)6, out);
1556
+
1557
+ write_bytes<int>((void*)model.ncat.data(), model.ncat.size(), out);
1558
+
1559
+ write_bytes<double>((void*)model.col_means.data(), model.col_means.size(), out);
1560
+
1561
+ write_bytes<int>((void*)model.col_modes.data(), model.col_modes.size(), out);
1562
+
1563
+ size_t veclen;
1564
+ for (const auto &tree : model.imputer_tree) {
1565
+ veclen = tree.size();
1566
+ write_bytes<size_t>((void*)&veclen, (size_t)1, out);
1567
+ for (const auto &node : tree)
1568
+ serialize_node(node, out);
1569
+ }
1570
+ }
1571
+
1572
+ template <class itype>
1573
+ void deserialize_model(Imputer &model, itype &in)
1574
+ {
1575
+ if (interrupt_switch) return;
1576
+
1577
+ size_t data_sizets[6];
1578
+ read_bytes<size_t>((void*)data_sizets, (size_t)6, in);
1579
+ model.ncols_numeric = data_sizets[0];
1580
+ model.ncols_categ = data_sizets[1];
1581
+ model.ncat.resize(data_sizets[2]);
1582
+ model.imputer_tree.resize(data_sizets[3]);
1583
+ model.col_means.resize(data_sizets[4]);
1584
+ model.col_modes.resize(data_sizets[5]);
1585
+
1586
+ model.ncat.shrink_to_fit();
1587
+ model.imputer_tree.shrink_to_fit();
1588
+ model.col_means.shrink_to_fit();
1589
+ model.col_modes.shrink_to_fit();
1590
+
1591
+ read_bytes<int>(model.ncat, model.ncat.size(), in);
1592
+
1593
+ read_bytes<double>(model.col_means, model.col_means.size(), in);
1594
+
1595
+ read_bytes<int>(model.col_modes, model.col_modes.size(), in);
1596
+
1597
+ size_t veclen;
1598
+ for (auto &tree : model.imputer_tree) {
1599
+ read_bytes<size_t>((void*)&veclen, (size_t)1, in);
1600
+ tree.resize(veclen);
1601
+ tree.shrink_to_fit();
1602
+ for (auto &node : tree)
1603
+ deserialize_node(node, in);
1604
+ }
1605
+ }
1606
+
1607
+ template <class itype, class saved_int_t, class saved_size_t>
1608
+ void deserialize_model(Imputer &model, itype &in, std::vector<char> &buffer,
1609
+ const bool diff_endian, const bool lacks_range_penalty,
1610
+ const bool lacks_scoring_metric)
1611
+ {
1612
+ if (interrupt_switch) return;
1613
+
1614
+ size_t data_sizets[6];
1615
+ read_bytes<size_t, saved_size_t>((void*)data_sizets, (size_t)6, in, buffer, diff_endian);
1616
+ model.ncols_numeric = data_sizets[0];
1617
+ model.ncols_categ = data_sizets[1];
1618
+ model.ncat.resize(data_sizets[2]);
1619
+ model.imputer_tree.resize(data_sizets[3]);
1620
+ model.col_means.resize(data_sizets[4]);
1621
+ model.col_modes.resize(data_sizets[5]);
1622
+
1623
+ model.ncat.shrink_to_fit();
1624
+ model.imputer_tree.shrink_to_fit();
1625
+ model.col_means.shrink_to_fit();
1626
+ model.col_modes.shrink_to_fit();
1627
+
1628
+ read_bytes<int, saved_int_t>(model.ncat, model.ncat.size(), in, buffer, diff_endian);
1629
+
1630
+ read_bytes<double, double>(model.col_means, model.col_means.size(), in, buffer, diff_endian);
1631
+
1632
+ read_bytes<int, saved_int_t>(model.col_modes, model.col_modes.size(), in, buffer, diff_endian);
1633
+
1634
+ size_t veclen;
1635
+ for (auto &tree : model.imputer_tree) {
1636
+ read_bytes<size_t, saved_size_t>((void*)&veclen, (size_t)1, in, buffer, diff_endian);
1637
+ tree.resize(veclen);
1638
+ tree.shrink_to_fit();
1639
+ for (auto &node : tree)
1640
+ deserialize_node<itype, saved_int_t, saved_size_t>(node, in, buffer, diff_endian);
1641
+ }
1642
+ }
1643
+
1644
+ template <class otype>
1645
+ void serialize_additional_trees(const Imputer &model, otype &out, size_t trees_prev)
1646
+ {
1647
+ size_t veclen;
1648
+ for (size_t ix = trees_prev; ix < model.imputer_tree.size(); ix++) {
1649
+ veclen = model.imputer_tree[ix].size();
1650
+ write_bytes<size_t>((void*)&veclen, (size_t)1, out);
1651
+ for (const auto &node : model.imputer_tree[ix])
1652
+ serialize_node(node, out);
1653
+ }
1654
+ }
1655
+
1656
+ size_t determine_serialized_size_additional_trees(const Imputer &model, size_t old_ntrees) noexcept
1657
+ {
1658
+ size_t n_bytes = 0;
1659
+ for (size_t ix = 0; ix < model.imputer_tree.size(); ix++) {
1660
+ n_bytes += sizeof(size_t);
1661
+ for (const auto &node : model.imputer_tree[ix])
1662
+ n_bytes += get_size_node(node);
1663
+ }
1664
+ return n_bytes;
1665
+ }
1666
+
1667
+ size_t get_size_model(const TreesIndexer &model) noexcept
1668
+ {
1669
+ size_t n_bytes = 0;
1670
+ n_bytes += sizeof(size_t);
1671
+ for (const auto &node : model.indices)
1672
+ n_bytes += get_size_node(node);
1673
+ return n_bytes;
1674
+ }
1675
+
1676
+ template <class otype>
1677
+ void serialize_model(const TreesIndexer &model, otype &out)
1678
+ {
1679
+ if (interrupt_switch) return;
1680
+
1681
+ size_t vec_size = model.indices.size();
1682
+ write_bytes<size_t>((void*)&vec_size, (size_t)1, out);
1683
+
1684
+ for (const auto &tree : model.indices)
1685
+ serialize_node(tree, out);
1686
+ }
1687
+
1688
+ template <class itype>
1689
+ void deserialize_model(TreesIndexer &model, itype &in)
1690
+ {
1691
+ if (interrupt_switch) return;
1692
+
1693
+ size_t vec_size;
1694
+ read_bytes<size_t>(&vec_size, (size_t)1, in);
1695
+ model.indices.resize(vec_size);
1696
+ model.indices.shrink_to_fit();
1697
+ for (auto &tree : model.indices)
1698
+ deserialize_node(tree, in);
1699
+ }
1700
+
1701
+ template <class itype, class saved_int_t, class saved_size_t>
1702
+ void deserialize_model(TreesIndexer &model, itype &in, std::vector<char> &buffer,
1703
+ const bool diff_endian, const bool lacks_range_penalty,
1704
+ const bool lacks_scoring_metric)
1705
+ {
1706
+ if (interrupt_switch) return;
1707
+
1708
+ size_t vec_size;
1709
+ read_bytes<size_t, saved_size_t>(&vec_size, (size_t)1, in, buffer, diff_endian);
1710
+ model.indices.resize(vec_size);
1711
+ model.indices.shrink_to_fit();
1712
+ for (auto &tree : model.indices)
1713
+ deserialize_node<itype, saved_int_t, saved_size_t>(tree, in, buffer, diff_endian);
1714
+ }
1715
+
1716
+ template <class otype>
1717
+ void serialize_additional_trees(const TreesIndexer &model, otype &out, size_t trees_prev)
1718
+ {
1719
+ for (size_t ix = trees_prev; ix < model.indices.size(); ix++)
1720
+ serialize_node(model.indices[ix], out);
1721
+ }
1722
+
1723
+ size_t determine_serialized_size_additional_trees(const TreesIndexer &model, size_t old_ntrees) noexcept
1724
+ {
1725
+ size_t n_bytes = 0;
1726
+ for (size_t ix = 0; ix < model.indices.size(); ix++)
1727
+ n_bytes += get_size_node(model.indices[ix]);
1728
+ return n_bytes;
1729
+ }
1730
+
1731
/* Returns true when the byte stored at the lowest address of an 'int'
   holding the value 1 is non-zero, i.e. when the least-significant byte
   comes first in memory. */
bool get_is_little_endian() noexcept
{
    const int probe = 1;
    unsigned char first_byte;
    memcpy(&first_byte, &probe, sizeof(unsigned char));
    return first_byte != 0;
}
1736
+
1737
+ size_t get_size_setup_info() noexcept
1738
+ {
1739
+ size_t n_bytes = 0;
1740
+ n_bytes += sizeof(unsigned char) * SIZE_WATERMARK;
1741
+ n_bytes += sizeof(uint8_t) * 9;
1742
+ return n_bytes;
1743
+ }
1744
+
1745
+ template <class otype>
1746
+ void add_setup_info(otype &out, bool full_watermark)
1747
+ {
1748
+ write_bytes<unsigned char>((void*)(full_watermark? watermark: incomplete_watermark), SIZE_WATERMARK, out);
1749
+ /*
1750
+ 0 : endianness
1751
+ 1-3: isotree version
1752
+ 4: double type
1753
+ 5: size_t limit
1754
+ 6: sizeof(int)
1755
+ 7: sizeof(size_t)
1756
+ 8: sizeof(double)
1757
+ */
1758
+ uint8_t setup_info[] = {
1759
+ (uint8_t)get_is_little_endian(),
1760
+ (uint8_t)ISOTREE_VERSION_MAJOR,
1761
+ (uint8_t)ISOTREE_VERSION_MINOR,
1762
+ (uint8_t)ISOTREE_VERSION_PATCH,
1763
+ #if defined(HAS_IEEE_DOUBLE)
1764
+ (uint8_t)IsNormalDouble,
1765
+ #else
1766
+ (uint8_t)IsAbnormalDouble,
1767
+ #endif
1768
+ #if SIZE_MAX == UINT32_MAX
1769
+ (uint8_t)Is32Bit,
1770
+ #elif SIZE_MAX == UINT64_MAX
1771
+ (uint8_t)Is64Bit,
1772
+ #else
1773
+ (uint8_t)IsOther,
1774
+ #endif
1775
+ (uint8_t)sizeof(int),
1776
+ (uint8_t)sizeof(size_t),
1777
+ (uint8_t)sizeof(double)
1778
+ };
1779
+ write_bytes<uint8_t>((void*)setup_info, (size_t)9, out);
1780
+ }
1781
+
1782
+ template <class otype>
1783
+ void add_full_watermark(otype &out)
1784
+ {
1785
+ write_bytes<unsigned char>((void*)watermark, SIZE_WATERMARK, out);
1786
+ }
1787
+
1788
+ template <class itype>
1789
+ void check_setup_info
1790
+ (
1791
+ itype &in,
1792
+ bool &has_watermark,
1793
+ bool &has_incomplete_watermark,
1794
+ bool &has_same_double,
1795
+ bool &has_same_int_size,
1796
+ bool &has_same_size_t_size,
1797
+ bool &has_same_endianness,
1798
+ PlatformSize &saved_int_t,
1799
+ PlatformSize &saved_size_t,
1800
+ PlatformEndianness &saved_endian,
1801
+ bool &is_deserializable,
1802
+ bool &lacks_range_penalty,
1803
+ bool &lacks_scoring_metric,
1804
+ bool &lacks_indexer
1805
+ )
1806
+ {
1807
+ is_deserializable = false;
1808
+ has_incomplete_watermark = false;
1809
+ lacks_range_penalty = false;
1810
+ lacks_scoring_metric = false;
1811
+ lacks_indexer = false;
1812
+
1813
+ unsigned char watermark_in[SIZE_WATERMARK];
1814
+ read_bytes<unsigned char>((void*)watermark_in, SIZE_WATERMARK, in);
1815
+ if (memcmp(watermark_in, (unsigned char*)watermark, SIZE_WATERMARK)) {
1816
+ has_watermark = false;
1817
+ if (!memcmp(watermark_in, (unsigned char*)incomplete_watermark, SIZE_WATERMARK))
1818
+ has_incomplete_watermark = true;
1819
+ return;
1820
+ }
1821
+ else {
1822
+ has_watermark = true;
1823
+ }
1824
+
1825
+ uint8_t setup_info[9];
1826
+ read_bytes<uint8_t>((void*)setup_info, (size_t)9, in);
1827
+
1828
+ bool is_little_endian = get_is_little_endian();
1829
+ if ((bool)is_little_endian != (bool)setup_info[0]) {
1830
+ has_same_endianness = false;
1831
+ saved_endian = is_little_endian? PlatformLittleEndian : PlatformBigEndian;
1832
+ }
1833
+ else {
1834
+ has_same_endianness = true;
1835
+ }
1836
+
1837
+ if (setup_info[1] == 0 && setup_info[2] == 3 && setup_info[3] == 0) {
1838
+ lacks_range_penalty = true;
1839
+ }
1840
+
1841
+ if (setup_info[1] == 0 && setup_info[2] < 4) {
1842
+ lacks_scoring_metric = true;
1843
+ }
1844
+
1845
+ if (setup_info[1] == 0 && setup_info[2] < 5) {
1846
+ lacks_indexer = true;
1847
+ }
1848
+
1849
+ if (setup_info[4] == (uint8_t)IsAbnormalDouble)
1850
+ fprintf(stderr, "Warning: input model uses non-standard numeric type, might read correctly.\n");
1851
+
1852
+ switch(setup_info[6])
1853
+ {
1854
+ case 16: {saved_int_t = Is16Bit; break;}
1855
+ case 32: {saved_int_t = Is32Bit; break;}
1856
+ case 64: {saved_int_t = Is64Bit; break;}
1857
+ default: {saved_int_t = IsOther; break;}
1858
+ }
1859
+ if ((uint8_t)sizeof(int) != setup_info[6]) {
1860
+ has_same_int_size = false;
1861
+ if (sizeof(uint8_t) != 1) return;
1862
+ if (saved_int_t == IsOther) return;
1863
+ }
1864
+ else {
1865
+ has_same_int_size = true;
1866
+ }
1867
+
1868
+
1869
+ if ((uint8_t)sizeof(size_t) != setup_info[7]) {
1870
+ has_same_size_t_size = false;
1871
+ if (sizeof(uint8_t) != 1) return;
1872
+ }
1873
+ else {
1874
+ has_same_size_t_size = true;
1875
+ }
1876
+
1877
+
1878
+ if ((uint8_t)sizeof(double) != setup_info[8]) {
1879
+ has_same_double = false;
1880
+ return;
1881
+ }
1882
+ else {
1883
+ has_same_double = true;
1884
+ }
1885
+
1886
+ saved_size_t = (PlatformSize)setup_info[5];
1887
+ #if SIZE_MAX == UINT32_MAX
1888
+ if (setup_info[5] != (uint8_t)Is32Bit)
1889
+ #elif SIZE_MAX == UINT64_MAX
1890
+ if (setup_info[5] != (uint8_t)Is64Bit)
1891
+ #else
1892
+ if (setup_info[5] != (uint8_t)IsOther)
1893
+ #endif
1894
+ {
1895
+ has_same_size_t_size = false;
1896
+ if (saved_size_t == IsOther)
1897
+ return;
1898
+ }
1899
+
1900
+ else {
1901
+ has_same_size_t_size = true;
1902
+ }
1903
+
1904
+ is_deserializable = true;
1905
+ }
1906
+
1907
+ template <class itype>
1908
+ void check_setup_info(itype &in)
1909
+ {
1910
+ bool has_watermark = false;
1911
+ bool has_incomplete_watermark = false;
1912
+ bool has_same_double = false;
1913
+ bool has_same_int_size = false;
1914
+ bool has_same_size_t_size = false;
1915
+ bool has_same_endianness = false;
1916
+ PlatformSize saved_int_t;
1917
+ PlatformSize saved_size_t;
1918
+ PlatformEndianness saved_endian;
1919
+ bool is_deserializable = false;
1920
+ bool lacks_range_penalty = false;
1921
+ bool lacks_scoring_metric = false;
1922
+ bool lacks_indexer = false;
1923
+
1924
+ check_setup_info(
1925
+ in,
1926
+ has_watermark,
1927
+ has_incomplete_watermark,
1928
+ has_same_double,
1929
+ has_same_int_size,
1930
+ has_same_size_t_size,
1931
+ has_same_endianness,
1932
+ saved_int_t,
1933
+ saved_size_t,
1934
+ saved_endian,
1935
+ is_deserializable,
1936
+ lacks_range_penalty,
1937
+ lacks_scoring_metric,
1938
+ lacks_indexer
1939
+ );
1940
+
1941
+ if (!has_watermark) {
1942
+ if (has_incomplete_watermark)
1943
+ throw std::runtime_error("Error: serialized model is incomplete.\n");
1944
+ else
1945
+ throw std::runtime_error("Error: input is not an isotree model.\n");
1946
+ }
1947
+ if (!has_same_double)
1948
+ throw std::runtime_error("Error: input model was saved in a machine with different 'double' type.\n");
1949
+ if (!has_same_int_size)
1950
+ throw std::runtime_error("Error: input model was saved in a machine with different integer type.\n");
1951
+ if (!has_same_size_t_size)
1952
+ throw std::runtime_error("Error: input model was saved in a machine with different 'size_t' type.\n");
1953
+ if (!has_same_endianness)
1954
+ throw std::runtime_error("Error: input model was saved in a machine with different endianness.\n");
1955
+ if (lacks_range_penalty || lacks_scoring_metric || lacks_indexer)
1956
+ throw std::runtime_error("Error: input model was produced with an incompatible earlier version, needs to be re-serialized.\n");
1957
+ }
1958
+
1959
+ template <class itype>
1960
+ void check_setup_info
1961
+ (
1962
+ itype &in,
1963
+ bool &has_same_int_size,
1964
+ bool &has_same_size_t_size,
1965
+ bool &has_same_endianness,
1966
+ PlatformSize &saved_int_t,
1967
+ PlatformSize &saved_size_t,
1968
+ PlatformEndianness &saved_endian,
1969
+ bool &lacks_range_penalty,
1970
+ bool &lacks_scoring_metric,
1971
+ bool &lacks_indexer
1972
+ )
1973
+ {
1974
+ bool has_watermark = false;
1975
+ bool has_incomplete_watermark = false;
1976
+ bool has_same_double = false;
1977
+ bool is_deserializable = false;
1978
+
1979
+ check_setup_info(
1980
+ in,
1981
+ has_watermark,
1982
+ has_incomplete_watermark,
1983
+ has_same_double,
1984
+ has_same_int_size,
1985
+ has_same_size_t_size,
1986
+ has_same_endianness,
1987
+ saved_int_t,
1988
+ saved_size_t,
1989
+ saved_endian,
1990
+ is_deserializable,
1991
+ lacks_range_penalty,
1992
+ lacks_scoring_metric,
1993
+ lacks_indexer
1994
+ );
1995
+
1996
+ if (!has_watermark) {
1997
+ if (has_incomplete_watermark)
1998
+ throw std::runtime_error("Error: serialized model is incomplete.\n");
1999
+ else
2000
+ throw std::runtime_error("Error: input is not an isotree model.\n");
2001
+ }
2002
+ if (!has_same_double)
2003
+ throw std::runtime_error("Error: input model was saved in a machine with different 'double' type.\n");
2004
+ if (!is_deserializable)
2005
+ throw std::runtime_error("Error: input format is incompatible.\n");
2006
+ }
2007
+
2008
/* Number of bytes taken by the trailer that closes a serialized object:
   one 'uint8_t' followed by one 'size_t'. */
size_t get_size_ending_metadata() noexcept
{
    return sizeof(uint8_t) + sizeof(size_t);
}
2015
+
2016
+ template <class Model>
2017
+ size_t determine_serialized_size(const Model &model) noexcept
2018
+ {
2019
+ size_t n_bytes = 0;
2020
+ n_bytes += get_size_setup_info();
2021
+ n_bytes += sizeof(uint8_t);
2022
+ n_bytes += sizeof(size_t);
2023
+ n_bytes += get_size_model(model);
2024
+ n_bytes += get_size_ending_metadata();
2025
+ return n_bytes;
2026
+ }
2027
+
2028
+ uint8_t get_model_code(const IsoForest &model) noexcept
2029
+ {
2030
+ return IsoForestModel;
2031
+ }
2032
+
2033
+ uint8_t get_model_code(const ExtIsoForest &model) noexcept
2034
+ {
2035
+ return ExtIsoForestModel;
2036
+ }
2037
+
2038
+ uint8_t get_model_code(const Imputer &model) noexcept
2039
+ {
2040
+ return ImputerModel;
2041
+ }
2042
+
2043
+ uint8_t get_model_code(const TreesIndexer &model) noexcept
2044
+ {
2045
+ return IndexerModel;
2046
+ }
2047
+
2048
+ template <class Model, class otype>
2049
+ void serialization_pipeline(const Model &model, otype &out)
2050
+ {
2051
+ SignalSwitcher ss = SignalSwitcher();
2052
+
2053
+ auto pos_watermark = set_return_position(out);
2054
+
2055
+ add_setup_info(out, false);
2056
+ uint8_t model_type = get_model_code(model);
2057
+ write_bytes<uint8_t>((void*)&model_type, (size_t)1, out);
2058
+ size_t size_model = get_size_model(model);
2059
+ write_bytes<size_t>((void*)&size_model, (size_t)1, out);
2060
+ serialize_model(model, out);
2061
+ check_interrupt_switch(ss);
2062
+
2063
+ /* This last bit will be left open in order to signal if anything follows,
2064
+ in case it's decided to change the format in the future or to add
2065
+ something additional, along with a 'size_t' slot in case it would need
2066
+ to jump ahead or something like that. */
2067
+ uint8_t ending_type = (uint8_t)EndsHere;
2068
+ write_bytes<uint8_t>((void*)&ending_type, (size_t)1, out);
2069
+ size_t jump_ahead = 0;
2070
+ write_bytes<size_t>((void*)&jump_ahead, (size_t)1, out);
2071
+
2072
+ auto end_pos = set_return_position(out);
2073
+ return_to_position(out, pos_watermark);
2074
+ add_full_watermark(out);
2075
+ return_to_position(out, end_pos);
2076
+ }
2077
+
2078
+ template <class Model, class itype>
2079
+ void deserialization_pipeline(Model &model, itype &in)
2080
+ {
2081
+ SignalSwitcher ss = SignalSwitcher();
2082
+
2083
+ bool has_same_int_size;
2084
+ bool has_same_size_t_size;
2085
+ bool has_same_endianness;
2086
+ PlatformSize saved_int_t;
2087
+ PlatformSize saved_size_t;
2088
+ PlatformEndianness saved_endian;
2089
+ bool lacks_range_penalty;
2090
+ bool lacks_scoring_metric;
2091
+ bool lacks_indexer; /* <- ignored */
2092
+
2093
+ check_setup_info(
2094
+ in,
2095
+ has_same_int_size,
2096
+ has_same_size_t_size,
2097
+ has_same_endianness,
2098
+ saved_int_t,
2099
+ saved_size_t,
2100
+ saved_endian,
2101
+ lacks_range_penalty,
2102
+ lacks_scoring_metric,
2103
+ lacks_indexer
2104
+ );
2105
+
2106
+ uint8_t model_type = get_model_code(model);
2107
+ uint8_t model_in;
2108
+ read_bytes<uint8_t>((void*)&model_in, (size_t)1, in);
2109
+ if (model_type != model_in)
2110
+ throw std::runtime_error("Object to de-serialize does not match with the supplied type.\n");
2111
+
2112
+ size_t size_model;
2113
+ if (has_same_int_size && has_same_size_t_size && has_same_endianness && !lacks_range_penalty && !lacks_scoring_metric)
2114
+ {
2115
+ read_bytes<size_t>((void*)&size_model, (size_t)1, in);
2116
+ deserialize_model(model, in);
2117
+ }
2118
+
2119
+ else
2120
+ {
2121
+ std::vector<char> buffer;
2122
+ const bool diff_endian = !has_same_endianness;
2123
+
2124
+ if (saved_int_t == Is16Bit && saved_size_t == Is32Bit)
2125
+ {
2126
+ read_bytes<size_t, uint32_t>((void*)&size_model, (size_t)1, in, buffer, diff_endian);
2127
+ deserialize_model<itype, int16_t, uint32_t>(model, in, buffer, diff_endian, lacks_range_penalty, lacks_scoring_metric);
2128
+ }
2129
+
2130
+ else if (saved_int_t == Is32Bit && saved_size_t == Is32Bit)
2131
+ {
2132
+ read_bytes<size_t, uint32_t>((void*)&size_model, (size_t)1, in, buffer, diff_endian);
2133
+ deserialize_model<itype, int32_t, uint32_t>(model, in, buffer, diff_endian, lacks_range_penalty, lacks_scoring_metric);
2134
+ }
2135
+
2136
+ else if (saved_int_t == Is64Bit && saved_size_t == Is32Bit)
2137
+ {
2138
+ read_bytes<size_t, uint32_t>((void*)&size_model, (size_t)1, in, buffer, diff_endian);
2139
+ deserialize_model<itype, int64_t, uint32_t>(model, in, buffer, diff_endian, lacks_range_penalty, lacks_scoring_metric);
2140
+ }
2141
+
2142
+ else if (saved_int_t == Is16Bit && saved_size_t == Is64Bit)
2143
+ {
2144
+ read_bytes<size_t, uint64_t>((void*)&size_model, (size_t)1, in, buffer, diff_endian);
2145
+ deserialize_model<itype, int16_t, uint64_t>(model, in, buffer, diff_endian, lacks_range_penalty, lacks_scoring_metric);
2146
+ }
2147
+
2148
+ else if (saved_int_t == Is32Bit && saved_size_t == Is64Bit)
2149
+ {
2150
+ read_bytes<size_t, uint64_t>((void*)&size_model, (size_t)1, in, buffer, diff_endian);
2151
+ deserialize_model<itype, int32_t, uint64_t>(model, in, buffer, diff_endian, lacks_range_penalty, lacks_scoring_metric);
2152
+ }
2153
+
2154
+ else if (saved_int_t == Is64Bit && saved_size_t == Is64Bit)
2155
+ {
2156
+ read_bytes<size_t, uint64_t>((void*)&size_model, (size_t)1, in, buffer, diff_endian);
2157
+ deserialize_model<itype, int64_t, uint64_t>(model, in, buffer, diff_endian, lacks_range_penalty, lacks_scoring_metric);
2158
+ }
2159
+
2160
+ else
2161
+ {
2162
+ unexpected_error();
2163
+ }
2164
+ }
2165
+
2166
+ check_interrupt_switch(ss);
2167
+
2168
+ if (lacks_range_penalty)
2169
+ {
2170
+ add_range_penalty(model);
2171
+ check_interrupt_switch(ss);
2172
+ }
2173
+
2174
+ /* Not currently used, but left in case the format changes */
2175
+ uint8_t ending_type;
2176
+ read_bytes<uint8_t>((void*)&ending_type, (size_t)1, in);
2177
+ size_t jump_ahead;
2178
+ read_bytes<size_t>((void*)&jump_ahead, (size_t)1, in);
2179
+ }
2180
+
2181
+ void re_serialization_pipeline(const IsoForest &model, char *&out)
2182
+ {
2183
+ SignalSwitcher ss = SignalSwitcher();
2184
+
2185
+ check_setup_info(out);
2186
+
2187
+ uint8_t model_in;
2188
+ memcpy(&model_in, out, sizeof(uint8_t));
2189
+ out += sizeof(uint8_t);
2190
+ if (model_in != get_model_code(model))
2191
+ throw std::runtime_error("Object to incrementally-serialize does not match with the supplied type.\n");
2192
+
2193
+ char *pos_size = out;
2194
+ size_t old_size;
2195
+ memcpy(&old_size, out, sizeof(size_t));
2196
+ out += sizeof(size_t);
2197
+
2198
+ char *old_end = out + old_size;
2199
+ uint8_t old_ending_type;
2200
+ memcpy(&old_ending_type, old_end, sizeof(uint8_t));
2201
+ size_t old_jump_ahead;
2202
+ memcpy(&old_jump_ahead, old_end + sizeof(uint8_t), sizeof(size_t));
2203
+
2204
+ size_t new_size = get_size_model(model);
2205
+ size_t new_ntrees = model.trees.size();
2206
+
2207
+ try
2208
+ {
2209
+ out += sizeof(uint8_t) * 3;
2210
+ if (model.has_range_penalty)
2211
+ {
2212
+ uint8_t has_range_penalty;
2213
+ memcpy(&has_range_penalty, out, sizeof(uint8_t));
2214
+ if (!has_range_penalty)
2215
+ memcpy(out, &has_range_penalty, sizeof(uint8_t));
2216
+ }
2217
+ out += sizeof(uint8_t);
2218
+ out += sizeof(double) * 2;
2219
+ out += sizeof(size_t);
2220
+
2221
+ char *pos_ntrees = out;
2222
+ size_t old_ntrees;
2223
+ memcpy(&old_ntrees, out, sizeof(size_t));
2224
+
2225
+ serialize_additional_trees(model, old_end, old_ntrees);
2226
+
2227
+ out = old_end;
2228
+ uint8_t ending_type = (uint8_t)EndsHere;
2229
+ memcpy(out, &ending_type, sizeof(uint8_t));
2230
+ out += sizeof(uint8_t);
2231
+ size_t jump_ahead = 0;
2232
+ memcpy(out, &jump_ahead, sizeof(size_t));
2233
+ out += sizeof(size_t);
2234
+
2235
+ /* Leave this for the end in case something fails, so as not to
2236
+ render the serialized bytes unusable. */
2237
+ memcpy(pos_size, &new_size, sizeof(size_t));
2238
+ memcpy(pos_ntrees, &new_ntrees, sizeof(size_t));
2239
+ }
2240
+
2241
+ catch(...)
2242
+ {
2243
+ memcpy(out, &old_ending_type, sizeof(uint8_t));
2244
+ memcpy(out + sizeof(uint8_t), &old_jump_ahead, sizeof(size_t));
2245
+ throw;
2246
+ }
2247
+
2248
+ check_interrupt_switch(ss);
2249
+ }
2250
+
2251
+ void re_serialization_pipeline(const ExtIsoForest &model, char *&out)
2252
+ {
2253
+ SignalSwitcher ss = SignalSwitcher();
2254
+
2255
+ check_setup_info(out);
2256
+
2257
+ uint8_t model_in;
2258
+ memcpy(&model_in, out, sizeof(uint8_t));
2259
+ out += sizeof(uint8_t);
2260
+ if (model_in != get_model_code(model))
2261
+ throw std::runtime_error("Object to incrementally-serialize does not match with the supplied type.\n");
2262
+
2263
+ char *pos_size = out;
2264
+ size_t old_size;
2265
+ memcpy(&old_size, out, sizeof(size_t));
2266
+ out += sizeof(size_t);
2267
+
2268
+ char *old_end = out + old_size;
2269
+ uint8_t old_ending_type;
2270
+ memcpy(&old_ending_type, old_end, sizeof(uint8_t));
2271
+ size_t old_jump_ahead;
2272
+ memcpy(&old_jump_ahead, old_end + sizeof(uint8_t), sizeof(size_t));
2273
+
2274
+ size_t new_size = get_size_model(model);
2275
+ size_t new_ntrees = model.hplanes.size();
2276
+
2277
+ try
2278
+ {
2279
+ out += sizeof(uint8_t) * 3;
2280
+ if (model.has_range_penalty)
2281
+ {
2282
+ uint8_t has_range_penalty;
2283
+ memcpy(&has_range_penalty, out, sizeof(uint8_t));
2284
+ if (!has_range_penalty)
2285
+ memcpy(out, &has_range_penalty, sizeof(uint8_t));
2286
+ }
2287
+ out += sizeof(uint8_t);
2288
+ out += sizeof(double) * 2;
2289
+ out += sizeof(size_t);
2290
+ char *pos_ntrees = out;
2291
+ size_t old_ntrees;
2292
+ memcpy(&old_ntrees, out, sizeof(size_t));
2293
+ out += sizeof(size_t);
2294
+
2295
+ serialize_additional_trees(model, old_end, old_ntrees);
2296
+
2297
+ out = old_end;
2298
+ uint8_t ending_type = (uint8_t)EndsHere;
2299
+ memcpy(out, &ending_type, sizeof(uint8_t));
2300
+ out += sizeof(uint8_t);
2301
+ size_t jump_ahead = 0;
2302
+ memcpy(out, &jump_ahead, sizeof(size_t));
2303
+ out += sizeof(size_t);
2304
+
2305
+ /* Leave this for the end in case something fails, so as not to
2306
+ render the serialized bytes unusable. */
2307
+ memcpy(pos_size, &new_size, sizeof(size_t));
2308
+ memcpy(pos_ntrees, &new_ntrees, sizeof(size_t));
2309
+ }
2310
+
2311
+ catch(...)
2312
+ {
2313
+ memcpy(out, &old_ending_type, sizeof(uint8_t));
2314
+ memcpy(out + sizeof(uint8_t), &old_jump_ahead, sizeof(size_t));
2315
+ throw;
2316
+ }
2317
+
2318
+ check_interrupt_switch(ss);
2319
+ }
2320
+
2321
+ void re_serialization_pipeline(const Imputer &model, char *&out)
2322
+ {
2323
+ SignalSwitcher ss = SignalSwitcher();
2324
+
2325
+ check_setup_info(out);
2326
+
2327
+ uint8_t model_in;
2328
+ memcpy(&model_in, out, sizeof(uint8_t));
2329
+ out += sizeof(uint8_t);
2330
+ if (model_in != get_model_code(model))
2331
+ throw std::runtime_error("Object to incrementally-serialize does not match with the supplied type.\n");
2332
+
2333
+ char *pos_size = out;
2334
+ size_t old_size;
2335
+ memcpy(&old_size, out, sizeof(size_t));
2336
+ out += sizeof(size_t);
2337
+
2338
+ char *old_end = out + old_size;
2339
+ uint8_t old_ending_type;
2340
+ memcpy(&old_ending_type, old_end, sizeof(uint8_t));
2341
+ size_t old_jump_ahead;
2342
+ memcpy(&old_jump_ahead, old_end + sizeof(uint8_t), sizeof(size_t));
2343
+
2344
+ size_t new_size = get_size_model(model);
2345
+ size_t new_ntrees = model.imputer_tree.size();
2346
+
2347
+ try
2348
+ {
2349
+ out += sizeof(size_t) * 3;
2350
+
2351
+ char *pos_ntrees = out;
2352
+ size_t old_ntrees;
2353
+ memcpy(&old_ntrees, out, sizeof(size_t));
2354
+
2355
+ serialize_additional_trees(model, old_end, old_ntrees);
2356
+
2357
+ out = old_end;
2358
+ uint8_t ending_type = (uint8_t)EndsHere;
2359
+ memcpy(out, &ending_type, sizeof(uint8_t));
2360
+ out += sizeof(uint8_t);
2361
+ size_t jump_ahead = 0;
2362
+ memcpy(out, &jump_ahead, sizeof(size_t));
2363
+ out += sizeof(size_t);
2364
+
2365
+ /* Leave this for the end in case something fails, so as not to
2366
+ render the serialized bytes unusable. */
2367
+ memcpy(pos_size, &new_size, sizeof(size_t));
2368
+ memcpy(pos_ntrees, &new_ntrees, sizeof(size_t));
2369
+ }
2370
+
2371
+ catch(...)
2372
+ {
2373
+ memcpy(out, &old_ending_type, sizeof(uint8_t));
2374
+ memcpy(out + sizeof(uint8_t), &old_jump_ahead, sizeof(size_t));
2375
+ throw;
2376
+ }
2377
+
2378
+ check_interrupt_switch(ss);
2379
+ }
2380
+
2381
+ void re_serialization_pipeline(const TreesIndexer &model, char *&out)
2382
+ {
2383
+ SignalSwitcher ss = SignalSwitcher();
2384
+
2385
+ check_setup_info(out);
2386
+
2387
+ uint8_t model_in;
2388
+ memcpy(&model_in, out, sizeof(uint8_t));
2389
+ out += sizeof(uint8_t);
2390
+ if (model_in != get_model_code(model))
2391
+ throw std::runtime_error("Object to incrementally-serialize does not match with the supplied type.\n");
2392
+
2393
+ char *pos_size = out;
2394
+ size_t old_size;
2395
+ memcpy(&old_size, out, sizeof(size_t));
2396
+ out += sizeof(size_t);
2397
+
2398
+ char *old_end = out + old_size;
2399
+ uint8_t old_ending_type;
2400
+ memcpy(&old_ending_type, old_end, sizeof(uint8_t));
2401
+ size_t old_jump_ahead;
2402
+ memcpy(&old_jump_ahead, old_end + sizeof(uint8_t), sizeof(size_t));
2403
+
2404
+ size_t new_size = get_size_model(model);
2405
+ size_t new_ntrees = model.indices.size();
2406
+
2407
+ try
2408
+ {
2409
+ char *pos_ntrees = out;
2410
+ size_t old_ntrees;
2411
+ memcpy(&old_ntrees, out, sizeof(size_t));
2412
+
2413
+ serialize_additional_trees(model, old_end, old_ntrees);
2414
+
2415
+ out = old_end;
2416
+ uint8_t ending_type = (uint8_t)EndsHere;
2417
+ memcpy(out, &ending_type, sizeof(uint8_t));
2418
+ out += sizeof(uint8_t);
2419
+ size_t jump_ahead = 0;
2420
+ memcpy(out, &jump_ahead, sizeof(size_t));
2421
+ out += sizeof(size_t);
2422
+
2423
+ /* Leave this for the end in case something fails, so as not to
2424
+ render the serialized bytes unusable. */
2425
+ memcpy(pos_size, &new_size, sizeof(size_t));
2426
+ memcpy(pos_ntrees, &new_ntrees, sizeof(size_t));
2427
+ }
2428
+
2429
+ catch(...)
2430
+ {
2431
+ memcpy(out, &old_ending_type, sizeof(uint8_t));
2432
+ memcpy(out + sizeof(uint8_t), &old_jump_ahead, sizeof(size_t));
2433
+ throw;
2434
+ }
2435
+
2436
+ check_interrupt_switch(ss);
2437
+ }
2438
+
2439
+ void incremental_serialize_IsoForest(const IsoForest &model, char *old_bytes_reallocated)
2440
+ {
2441
+ char *out = old_bytes_reallocated;
2442
+ re_serialization_pipeline(model, out);
2443
+ }
2444
+
2445
+ void incremental_serialize_ExtIsoForest(const ExtIsoForest &model, char *old_bytes_reallocated)
2446
+ {
2447
+ char *out = old_bytes_reallocated;
2448
+ re_serialization_pipeline(model, out);
2449
+ }
2450
+
2451
+ void incremental_serialize_Imputer(const Imputer &model, char *old_bytes_reallocated)
2452
+ {
2453
+ char *out = old_bytes_reallocated;
2454
+ re_serialization_pipeline(model, out);
2455
+ }
2456
+
2457
+ void incremental_serialize_Indexer(const TreesIndexer &model, char *old_bytes_reallocated)
2458
+ {
2459
+ char *out = old_bytes_reallocated;
2460
+ re_serialization_pipeline(model, out);
2461
+ }
2462
+
2463
+ template <class Model>
2464
+ void incremental_serialize_string(const Model &model, std::string &old_bytes)
2465
+ {
2466
+ size_t new_size = determine_serialized_size(model);
2467
+ if (old_bytes.size() > new_size)
2468
+ throw std::runtime_error("'old_bytes' is not a subset of 'model'.\n");
2469
+ if (!new_size)
2470
+ unexpected_error();
2471
+ old_bytes.resize(new_size);
2472
+ char *out = &old_bytes[0];
2473
+ re_serialization_pipeline(model, out);
2474
+ }
2475
+
2476
+ void incremental_serialize_IsoForest(const IsoForest &model, std::string &old_bytes)
2477
+ {
2478
+ incremental_serialize_string(model, old_bytes);
2479
+ }
2480
+
2481
+ void incremental_serialize_ExtIsoForest(const ExtIsoForest &model, std::string &old_bytes)
2482
+ {
2483
+ incremental_serialize_string(model, old_bytes);
2484
+ }
2485
+
2486
+ void incremental_serialize_Imputer(const Imputer &model, std::string &old_bytes)
2487
+ {
2488
+ incremental_serialize_string(model, old_bytes);
2489
+ }
2490
+
2491
+ void incremental_serialize_Indexer(const TreesIndexer &model, std::string &old_bytes)
2492
+ {
2493
+ incremental_serialize_string(model, old_bytes);
2494
+ }
2495
+
2496
/*  Serializes 'model' into a newly-created 'std::string' and returns it.

    The string must be sized with 'determine_serialized_size', not with
    'get_size_model': the latter only covers the body of the model, while the
    pipeline additionally writes the setup info, the model type code, the size
    field and the ending metadata. Sizing the buffer with 'get_size_model'
    would make the pipeline write past the end of the string. */
template <class Model>
std::string serialization_pipeline(const Model &model)
{
    std::string serialized;
    serialized.resize(determine_serialized_size(model));
    char *ptr = &serialized[0];
    serialization_pipeline(model, ptr);
    return serialized;
}
2505
+
2506
+ template <class Model>
2507
+ void serialization_pipeline_ToFile(const Model &model, const char *fname)
2508
+ {
2509
+ FileHandle f(fname, "wb");
2510
+ serialization_pipeline(model, f.handle);
2511
+ }
2512
+
2513
+ #ifdef WCHAR_T_FUNS
2514
+ template <class Model>
2515
+ void serialization_pipeline_ToFile(const Model &model, const wchar_t *fname)
2516
+ {
2517
+ WFileHandle f(fname, L"wb");
2518
+ serialization_pipeline(model, f.handle);
2519
+ }
2520
+ #endif
2521
+
2522
+ size_t determine_serialized_size(const IsoForest &model) noexcept
2523
+ {
2524
+ return determine_serialized_size<IsoForest>(model);
2525
+ }
2526
+
2527
+ size_t determine_serialized_size(const ExtIsoForest &model) noexcept
2528
+ {
2529
+ return determine_serialized_size<ExtIsoForest>(model);
2530
+ }
2531
+
2532
+ size_t determine_serialized_size(const Imputer &model) noexcept
2533
+ {
2534
+ return determine_serialized_size<Imputer>(model);
2535
+ }
2536
+
2537
+ size_t determine_serialized_size(const TreesIndexer &model) noexcept
2538
+ {
2539
+ return determine_serialized_size<TreesIndexer>(model);
2540
+ }
2541
+
2542
+ void serialize_IsoForest(const IsoForest &model, char *out)
2543
+ {
2544
+ serialization_pipeline(model, out);
2545
+ }
2546
+
2547
+ void serialize_IsoForest(const IsoForest &model, FILE *out)
2548
+ {
2549
+ serialization_pipeline(model, out);
2550
+ }
2551
+
2552
+ void serialize_IsoForest(const IsoForest &model, std::ostream &out)
2553
+ {
2554
+ serialization_pipeline(model, out);
2555
+ }
2556
+
2557
+ std::string serialize_IsoForest(const IsoForest &model)
2558
+ {
2559
+ return serialization_pipeline(model);
2560
+ }
2561
+
2562
+ void serialize_IsoForest_ToFile(const IsoForest &model, const char *fname)
2563
+ {
2564
+ serialization_pipeline_ToFile(model, fname);
2565
+ }
2566
+
2567
+ #ifdef WCHAR_T_FUNS
2568
+ void serialize_IsoForest_ToFile(const IsoForest &model, const wchar_t *fname)
2569
+ {
2570
+ serialization_pipeline_ToFile(model, fname);
2571
+ }
2572
+ #endif
2573
+
2574
+ void deserialize_IsoForest(IsoForest &model, const char *in)
2575
+ {
2576
+ deserialization_pipeline(model, in);
2577
+ }
2578
+
2579
+ void deserialize_IsoForest(IsoForest &model, FILE *in)
2580
+ {
2581
+ deserialization_pipeline(model, in);
2582
+ }
2583
+
2584
+ void deserialize_IsoForest(IsoForest &model, std::istream &in)
2585
+ {
2586
+ deserialization_pipeline(model, in);
2587
+ }
2588
+
2589
+ void deserialize_IsoForest(IsoForest &model, const std::string &in)
2590
+ {
2591
+ if (!in.size())
2592
+ throw std::runtime_error("Invalid input model to deserialize.");
2593
+ const char *in_ = &in[0];
2594
+ deserialization_pipeline(model, in_);
2595
+ }
2596
+
2597
+ void deserialize_IsoForest_FromFile(IsoForest &model, const char *fname)
2598
+ {
2599
+ FileHandle f(fname, "rb");
2600
+ deserialize_IsoForest(model, f.handle);
2601
+ }
2602
+
2603
+ #ifdef WCHAR_T_FUNS
2604
+ void deserialize_IsoForest_FromFile(IsoForest &model, const wchar_t *fname)
2605
+ {
2606
+ WFileHandle f(fname, L"rb");
2607
+ deserialize_IsoForest(model, f.handle);
2608
+ }
2609
+ #endif
2610
+
2611
+ void serialize_ExtIsoForest(const ExtIsoForest &model, char *out)
2612
+ {
2613
+ serialization_pipeline(model, out);
2614
+ }
2615
+
2616
+ void serialize_ExtIsoForest(const ExtIsoForest &model, FILE *out)
2617
+ {
2618
+ serialization_pipeline(model, out);
2619
+ }
2620
+
2621
+ void serialize_ExtIsoForest(const ExtIsoForest &model, std::ostream &out)
2622
+ {
2623
+ serialization_pipeline(model, out);
2624
+ }
2625
+
2626
+ std::string serialize_ExtIsoForest(const ExtIsoForest &model)
2627
+ {
2628
+ return serialization_pipeline(model);
2629
+ }
2630
+
2631
+ void serialize_ExtIsoForest_ToFile(const ExtIsoForest &model, const char *fname)
2632
+ {
2633
+ serialization_pipeline_ToFile(model, fname);
2634
+ }
2635
+
2636
+ #ifdef WCHAR_T_FUNS
2637
+ void serialize_ExtIsoForest_ToFile(const ExtIsoForest &model, const wchar_t *fname)
2638
+ {
2639
+ serialization_pipeline_ToFile(model, fname);
2640
+ }
2641
+ #endif
2642
+
2643
+ void deserialize_ExtIsoForest(ExtIsoForest &model, const char *in)
2644
+ {
2645
+ deserialization_pipeline(model, in);
2646
+ }
2647
+
2648
+ void deserialize_ExtIsoForest(ExtIsoForest &model, FILE *in)
2649
+ {
2650
+ deserialization_pipeline(model, in);
2651
+ }
2652
+
2653
+ void deserialize_ExtIsoForest(ExtIsoForest &model, std::istream &in)
2654
+ {
2655
+ deserialization_pipeline(model, in);
2656
+ }
2657
+
2658
+ void deserialize_ExtIsoForest(ExtIsoForest &model, const std::string &in)
2659
+ {
2660
+ if (!in.size())
2661
+ throw std::runtime_error("Invalid input model to deserialize.");
2662
+ const char *in_ = &in[0];
2663
+ deserialization_pipeline(model, in_);
2664
+ }
2665
+
2666
+ void deserialize_ExtIsoForest_FromFile(ExtIsoForest &model, const char *fname)
2667
+ {
2668
+ FileHandle f(fname, "rb");
2669
+ deserialize_ExtIsoForest(model, f.handle);
2670
+ }
2671
+
2672
+ #ifdef WCHAR_T_FUNS
2673
+ void deserialize_ExtIsoForest_FromFile(ExtIsoForest &model, const wchar_t *fname)
2674
+ {
2675
+ WFileHandle f(fname, L"rb");
2676
+ deserialize_ExtIsoForest(model, f.handle);
2677
+ }
2678
+ #endif
2679
+
2680
+ void serialize_Imputer(const Imputer &model, char *out)
2681
+ {
2682
+ serialization_pipeline(model, out);
2683
+ }
2684
+
2685
+ void serialize_Imputer(const Imputer &model, FILE *out)
2686
+ {
2687
+ serialization_pipeline(model, out);
2688
+ }
2689
+
2690
+ void serialize_Imputer(const Imputer &model, std::ostream &out)
2691
+ {
2692
+ serialization_pipeline(model, out);
2693
+ }
2694
+
2695
+ std::string serialize_Imputer(const Imputer &model)
2696
+ {
2697
+ return serialization_pipeline(model);
2698
+ }
2699
+
2700
+ void serialize_Imputer_ToFile(const Imputer &model, const char *fname)
2701
+ {
2702
+ serialization_pipeline_ToFile(model, fname);
2703
+ }
2704
+
2705
+ #ifdef WCHAR_T_FUNS
2706
+ void serialize_Imputer_ToFile(const Imputer &model, const wchar_t *fname)
2707
+ {
2708
+ serialization_pipeline_ToFile(model, fname);
2709
+ }
2710
+ #endif
2711
+
2712
+ void deserialize_Imputer(Imputer &model, const char *in)
2713
+ {
2714
+ deserialization_pipeline(model, in);
2715
+ }
2716
+
2717
+ void deserialize_Imputer(Imputer &model, FILE *in)
2718
+ {
2719
+ deserialization_pipeline(model, in);
2720
+ }
2721
+
2722
+ void deserialize_Imputer(Imputer &model, std::istream &in)
2723
+ {
2724
+ deserialization_pipeline(model, in);
2725
+ }
2726
+
2727
+ void deserialize_Imputer(Imputer &model, const std::string &in)
2728
+ {
2729
+ if (!in.size())
2730
+ throw std::runtime_error("Invalid input model to deserialize.");
2731
+ const char *in_ = &in[0];
2732
+ deserialization_pipeline(model, in_);
2733
+ }
2734
+
2735
+ void deserialize_Imputer_FromFile(Imputer &model, const char *fname)
2736
+ {
2737
+ FileHandle f(fname, "rb");
2738
+ deserialize_Imputer(model, f.handle);
2739
+ }
2740
+
2741
+ #ifdef WCHAR_T_FUNS
2742
+ void deserialize_Imputer_FromFile(Imputer &model, const wchar_t *fname)
2743
+ {
2744
+ WFileHandle f(fname, L"rb");
2745
+ deserialize_Imputer(model, f.handle);
2746
+ }
2747
+ #endif
2748
+
2749
+ void serialize_Indexer(const TreesIndexer &model, char *out)
2750
+ {
2751
+ serialization_pipeline(model, out);
2752
+ }
2753
+
2754
+ void serialize_Indexer(const TreesIndexer &model, FILE *out)
2755
+ {
2756
+ serialization_pipeline(model, out);
2757
+ }
2758
+
2759
+ void serialize_Indexer(const TreesIndexer &model, std::ostream &out)
2760
+ {
2761
+ serialization_pipeline(model, out);
2762
+ }
2763
+
2764
+ std::string serialize_Indexer(const TreesIndexer &model)
2765
+ {
2766
+ return serialization_pipeline(model);
2767
+ }
2768
+
2769
+ void serialize_Indexer_ToFile(const TreesIndexer &model, const char *fname)
2770
+ {
2771
+ serialization_pipeline_ToFile(model, fname);
2772
+ }
2773
+
2774
+ #ifdef WCHAR_T_FUNS
2775
+ void serialize_Indexer_ToFile(const TreesIndexer &model, const wchar_t *fname)
2776
+ {
2777
+ serialization_pipeline_ToFile(model, fname);
2778
+ }
2779
+ #endif
2780
+
2781
+ void deserialize_Indexer(TreesIndexer &model, const char *in)
2782
+ {
2783
+ deserialization_pipeline(model, in);
2784
+ }
2785
+
2786
+ void deserialize_Indexer(TreesIndexer &model, FILE *in)
2787
+ {
2788
+ deserialization_pipeline(model, in);
2789
+ }
2790
+
2791
+ void deserialize_Indexer(TreesIndexer &model, std::istream &in)
2792
+ {
2793
+ deserialization_pipeline(model, in);
2794
+ }
2795
+
2796
+ void deserialize_Indexer(TreesIndexer &model, const std::string &in)
2797
+ {
2798
+ if (!in.size())
2799
+ throw std::runtime_error("Invalid input model to deserialize.");
2800
+ const char *in_ = &in[0];
2801
+ deserialization_pipeline(model, in_);
2802
+ }
2803
+
2804
+ void deserialize_Indexer_FromFile(TreesIndexer &model, const char *fname)
2805
+ {
2806
+ FileHandle f(fname, "rb");
2807
+ deserialize_Indexer(model, f.handle);
2808
+ }
2809
+
2810
+ #ifdef WCHAR_T_FUNS
2811
+ void deserialize_Indexer_FromFile(TreesIndexer &model, const wchar_t *fname)
2812
+ {
2813
+ WFileHandle f(fname, L"rb");
2814
+ deserialize_Indexer(model, f.handle);
2815
+ }
2816
+ #endif
2817
+
2818
+ /* Shorthands to use in templates (will be used in R) */
2819
+ void serialize_isotree(const IsoForest &model, char *out)
2820
+ {
2821
+ serialize_IsoForest(model, out);
2822
+ }
2823
+
2824
+ void serialize_isotree(const ExtIsoForest &model, char *out)
2825
+ {
2826
+ serialize_ExtIsoForest(model, out);
2827
+ }
2828
+
2829
+ void serialize_isotree(const Imputer &model, char *out)
2830
+ {
2831
+ serialize_Imputer(model, out);
2832
+ }
2833
+
2834
+ void serialize_isotree(const TreesIndexer &model, char *out)
2835
+ {
2836
+ serialize_Indexer(model, out);
2837
+ }
2838
+
2839
+ void deserialize_isotree(IsoForest &model, const char *in)
2840
+ {
2841
+ deserialize_IsoForest(model, in);
2842
+ }
2843
+
2844
+ void deserialize_isotree(ExtIsoForest &model, const char *in)
2845
+ {
2846
+ deserialize_ExtIsoForest(model, in);
2847
+ }
2848
+
2849
+ void deserialize_isotree(Imputer &model, const char *in)
2850
+ {
2851
+ deserialize_Imputer(model, in);
2852
+ }
2853
+
2854
+ void deserialize_isotree(TreesIndexer &model, const char *in)
2855
+ {
2856
+ deserialize_Indexer(model, in);
2857
+ }
2858
+
2859
+ void incremental_serialize_isotree(const IsoForest &model, char *old_bytes_reallocated)
2860
+ {
2861
+ incremental_serialize_IsoForest(model, old_bytes_reallocated);
2862
+ }
2863
+
2864
+ void incremental_serialize_isotree(const ExtIsoForest &model, char *old_bytes_reallocated)
2865
+ {
2866
+ incremental_serialize_ExtIsoForest(model, old_bytes_reallocated);
2867
+ }
2868
+
2869
+ void incremental_serialize_isotree(const Imputer &model, char *old_bytes_reallocated)
2870
+ {
2871
+ incremental_serialize_Imputer(model, old_bytes_reallocated);
2872
+ }
2873
+
2874
+ void incremental_serialize_isotree(const TreesIndexer &model, char *old_bytes_reallocated)
2875
+ {
2876
+ incremental_serialize_Indexer(model, old_bytes_reallocated);
2877
+ }
2878
+
2879
+ template <class itype>
2880
+ void read_bytes_size_t(void *ptr, const size_t n_els, itype &in, const PlatformSize saved_size_t, const bool has_same_endianness)
2881
+ {
2882
+ std::vector<char> buffer;
2883
+ switch(saved_size_t)
2884
+ {
2885
+ case Is32Bit:
2886
+ {
2887
+ read_bytes<size_t, uint32_t>(ptr, n_els, in, buffer, !has_same_endianness);
2888
+ break;
2889
+ }
2890
+
2891
+ case Is64Bit:
2892
+ {
2893
+ read_bytes<size_t, uint64_t>(ptr, n_els, in, buffer, !has_same_endianness);
2894
+ break;
2895
+ }
2896
+
2897
+ default:
2898
+ {
2899
+ unexpected_error();
2900
+ }
2901
+ }
2902
+ }
2903
+
2904
+ template <class itype>
2905
+ void inspect_serialized_object
2906
+ (
2907
+ itype &serialized_bytes,
2908
+ bool &is_isotree_model,
2909
+ bool &is_compatible,
2910
+ bool &has_combined_objects,
2911
+ bool &has_IsoForest,
2912
+ bool &has_ExtIsoForest,
2913
+ bool &has_Imputer,
2914
+ bool &has_Indexer,
2915
+ bool &has_metadata,
2916
+ size_t &size_metadata,
2917
+ bool &has_same_int_size,
2918
+ bool &has_same_size_t_size,
2919
+ bool &has_same_endianness,
2920
+ bool &lacks_range_penalty,
2921
+ bool &lacks_scoring_metric
2922
+ )
2923
+ {
2924
+ auto saved_position = set_return_position(serialized_bytes);
2925
+
2926
+ is_isotree_model = false;
2927
+ is_compatible = false;
2928
+ has_combined_objects = false;
2929
+ has_IsoForest = false;
2930
+ has_ExtIsoForest = false;
2931
+ has_Imputer = false;
2932
+ has_Indexer = false;
2933
+ has_metadata = false;
2934
+ size_metadata = 0;
2935
+
2936
+ bool lacks_indexer = false;
2937
+
2938
+ bool has_same_double = false;
2939
+ bool has_incomplete_watermark = false;
2940
+ PlatformSize saved_int_t;
2941
+ PlatformSize saved_size_t;
2942
+ PlatformEndianness saved_endian;
2943
+ check_setup_info(
2944
+ serialized_bytes,
2945
+ is_isotree_model,
2946
+ has_incomplete_watermark,
2947
+ has_same_double,
2948
+ has_same_int_size,
2949
+ has_same_size_t_size,
2950
+ has_same_endianness,
2951
+ saved_int_t,
2952
+ saved_size_t,
2953
+ saved_endian,
2954
+ is_compatible,
2955
+ lacks_range_penalty,
2956
+ lacks_scoring_metric,
2957
+ lacks_indexer
2958
+ );
2959
+
2960
+ if (!is_isotree_model || !is_compatible)
2961
+ return;
2962
+
2963
+ uint8_t model_type;
2964
+ read_bytes<uint8_t>((void*)&model_type, (size_t)1, serialized_bytes);
2965
+
2966
+ switch(model_type)
2967
+ {
2968
+ case IsoForestModel:
2969
+ {
2970
+ has_IsoForest = true;
2971
+ break;
2972
+ }
2973
+
2974
+ case ExtIsoForestModel:
2975
+ {
2976
+ has_ExtIsoForest = true;
2977
+ break;
2978
+ }
2979
+
2980
+ case ImputerModel:
2981
+ {
2982
+ has_Imputer = true;
2983
+ break;
2984
+ }
2985
+
2986
+ case IndexerModel:
2987
+ {
2988
+ has_Indexer = true;
2989
+ }
2990
+
2991
+ case AllObjectsCombined:
2992
+ {
2993
+ has_combined_objects = true;
2994
+ break;
2995
+ }
2996
+
2997
+ default:
2998
+ {
2999
+
3000
+ }
3001
+ }
3002
+
3003
+ if (has_combined_objects)
3004
+ {
3005
+ size_t size_model[4] = {0};
3006
+
3007
+ read_bytes<uint8_t>((void*)&model_type, (size_t)1, serialized_bytes);
3008
+ switch(model_type)
3009
+ {
3010
+ case HasSingleVarModelNext:
3011
+ {
3012
+ has_IsoForest = true;
3013
+ break;
3014
+ }
3015
+ case HasExtModelNext:
3016
+ {
3017
+ has_ExtIsoForest = true;
3018
+ break;
3019
+ }
3020
+ case HasSingleVarModelPlusImputerNext:
3021
+ {
3022
+ has_IsoForest = true;
3023
+ has_Imputer = true;
3024
+ break;
3025
+ }
3026
+ case HasSingleVarModelPlusIndexerNext:
3027
+ {
3028
+ has_IsoForest = true;
3029
+ has_Indexer = true;
3030
+ break;
3031
+ }
3032
+ case HasSingleVarModelPlusImputerPlusIndexerNext:
3033
+ {
3034
+ has_IsoForest = true;
3035
+ has_Imputer = true;
3036
+ has_Indexer = true;
3037
+ break;
3038
+ }
3039
+ case HasExtModelPlusImputerNext:
3040
+ {
3041
+ has_ExtIsoForest = true;
3042
+ has_Imputer = true;
3043
+ break;
3044
+ }
3045
+ case HasExtModelPlusIndexerNext:
3046
+ {
3047
+ has_ExtIsoForest = true;
3048
+ has_Indexer = true;
3049
+ break;
3050
+ }
3051
+ case HasExtModelPlusImputerPlusIndexerNext:
3052
+ {
3053
+ has_ExtIsoForest = true;
3054
+ has_Imputer = true;
3055
+ has_Indexer = true;
3056
+ break;
3057
+ }
3058
+ case HasSingleVarModelPlusMetadataNext:
3059
+ {
3060
+ has_IsoForest = true;
3061
+ has_metadata = true;
3062
+ read_bytes_size_t(size_model, (size_t)(3+!lacks_indexer), serialized_bytes, saved_size_t, has_same_endianness);
3063
+ size_metadata = size_model[2+!lacks_indexer];
3064
+ break;
3065
+ }
3066
+ case HasSingleVarModelPlusIndexerPlusMetadataNext:
3067
+ {
3068
+ has_IsoForest = true;
3069
+ has_Indexer = true;
3070
+ has_metadata = true;
3071
+ read_bytes_size_t(size_model, (size_t)4, serialized_bytes, saved_size_t, has_same_endianness);
3072
+ size_metadata = size_model[3];
3073
+ break;
3074
+ }
3075
+ case HasExtModelPlusMetadataNext:
3076
+ {
3077
+ has_ExtIsoForest = true;
3078
+ has_metadata = true;
3079
+ read_bytes_size_t(size_model, (size_t)(3+!lacks_indexer), serialized_bytes, saved_size_t, has_same_endianness);
3080
+ size_metadata = size_model[2+!lacks_indexer];
3081
+ break;
3082
+ }
3083
+ case HasExtModelPlusIndexerPlusMetadataNext:
3084
+ {
3085
+ has_ExtIsoForest = true;
3086
+ has_Indexer = true;
3087
+ has_metadata = true;
3088
+ read_bytes_size_t(size_model, (size_t)4, serialized_bytes, saved_size_t, has_same_endianness);
3089
+ size_metadata = size_model[3];
3090
+ break;
3091
+ break;
3092
+ }
3093
+ case HasSingleVarModelPlusImputerPlusMetadataNext:
3094
+ {
3095
+ has_IsoForest = true;
3096
+ has_Imputer = true;
3097
+ has_metadata = true;
3098
+ read_bytes_size_t(size_model, (size_t)(3+!lacks_indexer), serialized_bytes, saved_size_t, has_same_endianness);
3099
+ size_metadata = size_model[2+!lacks_indexer];
3100
+ break;
3101
+ }
3102
+ case HasSingleVarModelPlusImputerPlusIndexerPlusMetadataNext:
3103
+ {
3104
+ has_IsoForest = true;
3105
+ has_Imputer = true;
3106
+ has_Indexer = true;
3107
+ has_metadata = true;
3108
+ read_bytes_size_t(size_model, (size_t)4, serialized_bytes, saved_size_t, has_same_endianness);
3109
+ size_metadata = size_model[3];
3110
+ break;
3111
+ }
3112
+ case HasExtModelPlusImputerPlusMetadataNext:
3113
+ {
3114
+ has_ExtIsoForest = true;
3115
+ has_Imputer = true;
3116
+ has_metadata = true;
3117
+ read_bytes_size_t(size_model, (size_t)(3+!lacks_indexer), serialized_bytes, saved_size_t, has_same_endianness);
3118
+ size_metadata = size_model[2+!lacks_indexer];
3119
+ break;
3120
+ }
3121
+ case HasExtModelPlusImputerPlusIndexerPlusMetadataNext:
3122
+ {
3123
+ has_ExtIsoForest = true;
3124
+ has_Imputer = true;
3125
+ has_Indexer = true;
3126
+ has_metadata = true;
3127
+ read_bytes_size_t(size_model, (size_t)4, serialized_bytes, saved_size_t, has_same_endianness);
3128
+ size_metadata = size_model[3];
3129
+ break;
3130
+ }
3131
+
3132
+ default:
3133
+ {
3134
+
3135
+ }
3136
+ }
3137
+ }
3138
+
3139
+ return_to_position(serialized_bytes, saved_position);
3140
+ }
3141
+
3142
/* Shorter variant of 'inspect_serialized_object' which does not report the
   platform-related flags (integer sizes, endianness) nor the 'lacks_*' flags:
   those are still computed by the full version, but discarded here. */
template <class itype>
void inspect_serialized_object
(
    itype &serialized_bytes,
    bool &is_isotree_model,
    bool &is_compatible,
    bool &has_combined_objects,
    bool &has_IsoForest,
    bool &has_ExtIsoForest,
    bool &has_Imputer,
    bool &has_Indexer,
    bool &has_metadata,
    size_t &size_metadata
)
{
    bool unused_same_int_size;
    bool unused_same_size_t_size;
    bool unused_same_endianness;
    bool unused_lacks_range_penalty;
    bool unused_lacks_scoring_metric;

    inspect_serialized_object(
        serialized_bytes,
        is_isotree_model,
        is_compatible,
        has_combined_objects,
        has_IsoForest,
        has_ExtIsoForest,
        has_Imputer,
        has_Indexer,
        has_metadata,
        size_metadata,
        unused_same_int_size,
        unused_same_size_t_size,
        unused_same_endianness,
        unused_lacks_range_penalty,
        unused_lacks_scoring_metric
    );
}
3176
+
3177
+ void inspect_serialized_object
3178
+ (
3179
+ const char *serialized_bytes,
3180
+ bool &is_isotree_model,
3181
+ bool &is_compatible,
3182
+ bool &has_combined_objects,
3183
+ bool &has_IsoForest,
3184
+ bool &has_ExtIsoForest,
3185
+ bool &has_Imputer,
3186
+ bool &has_Indexer,
3187
+ bool &has_metadata,
3188
+ size_t &size_metadata
3189
+ )
3190
+ {
3191
+ const char *in = serialized_bytes;
3192
+ inspect_serialized_object<const char*>(
3193
+ in,
3194
+ is_isotree_model,
3195
+ is_compatible,
3196
+ has_combined_objects,
3197
+ has_IsoForest,
3198
+ has_ExtIsoForest,
3199
+ has_Imputer,
3200
+ has_Indexer,
3201
+ has_metadata,
3202
+ size_metadata
3203
+ );
3204
+ }
3205
+
3206
+ void inspect_serialized_object
3207
+ (
3208
+ const std::string &serialized_bytes,
3209
+ bool &is_isotree_model,
3210
+ bool &is_compatible,
3211
+ bool &has_combined_objects,
3212
+ bool &has_IsoForest,
3213
+ bool &has_ExtIsoForest,
3214
+ bool &has_Imputer,
3215
+ bool &has_Indexer,
3216
+ bool &has_metadata,
3217
+ size_t &size_metadata
3218
+ )
3219
+ {
3220
+ if (!serialized_bytes.size()) {
3221
+ is_isotree_model = false;
3222
+ is_compatible = false;
3223
+ has_IsoForest = false;
3224
+ has_ExtIsoForest = false;
3225
+ has_Imputer = false;
3226
+ has_Indexer = false;
3227
+ has_metadata = false;
3228
+ return;
3229
+ }
3230
+ const char *in = &serialized_bytes[0];
3231
+ inspect_serialized_object<const char*>(
3232
+ in,
3233
+ is_isotree_model,
3234
+ is_compatible,
3235
+ has_combined_objects,
3236
+ has_IsoForest,
3237
+ has_ExtIsoForest,
3238
+ has_Imputer,
3239
+ has_Indexer,
3240
+ has_metadata,
3241
+ size_metadata
3242
+ );
3243
+ }
3244
+
3245
+ void inspect_serialized_object
3246
+ (
3247
+ FILE *serialized_bytes,
3248
+ bool &is_isotree_model,
3249
+ bool &is_compatible,
3250
+ bool &has_combined_objects,
3251
+ bool &has_IsoForest,
3252
+ bool &has_ExtIsoForest,
3253
+ bool &has_Imputer,
3254
+ bool &has_Indexer,
3255
+ bool &has_metadata,
3256
+ size_t &size_metadata
3257
+ )
3258
+ {
3259
+ FILE *in = serialized_bytes;
3260
+ inspect_serialized_object<FILE*>(
3261
+ in,
3262
+ is_isotree_model,
3263
+ is_compatible,
3264
+ has_combined_objects,
3265
+ has_IsoForest,
3266
+ has_ExtIsoForest,
3267
+ has_Imputer,
3268
+ has_Indexer,
3269
+ has_metadata,
3270
+ size_metadata
3271
+ );
3272
+ }
3273
+
3274
+ void inspect_serialized_object
3275
+ (
3276
+ std::istream &serialized_bytes,
3277
+ bool &is_isotree_model,
3278
+ bool &is_compatible,
3279
+ bool &has_combined_objects,
3280
+ bool &has_IsoForest,
3281
+ bool &has_ExtIsoForest,
3282
+ bool &has_Imputer,
3283
+ bool &has_Indexer,
3284
+ bool &has_metadata,
3285
+ size_t &size_metadata
3286
+ )
3287
+ {
3288
+ inspect_serialized_object<std::istream>(
3289
+ serialized_bytes,
3290
+ is_isotree_model,
3291
+ is_compatible,
3292
+ has_combined_objects,
3293
+ has_IsoForest,
3294
+ has_ExtIsoForest,
3295
+ has_Imputer,
3296
+ has_Indexer,
3297
+ has_metadata,
3298
+ size_metadata
3299
+ );
3300
+ }
3301
+
3302
/* Generic case: there are no column counts to compare against the serialized
   bytes, so the check always passes. Neither argument is used. */
template <class Model>
bool prev_cols_match(const Model &model, const char *serialized_bytes)
{
    (void)model;
    (void)serialized_bytes;
    return true;
}
3307
+
3308
+ bool prev_cols_match(const Imputer &model, const char *serialized_bytes)
3309
+ {
3310
+ size_t prev[6];
3311
+ read_bytes<size_t>((void*)prev, (size_t)6, serialized_bytes);
3312
+ if (prev[0] != model.ncols_numeric ||
3313
+ prev[1] != model.ncols_categ ||
3314
+ prev[2] != model.ncat.size() ||
3315
+ prev[4] != model.col_means.size() ||
3316
+ prev[5] != model.col_modes.size())
3317
+ {
3318
+ return false;
3319
+ }
3320
+
3321
+ return true;
3322
+ }
3323
+
3324
+ template <class Model>
3325
+ bool check_can_undergo_incremental_serialization(const Model &model, const char *serialized_bytes)
3326
+ {
3327
+ const char *start = serialized_bytes;
3328
+ size_t curr_ntrees = get_ntrees(model);
3329
+
3330
+ bool is_isotree_model;
3331
+ bool is_compatible;
3332
+ bool has_combined_objects;
3333
+ bool has_IsoForest;
3334
+ bool has_ExtIsoForest;
3335
+ bool has_Imputer;
3336
+ bool has_Indexer;
3337
+ bool has_metadata;
3338
+ size_t size_metadata;
3339
+ bool has_same_int_size;
3340
+ bool has_same_size_t_size;
3341
+ bool has_same_endianness;
3342
+ bool lacks_range_penalty;
3343
+ bool lacks_scoring_metric;
3344
+
3345
+ inspect_serialized_object(
3346
+ serialized_bytes,
3347
+ is_isotree_model,
3348
+ is_compatible,
3349
+ has_combined_objects,
3350
+ has_IsoForest,
3351
+ has_ExtIsoForest,
3352
+ has_Imputer,
3353
+ has_Indexer,
3354
+ has_metadata,
3355
+ size_metadata,
3356
+ has_same_int_size,
3357
+ has_same_size_t_size,
3358
+ has_same_endianness,
3359
+ lacks_range_penalty,
3360
+ lacks_scoring_metric
3361
+ );
3362
+
3363
+ if (!is_isotree_model || !is_compatible || has_combined_objects ||
3364
+ !has_same_int_size || !has_same_size_t_size || !has_same_endianness ||
3365
+ lacks_range_penalty || lacks_scoring_metric)
3366
+ return false;
3367
+
3368
+ if (std::is_same<Model, IsoForest>::value) {
3369
+ if (!has_IsoForest || has_ExtIsoForest || has_Imputer || has_Indexer)
3370
+ return false;
3371
+ }
3372
+
3373
+ else if (std::is_same<Model, ExtIsoForest>::value) {
3374
+ if (has_IsoForest || !has_ExtIsoForest || has_Imputer || has_Indexer)
3375
+ return false;
3376
+ }
3377
+
3378
+ else if (std::is_same<Model, Imputer>::value) {
3379
+ if (has_IsoForest || has_ExtIsoForest || !has_Imputer || has_Indexer)
3380
+ return false;
3381
+ }
3382
+
3383
+ else if (std::is_same<Model, TreesIndexer>::value) {
3384
+ if (has_IsoForest || has_ExtIsoForest || has_Imputer || !has_Indexer)
3385
+ return false;
3386
+ }
3387
+
3388
+ else {
3389
+ assert(0);
3390
+ }
3391
+
3392
+ start += get_size_setup_info();
3393
+ start += sizeof(uint8_t);
3394
+ start += sizeof(size_t);
3395
+
3396
+ if (std::is_same<Model, IsoForest>::value) {
3397
+ start += sizeof(uint8_t) * 4;
3398
+ start += sizeof(double) * 2;
3399
+ start += sizeof(size_t);
3400
+ }
3401
+
3402
+ else if (std::is_same<Model, ExtIsoForest>::value) {
3403
+ start += sizeof(uint8_t) * 4;
3404
+ start += sizeof(double) * 2;
3405
+ start += sizeof(size_t);
3406
+ }
3407
+
3408
+ else if (std::is_same<Model, Imputer>::value) {
3409
+ if (!prev_cols_match(model, start))
3410
+ return false;
3411
+ start += sizeof(size_t) * 3;
3412
+ }
3413
+
3414
+ else if (std::is_same<Model, TreesIndexer>::value) {
3415
+ /* Nothing is required here */
3416
+ }
3417
+
3418
+ else {
3419
+ assert(0);
3420
+ }
3421
+
3422
+ size_t old_ntrees;
3423
+ memcpy(&old_ntrees, start, sizeof(size_t));
3424
+ if (old_ntrees > curr_ntrees)
3425
+ return false;
3426
+
3427
+ return true;
3428
+ }
3429
+
3430
+ bool check_can_undergo_incremental_serialization(const IsoForest &model, const char *serialized_bytes)
3431
+ {
3432
+ return check_can_undergo_incremental_serialization<IsoForest>(model, serialized_bytes);
3433
+ }
3434
+
3435
+ bool check_can_undergo_incremental_serialization(const ExtIsoForest &model, const char *serialized_bytes)
3436
+ {
3437
+ return check_can_undergo_incremental_serialization<ExtIsoForest>(model, serialized_bytes);
3438
+ }
3439
+
3440
+ bool check_can_undergo_incremental_serialization(const Imputer &model, const char *serialized_bytes)
3441
+ {
3442
+ return check_can_undergo_incremental_serialization<Imputer>(model, serialized_bytes);
3443
+ }
3444
+
3445
+ bool check_can_undergo_incremental_serialization(const TreesIndexer &model, const char *serialized_bytes)
3446
+ {
3447
+ return check_can_undergo_incremental_serialization<TreesIndexer>(model, serialized_bytes);
3448
+ }
3449
+
3450
+ size_t determine_serialized_size_combined
3451
+ (
3452
+ const IsoForest *model,
3453
+ const ExtIsoForest *model_ext,
3454
+ const Imputer *imputer,
3455
+ const TreesIndexer *indexer,
3456
+ const size_t size_optional_metadata
3457
+ ) noexcept
3458
+ {
3459
+ size_t n_bytes = get_size_setup_info();
3460
+ n_bytes += 3 * sizeof(uint8_t);
3461
+ n_bytes += 5 * sizeof(size_t);
3462
+
3463
+ if (model != NULL)
3464
+ n_bytes += get_size_model(*model);
3465
+ else
3466
+ n_bytes += get_size_model(*model_ext);
3467
+ if (imputer != NULL)
3468
+ n_bytes += get_size_model(*imputer);
3469
+ if (indexer != NULL)
3470
+ n_bytes += get_size_model(*indexer);
3471
+
3472
+ n_bytes += get_size_ending_metadata();
3473
+ return n_bytes;
3474
+ }
3475
+
3476
+ template <class otype>
3477
+ void serialize_combined
3478
+ (
3479
+ const IsoForest *model,
3480
+ const ExtIsoForest *model_ext,
3481
+ const Imputer *imputer,
3482
+ const TreesIndexer *indexer,
3483
+ const char *optional_metadata,
3484
+ const size_t size_optional_metadata,
3485
+ otype &out
3486
+ )
3487
+ {
3488
+ SignalSwitcher ss = SignalSwitcher();
3489
+
3490
+ auto pos_watermark = set_return_position(out);
3491
+
3492
+ add_setup_info(out, false);
3493
+ uint8_t model_type = AllObjectsCombined;
3494
+ write_bytes<uint8_t>((void*)&model_type, (size_t)1, out);
3495
+
3496
+ if (model != NULL)
3497
+ {
3498
+
3499
+ if (!size_optional_metadata)
3500
+ {
3501
+ if (imputer == NULL) {
3502
+ if (indexer == NULL)
3503
+ model_type = HasSingleVarModelNext;
3504
+ else
3505
+ model_type = HasSingleVarModelPlusIndexerNext;
3506
+ }
3507
+ else {
3508
+ if (indexer == NULL)
3509
+ model_type = HasSingleVarModelPlusImputerNext;
3510
+ else
3511
+ model_type = HasSingleVarModelPlusImputerPlusIndexerNext;
3512
+ }
3513
+ }
3514
+
3515
+ else
3516
+ {
3517
+ if (imputer == NULL) {
3518
+ if (indexer == NULL)
3519
+ model_type = HasSingleVarModelPlusMetadataNext;
3520
+ else
3521
+ model_type = HasSingleVarModelPlusIndexerPlusMetadataNext;
3522
+ }
3523
+ else {
3524
+ if (indexer == NULL)
3525
+ model_type = HasSingleVarModelPlusImputerPlusMetadataNext;
3526
+ else
3527
+ model_type = HasSingleVarModelPlusImputerPlusIndexerPlusMetadataNext;
3528
+ }
3529
+ }
3530
+
3531
+ }
3532
+
3533
+ else if (model_ext != NULL)
3534
+ {
3535
+
3536
+ if (!size_optional_metadata)
3537
+ {
3538
+ if (imputer == NULL) {
3539
+ if (indexer == NULL)
3540
+ model_type = HasExtModelNext;
3541
+ else
3542
+ model_type = HasExtModelPlusIndexerNext;
3543
+ }
3544
+ else {
3545
+ if (indexer == NULL)
3546
+ model_type = HasExtModelPlusImputerNext;
3547
+ else
3548
+ model_type = HasExtModelPlusImputerPlusIndexerNext;
3549
+ }
3550
+ }
3551
+
3552
+ else
3553
+ {
3554
+ if (imputer == NULL) {
3555
+ if (indexer == NULL)
3556
+ model_type = HasExtModelPlusMetadataNext;
3557
+ else
3558
+ model_type = HasExtModelPlusIndexerPlusMetadataNext;
3559
+ }
3560
+ else {
3561
+ if (indexer == NULL)
3562
+ model_type = HasExtModelPlusImputerPlusMetadataNext;
3563
+ else
3564
+ model_type = HasExtModelPlusImputerPlusIndexerPlusMetadataNext;
3565
+ }
3566
+ }
3567
+ }
3568
+
3569
+ else {
3570
+ throw std::runtime_error("Must pass one of 'model' or 'model_ext'.\n");
3571
+ }
3572
+
3573
+ write_bytes<uint8_t>((void*)&model_type, (size_t)1, out);
3574
+
3575
+ size_t size_model;
3576
+ if (model != NULL)
3577
+ size_model = get_size_model(*model);
3578
+ else
3579
+ size_model = get_size_model(*model_ext);
3580
+ write_bytes<size_t>((void*)&size_model, (size_t)1, out);
3581
+
3582
+ if (imputer != NULL)
3583
+ size_model = get_size_model(*imputer);
3584
+ else
3585
+ size_model = 0;
3586
+ write_bytes<size_t>((void*)&size_model, (size_t)1, out);
3587
+
3588
+ if (indexer != NULL)
3589
+ size_model = get_size_model(*indexer);
3590
+ else
3591
+ size_model = 0;
3592
+ write_bytes<size_t>((void*)&size_model, (size_t)1, out);
3593
+
3594
+ write_bytes<size_t>((void*)&size_optional_metadata, (size_t)1, out);
3595
+
3596
+
3597
+ check_interrupt_switch(ss);
3598
+
3599
+ if (model != NULL)
3600
+ serialize_model(*model, out);
3601
+ else
3602
+ serialize_model(*model_ext, out);
3603
+
3604
+ if (imputer != NULL)
3605
+ serialize_model(*imputer, out);
3606
+
3607
+ if (indexer != NULL)
3608
+ serialize_model(*indexer, out);
3609
+
3610
+ if (size_optional_metadata)
3611
+ write_bytes<char>((void*)optional_metadata, size_optional_metadata, out);
3612
+
3613
+ check_interrupt_switch(ss);
3614
+
3615
+ uint8_t ending_type = (uint8_t)EndsHere;
3616
+ write_bytes<uint8_t>((void*)&ending_type, (size_t)1, out);
3617
+ size_t jump_ahead = 0;
3618
+ write_bytes<size_t>((void*)&jump_ahead, (size_t)1, out);
3619
+
3620
+ auto end_pos = set_return_position(out);
3621
+ return_to_position(out, pos_watermark);
3622
+ add_full_watermark(out);
3623
+ return_to_position(out, end_pos);
3624
+ }
3625
+
3626
+ void serialize_combined
3627
+ (
3628
+ const IsoForest *model,
3629
+ const ExtIsoForest *model_ext,
3630
+ const Imputer *imputer,
3631
+ const TreesIndexer *indexer,
3632
+ const char *optional_metadata,
3633
+ const size_t size_optional_metadata,
3634
+ char *out
3635
+ )
3636
+ {
3637
+ serialize_combined<char*>(
3638
+ model,
3639
+ model_ext,
3640
+ imputer,
3641
+ indexer,
3642
+ optional_metadata,
3643
+ size_optional_metadata,
3644
+ out
3645
+ );
3646
+ }
3647
+
3648
+ void serialize_combined
3649
+ (
3650
+ const IsoForest *model,
3651
+ const ExtIsoForest *model_ext,
3652
+ const Imputer *imputer,
3653
+ const TreesIndexer *indexer,
3654
+ const char *optional_metadata,
3655
+ const size_t size_optional_metadata,
3656
+ FILE *out
3657
+ )
3658
+ {
3659
+ serialize_combined<FILE*>(
3660
+ model,
3661
+ model_ext,
3662
+ imputer,
3663
+ indexer,
3664
+ optional_metadata,
3665
+ size_optional_metadata,
3666
+ out
3667
+ );
3668
+ }
3669
+
3670
+ void serialize_combined
3671
+ (
3672
+ const IsoForest *model,
3673
+ const ExtIsoForest *model_ext,
3674
+ const Imputer *imputer,
3675
+ const TreesIndexer *indexer,
3676
+ const char *optional_metadata,
3677
+ const size_t size_optional_metadata,
3678
+ std::ostream &out
3679
+ )
3680
+ {
3681
+ serialize_combined<std::ostream>(
3682
+ model,
3683
+ model_ext,
3684
+ imputer,
3685
+ indexer,
3686
+ optional_metadata,
3687
+ size_optional_metadata,
3688
+ out
3689
+ );
3690
+ }
3691
+
3692
+ std::string serialize_combined
3693
+ (
3694
+ const IsoForest *model,
3695
+ const ExtIsoForest *model_ext,
3696
+ const Imputer *imputer,
3697
+ const TreesIndexer *indexer,
3698
+ const char *optional_metadata,
3699
+ const size_t size_optional_metadata
3700
+ )
3701
+ {
3702
+ std::string serialized;
3703
+ serialized.resize(determine_serialized_size_combined(model, model_ext, imputer, indexer, size_optional_metadata));
3704
+ char *ptr = &serialized[0];
3705
+ serialize_combined(model, model_ext, imputer, indexer, optional_metadata, size_optional_metadata, ptr);
3706
+ return serialized;
3707
+ }
3708
+
3709
+ size_t determine_serialized_size_combined
3710
+ (
3711
+ const char *serialized_model,
3712
+ const char *serialized_model_ext,
3713
+ const char *serialized_imputer,
3714
+ const char *serialized_indexer,
3715
+ const size_t size_optional_metadata
3716
+ ) noexcept
3717
+ {
3718
+ size_t n_bytes = get_size_setup_info();
3719
+ n_bytes += 3 * sizeof(uint8_t);
3720
+ n_bytes += 5 * sizeof(size_t);
3721
+
3722
+ size_t model_size;
3723
+
3724
+ if (serialized_model != NULL)
3725
+ memcpy(&model_size, serialized_model + get_size_setup_info() + sizeof(uint8_t), sizeof(size_t));
3726
+ else
3727
+ memcpy(&model_size, serialized_model_ext + get_size_setup_info() + sizeof(uint8_t), sizeof(size_t));
3728
+ n_bytes += model_size;
3729
+ if (serialized_imputer != NULL) {
3730
+ memcpy(&model_size, serialized_imputer + get_size_setup_info() + sizeof(uint8_t), sizeof(size_t));
3731
+ n_bytes += model_size;
3732
+ }
3733
+ if (serialized_indexer != NULL) {
3734
+ memcpy(&model_size, serialized_indexer + get_size_setup_info() + sizeof(uint8_t), sizeof(size_t));
3735
+ n_bytes += model_size;
3736
+ }
3737
+
3738
+ n_bytes += size_optional_metadata;
3739
+
3740
+ n_bytes += get_size_ending_metadata();
3741
+ return n_bytes;
3742
+ }
3743
+
3744
+ template <class otype>
3745
+ void serialize_combined
3746
+ (
3747
+ const char *serialized_model,
3748
+ const char *serialized_model_ext,
3749
+ const char *serialized_imputer,
3750
+ const char *serialized_indexer,
3751
+ const char *optional_metadata,
3752
+ const size_t size_optional_metadata,
3753
+ otype &out
3754
+ )
3755
+ {
3756
+ SignalSwitcher ss = SignalSwitcher();
3757
+
3758
+ std::unique_ptr<char[]> curr_setup(new char[get_size_setup_info()]);
3759
+ char *ptr_curr_setup = curr_setup.get();
3760
+ add_setup_info(ptr_curr_setup, true);
3761
+ auto pos_watermark = set_return_position(out);
3762
+ add_setup_info(out, false);
3763
+
3764
+ uint8_t model_type = AllObjectsCombined;
3765
+ write_bytes<uint8_t>((void*)&model_type, (size_t)1, out);
3766
+
3767
+ if (serialized_model != NULL)
3768
+ {
3769
+ if (!size_optional_metadata)
3770
+ {
3771
+ if (serialized_imputer == NULL) {
3772
+ if (serialized_indexer == NULL)
3773
+ model_type = HasSingleVarModelNext;
3774
+ else
3775
+ model_type = HasSingleVarModelPlusIndexerNext;
3776
+ }
3777
+ else {
3778
+ if (serialized_indexer == NULL)
3779
+ model_type = HasSingleVarModelPlusImputerNext;
3780
+ else
3781
+ model_type = HasSingleVarModelPlusImputerPlusIndexerNext;
3782
+ }
3783
+ }
3784
+
3785
+ else
3786
+ {
3787
+ if (serialized_imputer == NULL) {
3788
+ if (serialized_indexer == NULL)
3789
+ model_type = HasSingleVarModelPlusMetadataNext;
3790
+ else
3791
+ model_type = HasSingleVarModelPlusIndexerPlusMetadataNext;
3792
+ }
3793
+ else {
3794
+ if (serialized_indexer == NULL)
3795
+ model_type = HasSingleVarModelPlusImputerPlusMetadataNext;
3796
+ else
3797
+ model_type = HasSingleVarModelPlusImputerPlusIndexerPlusMetadataNext;
3798
+ }
3799
+ }
3800
+ }
3801
+
3802
+ else
3803
+ {
3804
+ if (!size_optional_metadata)
3805
+ {
3806
+ if (serialized_imputer == NULL) {
3807
+ if (serialized_indexer == NULL)
3808
+ model_type = HasExtModelNext;
3809
+ else
3810
+ model_type = HasExtModelPlusIndexerNext;
3811
+ }
3812
+ else {
3813
+ if (serialized_indexer == NULL)
3814
+ model_type = HasExtModelPlusImputerNext;
3815
+ else
3816
+ model_type = HasExtModelPlusImputerPlusIndexerNext;
3817
+ }
3818
+ }
3819
+
3820
+ else
3821
+ {
3822
+ if (serialized_imputer == NULL) {
3823
+ if (serialized_indexer == NULL)
3824
+ model_type = HasExtModelPlusMetadataNext;
3825
+ else
3826
+ model_type = HasExtModelPlusIndexerPlusMetadataNext;
3827
+ }
3828
+ else {
3829
+ if (serialized_indexer == NULL)
3830
+ model_type = HasExtModelPlusImputerPlusMetadataNext;
3831
+ else
3832
+ model_type = HasExtModelPlusImputerPlusIndexerPlusMetadataNext;
3833
+ }
3834
+ }
3835
+ }
3836
+
3837
+ write_bytes<uint8_t>((void*)&model_type, (size_t)1, out);
3838
+
3839
+ size_t model_size;
3840
+ size_t size_model1, size_model2, size_model3, size_model4;
3841
+
3842
+ std::unique_ptr<char[]> new_model;
3843
+ if (serialized_model != NULL)
3844
+ {
3845
+ if (memcmp(curr_setup.get(), serialized_model, get_size_setup_info()))
3846
+ {
3847
+ fprintf(stderr, "Warning: 'model' was serialized in a different setup, will need to convert.\n");
3848
+ IsoForest model;
3849
+ deserialization_pipeline(model, serialized_model);
3850
+ new_model = std::unique_ptr<char[]>(new char[get_size_model(model)]);
3851
+ char *ptr_new_model_ser = new_model.get();
3852
+ serialization_pipeline(model, ptr_new_model_ser);
3853
+ serialized_model = new_model.get();
3854
+ }
3855
+ serialized_model += get_size_setup_info() + sizeof(uint8_t);
3856
+ memcpy(&model_size, serialized_model, sizeof(size_t));
3857
+ serialized_model += sizeof(size_t);
3858
+ size_model1 = model_size;
3859
+ }
3860
+
3861
+ else
3862
+ {
3863
+ if (memcmp(curr_setup.get(), serialized_model_ext, get_size_setup_info()))
3864
+ {
3865
+ fprintf(stderr, "Warning: 'model_ext' was serialized in a different setup, will need to convert.\n");
3866
+ ExtIsoForest model;
3867
+ deserialization_pipeline(model, serialized_model_ext);
3868
+ new_model = std::unique_ptr<char[]>(new char[get_size_model(model)]);
3869
+ char *ptr_new_model_ser = new_model.get();
3870
+ serialization_pipeline(model, ptr_new_model_ser);
3871
+ serialized_model_ext = new_model.get();
3872
+ }
3873
+ serialized_model_ext += get_size_setup_info() + sizeof(uint8_t);
3874
+ memcpy(&model_size, serialized_model_ext, sizeof(size_t));
3875
+ serialized_model_ext += sizeof(size_t);
3876
+ size_model2 = model_size;
3877
+ }
3878
+
3879
+ check_interrupt_switch(ss);
3880
+
3881
+ write_bytes<size_t>((void*)&model_size, (size_t)1, out);
3882
+
3883
+ if (serialized_imputer != NULL)
3884
+ {
3885
+ if (memcmp(curr_setup.get(), serialized_imputer, get_size_setup_info()))
3886
+ {
3887
+ fprintf(stderr, "Warning: 'imputer' was serialized in a different setup, will need to convert.\n");
3888
+ Imputer model;
3889
+ deserialization_pipeline(model, serialized_imputer);
3890
+ new_model = std::unique_ptr<char[]>(new char[get_size_model(model)]);
3891
+ char *ptr_new_model_ser = new_model.get();
3892
+ serialization_pipeline(model, ptr_new_model_ser);
3893
+ serialized_imputer = new_model.get();
3894
+ }
3895
+ serialized_imputer += get_size_setup_info() + sizeof(uint8_t);
3896
+ memcpy(&model_size, serialized_imputer, sizeof(size_t));
3897
+ serialized_imputer += sizeof(size_t);
3898
+ size_model3 = model_size;
3899
+ }
3900
+
3901
+ else {
3902
+ model_size = 0;
3903
+ }
3904
+ write_bytes<size_t>((void*)&model_size, (size_t)1, out);
3905
+
3906
+ if (serialized_indexer != NULL)
3907
+ {
3908
+ if (memcmp(curr_setup.get(), serialized_indexer, get_size_setup_info()))
3909
+ {
3910
+ fprintf(stderr, "Warning: 'indexer' was serialized in a different setup, will need to convert.\n");
3911
+ TreesIndexer model;
3912
+ deserialization_pipeline(model, serialized_indexer);
3913
+ new_model = std::unique_ptr<char[]>(new char[get_size_model(model)]);
3914
+ char *ptr_new_model_ser = new_model.get();
3915
+ serialization_pipeline(model, ptr_new_model_ser);
3916
+ serialized_indexer = new_model.get();
3917
+ }
3918
+ serialized_indexer += get_size_setup_info() + sizeof(uint8_t);
3919
+ memcpy(&model_size, serialized_indexer, sizeof(size_t));
3920
+ serialized_indexer += sizeof(size_t);
3921
+ size_model4 = model_size;
3922
+ }
3923
+
3924
+ else {
3925
+ model_size = 0;
3926
+ }
3927
+ write_bytes<size_t>((void*)&model_size, (size_t)1, out);
3928
+
3929
+ check_interrupt_switch(ss);
3930
+
3931
+ write_bytes<size_t>((void*)&size_optional_metadata, (size_t)1, out);
3932
+
3933
+ if (serialized_model != NULL)
3934
+ write_bytes<char>((void*)serialized_model, size_model1, out);
3935
+ else
3936
+ write_bytes<char>((void*)serialized_model_ext, size_model2, out);
3937
+ if (serialized_imputer != NULL)
3938
+ write_bytes<char>((void*)serialized_imputer, size_model3, out);
3939
+ if (serialized_indexer != NULL)
3940
+ write_bytes<char>((void*)serialized_indexer, size_model4, out);
3941
+
3942
+ if (size_optional_metadata)
3943
+ write_bytes<char>((void*)optional_metadata, size_optional_metadata, out);
3944
+
3945
+ check_interrupt_switch(ss);
3946
+
3947
+ uint8_t ending_type = (uint8_t)EndsHere;
3948
+ write_bytes<uint8_t>((void*)&ending_type, (size_t)1, out);
3949
+ size_t jump_ahead = 0;
3950
+ write_bytes<size_t>((void*)&jump_ahead, (size_t)1, out);
3951
+
3952
+ auto end_pos = set_return_position(out);
3953
+ return_to_position(out, pos_watermark);
3954
+ add_full_watermark(out);
3955
+ return_to_position(out, end_pos);
3956
+ }
3957
+
3958
+ void serialize_combined
3959
+ (
3960
+ const char *serialized_model,
3961
+ const char *serialized_model_ext,
3962
+ const char *serialized_imputer,
3963
+ const char *serialized_indexer,
3964
+ const char *optional_metadata,
3965
+ const size_t size_optional_metadata,
3966
+ FILE *out
3967
+ )
3968
+ {
3969
+ serialize_combined<FILE*>(
3970
+ serialized_model,
3971
+ serialized_model_ext,
3972
+ serialized_imputer,
3973
+ serialized_indexer,
3974
+ optional_metadata,
3975
+ size_optional_metadata,
3976
+ out
3977
+ );
3978
+ }
3979
+
3980
+ void serialize_combined
3981
+ (
3982
+ const char *serialized_model,
3983
+ const char *serialized_model_ext,
3984
+ const char *serialized_imputer,
3985
+ const char *serialized_indexer,
3986
+ const char *optional_metadata,
3987
+ const size_t size_optional_metadata,
3988
+ std::ostream &out
3989
+ )
3990
+ {
3991
+ serialize_combined<std::ostream>(
3992
+ serialized_model,
3993
+ serialized_model_ext,
3994
+ serialized_imputer,
3995
+ serialized_indexer,
3996
+ optional_metadata,
3997
+ size_optional_metadata,
3998
+ out
3999
+ );
4000
+ }
4001
+
4002
+ std::string serialize_combined
4003
+ (
4004
+ const char *serialized_model,
4005
+ const char *serialized_model_ext,
4006
+ const char *serialized_imputer,
4007
+ const char *serialized_indexer,
4008
+ const char *optional_metadata,
4009
+ const size_t size_optional_metadata
4010
+ )
4011
+ {
4012
+ std::string serialized;
4013
+ serialized.resize(
4014
+ determine_serialized_size_combined(
4015
+ serialized_model,
4016
+ serialized_model_ext,
4017
+ serialized_imputer,
4018
+ serialized_indexer,
4019
+ size_optional_metadata
4020
+ )
4021
+ );
4022
+ char *ptr = &serialized[0];
4023
+ serialize_combined(
4024
+ serialized_model,
4025
+ serialized_model_ext,
4026
+ serialized_imputer,
4027
+ serialized_indexer,
4028
+ optional_metadata,
4029
+ size_optional_metadata,
4030
+ ptr
4031
+ );
4032
+ return serialized;
4033
+ }
4034
+
4035
+ template <class Model, class itype>
4036
+ void deserialize_model
4037
+ (
4038
+ Model &model,
4039
+ itype &in,
4040
+ const bool has_same_endianness,
4041
+ const bool has_same_int_size,
4042
+ const bool has_same_size_t_size,
4043
+ const PlatformSize saved_int_t,
4044
+ const PlatformSize saved_size_t,
4045
+ const bool lacks_range_penalty,
4046
+ const bool lacks_scoring_metric
4047
+ )
4048
+ {
4049
+ if (has_same_endianness && has_same_int_size && has_same_size_t_size && !lacks_range_penalty && !lacks_scoring_metric)
4050
+ {
4051
+ deserialize_model(model, in);
4052
+ return;
4053
+ }
4054
+
4055
+ std::vector<char> buffer;
4056
+
4057
+ if (saved_int_t == Is16Bit && saved_size_t == Is32Bit)
4058
+ {
4059
+ deserialize_model<itype, int16_t, uint32_t>(model, in, buffer, !has_same_endianness, lacks_range_penalty, lacks_scoring_metric);
4060
+ }
4061
+
4062
+ else if (saved_int_t == Is32Bit && saved_size_t == Is32Bit)
4063
+ {
4064
+ deserialize_model<itype, int32_t, uint32_t>(model, in, buffer, !has_same_endianness, lacks_range_penalty, lacks_scoring_metric);
4065
+ }
4066
+
4067
+ else if (saved_int_t == Is64Bit && saved_size_t == Is32Bit)
4068
+ {
4069
+ deserialize_model<itype, int64_t, uint32_t>(model, in, buffer, !has_same_endianness, lacks_range_penalty, lacks_scoring_metric);
4070
+ }
4071
+
4072
+ else if (saved_int_t == Is16Bit && saved_size_t == Is64Bit)
4073
+ {
4074
+ deserialize_model<itype, int16_t, uint64_t>(model, in, buffer, !has_same_endianness, lacks_range_penalty, lacks_scoring_metric);
4075
+ }
4076
+
4077
+ else if (saved_int_t == Is32Bit && saved_size_t == Is64Bit)
4078
+ {
4079
+ deserialize_model<itype, int32_t, uint64_t>(model, in, buffer, !has_same_endianness, lacks_range_penalty, lacks_scoring_metric);
4080
+ }
4081
+
4082
+ else if (saved_int_t == Is64Bit && saved_size_t == Is64Bit)
4083
+ {
4084
+ deserialize_model<itype, int16_t, uint64_t>(model, in, buffer, !has_same_endianness, lacks_range_penalty, lacks_scoring_metric);
4085
+ }
4086
+
4087
+ else
4088
+ {
4089
+ unexpected_error();
4090
+ }
4091
+ }
4092
+
4093
+ template <class itype>
4094
+ void deserialize_combined
4095
+ (
4096
+ itype &in,
4097
+ IsoForest *model,
4098
+ ExtIsoForest *model_ext,
4099
+ Imputer *imputer,
4100
+ TreesIndexer *indexer,
4101
+ char *optional_metadata
4102
+ )
4103
+ {
4104
+ SignalSwitcher ss = SignalSwitcher();
4105
+
4106
+ bool has_same_int_size;
4107
+ bool has_same_size_t_size;
4108
+ bool has_same_endianness;
4109
+ PlatformSize saved_int_t;
4110
+ PlatformSize saved_size_t;
4111
+ PlatformEndianness saved_endian;
4112
+ bool lacks_range_penalty;
4113
+ bool lacks_scoring_metric;
4114
+ bool lacks_indexer;
4115
+
4116
+ check_setup_info(
4117
+ in,
4118
+ has_same_int_size,
4119
+ has_same_size_t_size,
4120
+ has_same_endianness,
4121
+ saved_int_t,
4122
+ saved_size_t,
4123
+ saved_endian,
4124
+ lacks_range_penalty,
4125
+ lacks_scoring_metric,
4126
+ lacks_indexer
4127
+ );
4128
+
4129
+ uint8_t model_in;
4130
+ read_bytes<uint8_t>((void*)&model_in, (size_t)1, in);
4131
+ if (model_in != AllObjectsCombined)
4132
+ throw std::runtime_error("Object to de-serialize was not created through 'serialize_combined'.\n");
4133
+
4134
+ read_bytes<uint8_t>((void*)&model_in, (size_t)1, in);
4135
+
4136
+ size_t size_model[4];
4137
+ size_t size_metadata;
4138
+ if (!lacks_indexer)
4139
+ {
4140
+ read_bytes_size_t((void*)size_model, (size_t)4, in, saved_size_t, has_same_endianness);
4141
+ size_metadata = size_model[3];
4142
+ }
4143
+
4144
+ else
4145
+ {
4146
+ read_bytes_size_t((void*)size_model, (size_t)3, in, saved_size_t, has_same_endianness);
4147
+ size_metadata = size_model[2];
4148
+ size_model[2] = 0;
4149
+ size_model[3] = size_metadata;
4150
+ }
4151
+
4152
+ switch (model_in)
4153
+ {
4154
+ case HasSingleVarModelNext:
4155
+ {
4156
+ deserialize_model(*model, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4157
+ break;
4158
+ }
4159
+ case HasSingleVarModelPlusIndexerNext:
4160
+ {
4161
+ deserialize_model(*model, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4162
+ check_interrupt_switch(ss);
4163
+ deserialize_model(*indexer, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4164
+ break;
4165
+ }
4166
+ case HasExtModelNext:
4167
+ {
4168
+ deserialize_model(*model_ext, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4169
+ break;
4170
+ }
4171
+ case HasExtModelPlusIndexerNext:
4172
+ {
4173
+ deserialize_model(*model_ext, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4174
+ check_interrupt_switch(ss);
4175
+ deserialize_model(*indexer, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4176
+ break;
4177
+ }
4178
+ case HasSingleVarModelPlusImputerNext:
4179
+ {
4180
+ deserialize_model(*model, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4181
+ check_interrupt_switch(ss);
4182
+ deserialize_model(*imputer, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4183
+ break;
4184
+ }
4185
+ case HasSingleVarModelPlusImputerPlusIndexerNext:
4186
+ {
4187
+ deserialize_model(*model, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4188
+ check_interrupt_switch(ss);
4189
+ deserialize_model(*imputer, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4190
+ check_interrupt_switch(ss);
4191
+ deserialize_model(*indexer, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4192
+ break;
4193
+ }
4194
+ case HasExtModelPlusImputerNext:
4195
+ {
4196
+ deserialize_model(*model_ext, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4197
+ check_interrupt_switch(ss);
4198
+ deserialize_model(*imputer, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4199
+ break;
4200
+ }
4201
+ case HasExtModelPlusImputerPlusIndexerNext:
4202
+ {
4203
+ deserialize_model(*model_ext, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4204
+ check_interrupt_switch(ss);
4205
+ deserialize_model(*imputer, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4206
+ check_interrupt_switch(ss);
4207
+ deserialize_model(*indexer, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4208
+ break;
4209
+ }
4210
+ case HasSingleVarModelPlusMetadataNext:
4211
+ {
4212
+ deserialize_model(*model, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4213
+ check_interrupt_switch(ss);
4214
+ read_bytes<char>((void*)optional_metadata, size_metadata, in);
4215
+ break;
4216
+ }
4217
+ case HasSingleVarModelPlusIndexerPlusMetadataNext:
4218
+ {
4219
+ deserialize_model(*model, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4220
+ check_interrupt_switch(ss);
4221
+ deserialize_model(*indexer, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4222
+ check_interrupt_switch(ss);
4223
+ read_bytes<char>((void*)optional_metadata, size_metadata, in);
4224
+ break;
4225
+ }
4226
+ case HasExtModelPlusMetadataNext:
4227
+ {
4228
+ deserialize_model(*model_ext, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4229
+ check_interrupt_switch(ss);
4230
+ read_bytes<char>((void*)optional_metadata, size_metadata, in);
4231
+ break;
4232
+ }
4233
+ case HasExtModelPlusIndexerPlusMetadataNext:
4234
+ {
4235
+ deserialize_model(*model_ext, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4236
+ check_interrupt_switch(ss);
4237
+ deserialize_model(*indexer, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4238
+ check_interrupt_switch(ss);
4239
+ read_bytes<char>((void*)optional_metadata, size_metadata, in);
4240
+ break;
4241
+ }
4242
+ case HasSingleVarModelPlusImputerPlusMetadataNext:
4243
+ {
4244
+ deserialize_model(*model, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4245
+ check_interrupt_switch(ss);
4246
+ deserialize_model(*imputer, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4247
+ check_interrupt_switch(ss);
4248
+ read_bytes<char>((void*)optional_metadata, size_metadata, in);
4249
+ break;
4250
+ }
4251
+ case HasSingleVarModelPlusImputerPlusIndexerPlusMetadataNext:
4252
+ {
4253
+ deserialize_model(*model, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4254
+ check_interrupt_switch(ss);
4255
+ deserialize_model(*imputer, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4256
+ check_interrupt_switch(ss);
4257
+ deserialize_model(*indexer, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4258
+ check_interrupt_switch(ss);
4259
+ read_bytes<char>((void*)optional_metadata, size_metadata, in);
4260
+ break;
4261
+ }
4262
+ case HasExtModelPlusImputerPlusMetadataNext:
4263
+ {
4264
+ deserialize_model(*model_ext, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4265
+ check_interrupt_switch(ss);
4266
+ deserialize_model(*imputer, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4267
+ check_interrupt_switch(ss);
4268
+ read_bytes<char>((void*)optional_metadata, size_metadata, in);
4269
+ break;
4270
+ }
4271
+ case HasExtModelPlusImputerPlusIndexerPlusMetadataNext:
4272
+ {
4273
+ deserialize_model(*model_ext, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4274
+ check_interrupt_switch(ss);
4275
+ deserialize_model(*imputer, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4276
+ check_interrupt_switch(ss);
4277
+ deserialize_model(*indexer, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4278
+ check_interrupt_switch(ss);
4279
+ read_bytes<char>((void*)optional_metadata, size_metadata, in);
4280
+ break;
4281
+ }
4282
+
4283
+ default:
4284
+ {
4285
+ throw std::runtime_error("Serialized format is incompatible.\n");
4286
+ }
4287
+ }
4288
+ }
4289
+
4290
+ void deserialize_combined
4291
+ (
4292
+ const char* in,
4293
+ IsoForest *model,
4294
+ ExtIsoForest *model_ext,
4295
+ Imputer *imputer,
4296
+ TreesIndexer *indexer,
4297
+ char *optional_metadata
4298
+ )
4299
+ {
4300
+ deserialize_combined<const char*>(
4301
+ in,
4302
+ model,
4303
+ model_ext,
4304
+ imputer,
4305
+ indexer,
4306
+ optional_metadata
4307
+ );
4308
+ }
4309
+
4310
+ void deserialize_combined
4311
+ (
4312
+ FILE* in,
4313
+ IsoForest *model,
4314
+ ExtIsoForest *model_ext,
4315
+ Imputer *imputer,
4316
+ TreesIndexer *indexer,
4317
+ char *optional_metadata
4318
+ )
4319
+ {
4320
+ deserialize_combined<FILE*>(
4321
+ in,
4322
+ model,
4323
+ model_ext,
4324
+ imputer,
4325
+ indexer,
4326
+ optional_metadata
4327
+ );
4328
+ }
4329
+
4330
+ void deserialize_combined
4331
+ (
4332
+ std::istream &in,
4333
+ IsoForest *model,
4334
+ ExtIsoForest *model_ext,
4335
+ Imputer *imputer,
4336
+ TreesIndexer *indexer,
4337
+ char *optional_metadata
4338
+ )
4339
+ {
4340
+ deserialize_combined<std::istream>(
4341
+ in,
4342
+ model,
4343
+ model_ext,
4344
+ imputer,
4345
+ indexer,
4346
+ optional_metadata
4347
+ );
4348
+ }
4349
+
4350
+ void deserialize_combined
4351
+ (
4352
+ const std::string &in,
4353
+ IsoForest *model,
4354
+ ExtIsoForest *model_ext,
4355
+ Imputer *imputer,
4356
+ TreesIndexer *indexer,
4357
+ char *optional_metadata
4358
+ )
4359
+ {
4360
+ const char *ptr = &in[0];
4361
+ deserialize_combined<const char*>(
4362
+ ptr,
4363
+ model,
4364
+ model_ext,
4365
+ imputer,
4366
+ indexer,
4367
+ optional_metadata
4368
+ );
4369
+ }
4370
+
4371
+ bool check_model_has_range_penalty(const IsoForest &model) noexcept
4372
+ {
4373
+ for (const auto &tree : model.trees)
4374
+ {
4375
+ for (const auto &node : tree)
4376
+ {
4377
+ if (node.score < 0 && node.col_type == Numeric)
4378
+ {
4379
+ if (node.range_low > -HUGE_VAL && node.range_high < HUGE_VAL)
4380
+ return true;
4381
+ }
4382
+ }
4383
+ }
4384
+
4385
+ return false;
4386
+ }
4387
+
4388
+ bool check_model_has_range_penalty(const ExtIsoForest &model) noexcept
4389
+ {
4390
+ for (const auto &tree : model.hplanes)
4391
+ {
4392
+ for (const auto &node : tree)
4393
+ {
4394
+ if (node.score < 0)
4395
+ {
4396
+ if (node.range_low > -HUGE_VAL && node.range_high < HUGE_VAL)
4397
+ return true;
4398
+ }
4399
+ }
4400
+ }
4401
+
4402
+ return false;
4403
+ }
4404
+
4405
+ void add_range_penalty(IsoForest &model) noexcept
4406
+ {
4407
+ model.has_range_penalty = check_model_has_range_penalty(model);
4408
+ }
4409
+
4410
+ void add_range_penalty(ExtIsoForest &model) noexcept
4411
+ {
4412
+ model.has_range_penalty = check_model_has_range_penalty(model);
4413
+ }
4414
+
4415
+ void add_range_penalty(Imputer &model) noexcept
4416
+ {
4417
+
4418
+ }
4419
+
4420
+ void add_range_penalty(TreesIndexer &model) noexcept
4421
+ {
4422
+
4423
+ }