isotree 0.2.2 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (151) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +8 -1
  3. data/LICENSE.txt +2 -2
  4. data/README.md +32 -14
  5. data/ext/isotree/ext.cpp +144 -31
  6. data/ext/isotree/extconf.rb +7 -7
  7. data/lib/isotree/isolation_forest.rb +110 -30
  8. data/lib/isotree/version.rb +1 -1
  9. data/vendor/isotree/LICENSE +1 -1
  10. data/vendor/isotree/README.md +165 -27
  11. data/vendor/isotree/include/isotree.hpp +2111 -0
  12. data/vendor/isotree/include/isotree_oop.hpp +394 -0
  13. data/vendor/isotree/inst/COPYRIGHTS +62 -0
  14. data/vendor/isotree/src/RcppExports.cpp +525 -52
  15. data/vendor/isotree/src/Rwrapper.cpp +1931 -268
  16. data/vendor/isotree/src/c_interface.cpp +953 -0
  17. data/vendor/isotree/src/crit.hpp +4232 -0
  18. data/vendor/isotree/src/dist.hpp +1886 -0
  19. data/vendor/isotree/src/exp_depth_table.hpp +134 -0
  20. data/vendor/isotree/src/extended.hpp +1444 -0
  21. data/vendor/isotree/src/external_facing_generic.hpp +399 -0
  22. data/vendor/isotree/src/fit_model.hpp +2401 -0
  23. data/vendor/isotree/src/{dealloc.cpp → headers_joined.hpp} +38 -22
  24. data/vendor/isotree/src/helpers_iforest.hpp +813 -0
  25. data/vendor/isotree/src/{impute.cpp → impute.hpp} +353 -122
  26. data/vendor/isotree/src/indexer.cpp +515 -0
  27. data/vendor/isotree/src/instantiate_template_headers.cpp +118 -0
  28. data/vendor/isotree/src/instantiate_template_headers.hpp +240 -0
  29. data/vendor/isotree/src/isoforest.hpp +1659 -0
  30. data/vendor/isotree/src/isotree.hpp +1804 -392
  31. data/vendor/isotree/src/isotree_exportable.hpp +99 -0
  32. data/vendor/isotree/src/merge_models.cpp +159 -16
  33. data/vendor/isotree/src/mult.hpp +1321 -0
  34. data/vendor/isotree/src/oop_interface.cpp +842 -0
  35. data/vendor/isotree/src/oop_interface.hpp +278 -0
  36. data/vendor/isotree/src/other_helpers.hpp +219 -0
  37. data/vendor/isotree/src/predict.hpp +1932 -0
  38. data/vendor/isotree/src/python_helpers.hpp +134 -0
  39. data/vendor/isotree/src/ref_indexer.hpp +154 -0
  40. data/vendor/isotree/src/robinmap/LICENSE +21 -0
  41. data/vendor/isotree/src/robinmap/README.md +483 -0
  42. data/vendor/isotree/src/robinmap/include/tsl/robin_growth_policy.h +406 -0
  43. data/vendor/isotree/src/robinmap/include/tsl/robin_hash.h +1620 -0
  44. data/vendor/isotree/src/robinmap/include/tsl/robin_map.h +807 -0
  45. data/vendor/isotree/src/robinmap/include/tsl/robin_set.h +660 -0
  46. data/vendor/isotree/src/serialize.cpp +4300 -139
  47. data/vendor/isotree/src/sql.cpp +141 -59
  48. data/vendor/isotree/src/subset_models.cpp +174 -0
  49. data/vendor/isotree/src/utils.hpp +3808 -0
  50. data/vendor/isotree/src/xoshiro.hpp +467 -0
  51. data/vendor/isotree/src/ziggurat.hpp +405 -0
  52. metadata +38 -104
  53. data/vendor/cereal/LICENSE +0 -24
  54. data/vendor/cereal/README.md +0 -85
  55. data/vendor/cereal/include/cereal/access.hpp +0 -351
  56. data/vendor/cereal/include/cereal/archives/adapters.hpp +0 -163
  57. data/vendor/cereal/include/cereal/archives/binary.hpp +0 -169
  58. data/vendor/cereal/include/cereal/archives/json.hpp +0 -1019
  59. data/vendor/cereal/include/cereal/archives/portable_binary.hpp +0 -334
  60. data/vendor/cereal/include/cereal/archives/xml.hpp +0 -956
  61. data/vendor/cereal/include/cereal/cereal.hpp +0 -1089
  62. data/vendor/cereal/include/cereal/details/helpers.hpp +0 -422
  63. data/vendor/cereal/include/cereal/details/polymorphic_impl.hpp +0 -796
  64. data/vendor/cereal/include/cereal/details/polymorphic_impl_fwd.hpp +0 -65
  65. data/vendor/cereal/include/cereal/details/static_object.hpp +0 -127
  66. data/vendor/cereal/include/cereal/details/traits.hpp +0 -1411
  67. data/vendor/cereal/include/cereal/details/util.hpp +0 -84
  68. data/vendor/cereal/include/cereal/external/base64.hpp +0 -134
  69. data/vendor/cereal/include/cereal/external/rapidjson/allocators.h +0 -284
  70. data/vendor/cereal/include/cereal/external/rapidjson/cursorstreamwrapper.h +0 -78
  71. data/vendor/cereal/include/cereal/external/rapidjson/document.h +0 -2652
  72. data/vendor/cereal/include/cereal/external/rapidjson/encodedstream.h +0 -299
  73. data/vendor/cereal/include/cereal/external/rapidjson/encodings.h +0 -716
  74. data/vendor/cereal/include/cereal/external/rapidjson/error/en.h +0 -74
  75. data/vendor/cereal/include/cereal/external/rapidjson/error/error.h +0 -161
  76. data/vendor/cereal/include/cereal/external/rapidjson/filereadstream.h +0 -99
  77. data/vendor/cereal/include/cereal/external/rapidjson/filewritestream.h +0 -104
  78. data/vendor/cereal/include/cereal/external/rapidjson/fwd.h +0 -151
  79. data/vendor/cereal/include/cereal/external/rapidjson/internal/biginteger.h +0 -290
  80. data/vendor/cereal/include/cereal/external/rapidjson/internal/diyfp.h +0 -271
  81. data/vendor/cereal/include/cereal/external/rapidjson/internal/dtoa.h +0 -245
  82. data/vendor/cereal/include/cereal/external/rapidjson/internal/ieee754.h +0 -78
  83. data/vendor/cereal/include/cereal/external/rapidjson/internal/itoa.h +0 -308
  84. data/vendor/cereal/include/cereal/external/rapidjson/internal/meta.h +0 -186
  85. data/vendor/cereal/include/cereal/external/rapidjson/internal/pow10.h +0 -55
  86. data/vendor/cereal/include/cereal/external/rapidjson/internal/regex.h +0 -740
  87. data/vendor/cereal/include/cereal/external/rapidjson/internal/stack.h +0 -232
  88. data/vendor/cereal/include/cereal/external/rapidjson/internal/strfunc.h +0 -69
  89. data/vendor/cereal/include/cereal/external/rapidjson/internal/strtod.h +0 -290
  90. data/vendor/cereal/include/cereal/external/rapidjson/internal/swap.h +0 -46
  91. data/vendor/cereal/include/cereal/external/rapidjson/istreamwrapper.h +0 -128
  92. data/vendor/cereal/include/cereal/external/rapidjson/memorybuffer.h +0 -70
  93. data/vendor/cereal/include/cereal/external/rapidjson/memorystream.h +0 -71
  94. data/vendor/cereal/include/cereal/external/rapidjson/msinttypes/inttypes.h +0 -316
  95. data/vendor/cereal/include/cereal/external/rapidjson/msinttypes/stdint.h +0 -300
  96. data/vendor/cereal/include/cereal/external/rapidjson/ostreamwrapper.h +0 -81
  97. data/vendor/cereal/include/cereal/external/rapidjson/pointer.h +0 -1414
  98. data/vendor/cereal/include/cereal/external/rapidjson/prettywriter.h +0 -277
  99. data/vendor/cereal/include/cereal/external/rapidjson/rapidjson.h +0 -656
  100. data/vendor/cereal/include/cereal/external/rapidjson/reader.h +0 -2230
  101. data/vendor/cereal/include/cereal/external/rapidjson/schema.h +0 -2497
  102. data/vendor/cereal/include/cereal/external/rapidjson/stream.h +0 -223
  103. data/vendor/cereal/include/cereal/external/rapidjson/stringbuffer.h +0 -121
  104. data/vendor/cereal/include/cereal/external/rapidjson/writer.h +0 -709
  105. data/vendor/cereal/include/cereal/external/rapidxml/license.txt +0 -52
  106. data/vendor/cereal/include/cereal/external/rapidxml/manual.html +0 -406
  107. data/vendor/cereal/include/cereal/external/rapidxml/rapidxml.hpp +0 -2624
  108. data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_iterators.hpp +0 -175
  109. data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_print.hpp +0 -428
  110. data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_utils.hpp +0 -123
  111. data/vendor/cereal/include/cereal/macros.hpp +0 -154
  112. data/vendor/cereal/include/cereal/specialize.hpp +0 -139
  113. data/vendor/cereal/include/cereal/types/array.hpp +0 -79
  114. data/vendor/cereal/include/cereal/types/atomic.hpp +0 -55
  115. data/vendor/cereal/include/cereal/types/base_class.hpp +0 -203
  116. data/vendor/cereal/include/cereal/types/bitset.hpp +0 -176
  117. data/vendor/cereal/include/cereal/types/boost_variant.hpp +0 -164
  118. data/vendor/cereal/include/cereal/types/chrono.hpp +0 -72
  119. data/vendor/cereal/include/cereal/types/common.hpp +0 -129
  120. data/vendor/cereal/include/cereal/types/complex.hpp +0 -56
  121. data/vendor/cereal/include/cereal/types/concepts/pair_associative_container.hpp +0 -73
  122. data/vendor/cereal/include/cereal/types/deque.hpp +0 -62
  123. data/vendor/cereal/include/cereal/types/forward_list.hpp +0 -68
  124. data/vendor/cereal/include/cereal/types/functional.hpp +0 -43
  125. data/vendor/cereal/include/cereal/types/list.hpp +0 -62
  126. data/vendor/cereal/include/cereal/types/map.hpp +0 -36
  127. data/vendor/cereal/include/cereal/types/memory.hpp +0 -425
  128. data/vendor/cereal/include/cereal/types/optional.hpp +0 -66
  129. data/vendor/cereal/include/cereal/types/polymorphic.hpp +0 -483
  130. data/vendor/cereal/include/cereal/types/queue.hpp +0 -132
  131. data/vendor/cereal/include/cereal/types/set.hpp +0 -103
  132. data/vendor/cereal/include/cereal/types/stack.hpp +0 -76
  133. data/vendor/cereal/include/cereal/types/string.hpp +0 -61
  134. data/vendor/cereal/include/cereal/types/tuple.hpp +0 -123
  135. data/vendor/cereal/include/cereal/types/unordered_map.hpp +0 -36
  136. data/vendor/cereal/include/cereal/types/unordered_set.hpp +0 -99
  137. data/vendor/cereal/include/cereal/types/utility.hpp +0 -47
  138. data/vendor/cereal/include/cereal/types/valarray.hpp +0 -89
  139. data/vendor/cereal/include/cereal/types/variant.hpp +0 -109
  140. data/vendor/cereal/include/cereal/types/vector.hpp +0 -112
  141. data/vendor/cereal/include/cereal/version.hpp +0 -52
  142. data/vendor/isotree/src/Makevars +0 -4
  143. data/vendor/isotree/src/crit.cpp +0 -912
  144. data/vendor/isotree/src/dist.cpp +0 -749
  145. data/vendor/isotree/src/extended.cpp +0 -790
  146. data/vendor/isotree/src/fit_model.cpp +0 -1090
  147. data/vendor/isotree/src/helpers_iforest.cpp +0 -324
  148. data/vendor/isotree/src/isoforest.cpp +0 -771
  149. data/vendor/isotree/src/mult.cpp +0 -607
  150. data/vendor/isotree/src/predict.cpp +0 -853
  151. data/vendor/isotree/src/utils.cpp +0 -1566
@@ -18,11 +18,29 @@
18
18
  * [5] https://sourceforge.net/projects/iforest/
19
19
  * [6] https://math.stackexchange.com/questions/3388518/expected-number-of-paths-required-to-separate-elements-in-a-binary-tree
20
20
  * [7] Quinlan, J. Ross. C4.5: programs for machine learning. Elsevier, 2014.
21
- * [8] Cortes, David. "Distance approximation using Isolation Forests." arXiv preprint arXiv:1910.12362 (2019).
22
- * [9] Cortes, David. "Imputing missing values with unsupervised random trees." arXiv preprint arXiv:1911.06646 (2019).
21
+ * [8] Cortes, David.
22
+ * "Distance approximation using Isolation Forests."
23
+ * arXiv preprint arXiv:1910.12362 (2019).
24
+ * [9] Cortes, David.
25
+ * "Imputing missing values with unsupervised random trees."
26
+ * arXiv preprint arXiv:1911.06646 (2019).
27
+ * [10] https://math.stackexchange.com/questions/3333220/expected-average-depth-in-random-binary-tree-constructed-top-to-bottom
28
+ * [11] Cortes, David.
29
+ * "Revisiting randomized choices in isolation forests."
30
+ * arXiv preprint arXiv:2110.13402 (2021).
31
+ * [12] Guha, Sudipto, et al.
32
+ * "Robust random cut forest based anomaly detection on streams."
33
+ * International conference on machine learning. PMLR, 2016.
34
+ * [13] Cortes, David.
35
+ * "Isolation forests: looking beyond tree depth."
36
+ * arXiv preprint arXiv:2111.11639 (2021).
37
+ * [14] Ting, Kai Ming, Yue Zhu, and Zhi-Hua Zhou.
38
+ * "Isolation kernel and its effect on SVM."
39
+ * Proceedings of the 24th ACM SIGKDD
40
+ * International Conference on Knowledge Discovery & Data Mining. 2018.
23
41
  *
24
42
  * BSD 2-Clause License
25
- * Copyright (c) 2020, David Cortes
43
+ * Copyright (c) 2019-2022, David Cortes
26
44
  * All rights reserved.
27
45
  * Redistribution and use in source and binary forms, with or without
28
46
  * modification, are permitted provided that the following conditions are met:
@@ -44,219 +62,4362 @@
44
62
  */
45
63
  #include "isotree.hpp"
46
64
 
47
- #ifdef _ENABLE_CEREAL
65
+ /* TODO: add option to serialize as JSON file */
48
66
 
67
+ using std::uint8_t;
68
+ using std::int8_t;
69
+ using std::uint16_t;
70
+ using std::int16_t;
71
+ using std::uint32_t;
72
+ using std::int32_t;
73
+ using std::uint64_t;
74
+ using std::int64_t;
49
75
 
50
- template <class T>
51
- void serialize_obj(T &obj, std::ostream &output)
76
+ /* https://stackoverflow.com/questions/16696297/ftell-at-a-position-past-2gb */
77
+ /* TODO: do CLANG and ICC have similar functionality? */
78
+ #if (defined(_WIN32) || defined(_WIN64) || defined(_MSC_VER)) && (SIZE_MAX >= UINT64_MAX)
79
+ # ifdef _MSC_VER
80
+ # include <stdio.h>
81
+ # define fseek_ _fseeki64
82
+ # define ftell_ _ftelli64
83
+ # define fpos_t_ __int64
84
+ # elif defined(__GNUG__) || defined(__GNUC__)
85
+ # ifndef _FILE_OFFSET_BITS
86
+ # define _FILE_OFFSET_BITS 64
87
+ # endif
88
+ # include <stdio.h>
89
+ # define fseek_ fseeko
90
+ # define ftell_ ftello
91
+ # define fpos_t_ off_t
92
+ # else
93
+ using std::feof;
94
+ using std::fwrite;
95
+ using std::fread;
96
+ using std::fopen;
97
+ using std::fclose;
98
+ using std::ftell;
99
+ using std::fseek;
100
+ # define fseek_ fseek
101
+ # define ftell_ ftell
102
+ # define fpos_t_ long /* <- might overflow with large files */
103
+ # endif
104
+ #else
105
+ using std::feof;
106
+ using std::fwrite;
107
+ using std::fread;
108
+ using std::fopen;
109
+ using std::fclose;
110
+ using std::ftell;
111
+ using std::fseek;
112
+ # define fseek_ fseek
113
+ # define ftell_ ftell
114
+ # define fpos_t_ long
115
+ #endif
116
+
117
+ #if defined(DBL_MANT_DIG) && (DBL_MANT_DIG == 53) && (FLT_RADIX == 2)
118
+ #define HAS_IEEE_DOUBLE
119
+ #endif
120
+
121
+ #if INT_MAX == INT16_MAX
122
+ #define HAS_INT16
123
+ #elif INT_MAX == INT32_MAX
124
+ #define HAS_INT32
125
+ #elif INT_MAX == INT64_MAX
126
+ #define HAS_INT64
127
+ #else
128
+ #define HAS_INT_OTHER
129
+ #endif
130
+
131
+ #if SIZE_MAX == UINT32_MAX
132
+ #define HAS_SIZE32
133
+ #elif SIZE_MAX == UINT64_MAX
134
+ #define HAS_SIZE64
135
+ #else
136
+ #define HAS_SIZE_OTHER
137
+ #endif
138
+
139
+ const char *watermark = "isotree_model";
140
+ const char *incomplete_watermark = "incomplete___";
141
+ static const size_t SIZE_WATERMARK = 13;
142
+ enum DoubleTypeStructure {IsNormalDouble=1, IsAbnormalDouble=2};
143
+ enum PlatformSize {Is16Bit=1, Is32Bit=2, Is64Bit=3, IsOther=4};
144
+ enum PlatformEndianness {PlatformLittleEndian=1, PlatformBigEndian=2};
145
+ enum ModelTypes {
146
+ IsoForestModel=1,
147
+ ExtIsoForestModel=2,
148
+ ImputerModel=3,
149
+ IndexerModel=5,
150
+ AllObjectsCombined=4
151
+ };
152
+ enum EndingIndicator {
153
+ EndsHere=0,
154
+ HasSingleVarModelNext=1,
155
+ HasExtModelNext=2,
156
+ HasImputerNext=3,
157
+ HasIndexerNext=11,
158
+ HasSingleVarModelPlusImputerNext=4,
159
+ HasSingleVarModelPlusIndexerNext=12,
160
+ HasSingleVarModelPlusImputerPlusIndexerNext=13,
161
+ HasExtModelPlusImputerNext=5,
162
+ HasExtModelPlusIndexerNext=14,
163
+ HasExtModelPlusImputerPlusIndexerNext=15,
164
+ HasSingleVarModelPlusMetadataNext=6,
165
+ HasSingleVarModelPlusIndexerPlusMetadataNext=16,
166
+ HasExtModelPlusMetadataNext=7,
167
+ HasExtModelPlusIndexerPlusMetadataNext=17,
168
+ HasSingleVarModelPlusImputerPlusMetadataNext=8,
169
+ HasSingleVarModelPlusImputerPlusIndexerPlusMetadataNext=18,
170
+ HasExtModelPlusImputerPlusMetadataNext=9,
171
+ HasExtModelPlusImputerPlusIndexerPlusMetadataNext=19,
172
+ HasMoreTreesNext=10
173
+ };
174
+
175
+ #ifdef _MSC_VER
176
+ #include <stdlib.h>
177
+ void swap16b(char *bytes) noexcept
178
+ {
179
+ if (std::numeric_limits<unsigned short>::max() == UINT16_MAX) {
180
+ unsigned short temp;
181
+ memcpy(&temp, bytes, sizeof(unsigned short));
182
+ temp = _byteswap_ushort(temp);
183
+ memcpy(bytes, &temp, sizeof(unsigned short));
184
+ }
185
+
186
+ else {
187
+ std::swap(bytes[0], bytes[1]);
188
+ }
189
+ }
190
+ void swap32b(char *bytes) noexcept
191
+ {
192
+ if (std::numeric_limits<unsigned long>::max() == UINT32_MAX) {
193
+ unsigned long temp;
194
+ memcpy(&temp, bytes, sizeof(unsigned long));
195
+ temp = _byteswap_ulong(temp);
196
+ memcpy(bytes, &temp, sizeof(unsigned long));
197
+ }
198
+
199
+ else {
200
+ std::swap(bytes[0], bytes[3]);
201
+ std::swap(bytes[1], bytes[2]);
202
+ }
203
+ }
204
+ void swap64b(char *bytes) noexcept
205
+ {
206
+ unsigned __int64 temp;
207
+ memcpy(&temp, bytes, sizeof(unsigned __int64));
208
+ temp = _byteswap_uint64(temp);
209
+ memcpy(bytes, &temp, sizeof(unsigned __int64));
210
+ }
211
+ #elif defined(__GNUC__) && (__GNUC__ >= 5) && !defined(_WIN32)
212
+ void swap16b(char *bytes) noexcept
213
+ {
214
+ uint16_t temp;
215
+ memcpy(&temp, bytes, sizeof(uint16_t));
216
+ temp = __builtin_bswap16(temp);
217
+ memcpy(bytes, &temp, sizeof(uint16_t));
218
+ }
219
+ void swap32b(char *bytes) noexcept
220
+ {
221
+ uint32_t temp;
222
+ memcpy(&temp, bytes, sizeof(uint32_t));
223
+ temp = __builtin_bswap32(temp);
224
+ memcpy(bytes, &temp, sizeof(uint32_t));
225
+ }
226
+ void swap64b(char *bytes) noexcept
227
+ {
228
+ uint64_t temp;
229
+ memcpy(&temp, bytes, sizeof(uint64_t));
230
+ temp = __builtin_bswap64(temp);
231
+ memcpy(bytes, &temp, sizeof(uint64_t));
232
+ }
233
+ #else
234
+ void swap16b(char *bytes) noexcept
235
+ {
236
+ std::swap(bytes[0], bytes[1]);
237
+ }
238
+ void swap32b(char *bytes) noexcept
239
+ {
240
+ std::swap(bytes[0], bytes[3]);
241
+ std::swap(bytes[1], bytes[2]);
242
+ }
243
+ void swap64b(char *bytes) noexcept
244
+ {
245
+ std::swap(bytes[0], bytes[7]);
246
+ std::swap(bytes[1], bytes[6]);
247
+ std::swap(bytes[2], bytes[5]);
248
+ std::swap(bytes[3], bytes[4]);
249
+ }
250
+ #endif
251
+ void endian_swap(float &bytes) noexcept
252
+ {
253
+ #ifdef HAS_IEEE_DOUBLE
254
+ swap32b((char*)&bytes);
255
+ #else
256
+ std::reverse((char*)&bytes, (char*)&bytes + sizeof(float));
257
+ #endif
258
+ }
259
+ void endian_swap(double &bytes) noexcept
260
+ {
261
+ #ifdef HAS_IEEE_DOUBLE
262
+ swap64b((char*)&bytes);
263
+ #else
264
+ std::reverse((char*)&bytes, (char*)&bytes + sizeof(double));
265
+ #endif
266
+ }
267
+ void endian_swap(uint8_t &bytes) noexcept
268
+ {
269
+ return;
270
+ }
271
+ void endian_swap(uint16_t &bytes) noexcept
272
+ {
273
+ swap16b((char*)&bytes);
274
+ }
275
+ void endian_swap(uint32_t &bytes) noexcept
276
+ {
277
+ swap32b((char*)&bytes);
278
+ }
279
+ void endian_swap(uint64_t &bytes) noexcept
280
+ {
281
+ swap64b((char*)&bytes);
282
+ }
283
+ void endian_swap(int8_t &bytes) noexcept
284
+ {
285
+ return;
286
+ }
287
+ void endian_swap(int16_t &bytes) noexcept
288
+ {
289
+ swap16b((char*)&bytes);
290
+ }
291
+ void endian_swap(int32_t &bytes) noexcept
292
+ {
293
+ swap32b((char*)&bytes);
294
+ }
295
+ void endian_swap(int64_t &bytes) noexcept
296
+ {
297
+ swap64b((char*)&bytes);
298
+ }
299
+ /* Note: on macOS, some compilers will take 'size_t' as different from 'uint64_t',
300
+ hence it needs a separate one. However, in other compilers and platforms this
301
+ leads to a duplicated function definition, and thus needs this separation
302
+ in names (otherwise, compilers such as GCC will not compile it). */
303
+ void endian_swap_size_t(char *bytes) noexcept
304
+ {
305
+ #if (SIZE_MAX == UINT32_MAX)
306
+ swap32b(bytes);
307
+ #elif (SIZE_MAX == UINT64_MAX)
308
+ swap64b(bytes);
309
+ #else
310
+ std::reverse(bytes, bytes + sizeof(size_t));
311
+ #endif
312
+ }
313
+ void endian_swap_int(char *bytes) noexcept
52
314
  {
53
- cereal::BinaryOutputArchive archive(output);
54
- archive(obj);
315
+ #if (INT_MAX == INT16_MAX)
316
+ swap16b(bytes);
317
+ #elif (INT_MAX == INT32_MAX)
318
+ swap32b(bytes);
319
+ #elif (INT_MAX == INT64_MAX) /* width of 'int' is what matters here, not 'size_t' */
320
+ swap64b(bytes);
321
+ #else
322
+ std::reverse(bytes, bytes + sizeof(int));
323
+ #endif
55
324
  }
56
325
  template <class T>
57
- std::string serialize_obj(T &obj)
326
+ void endian_swap(T &bytes) noexcept
327
+ {
328
+ std::reverse((char*)&bytes, (char*)&bytes + sizeof(T));
329
+ }
330
+
331
+ template <class dtype>
332
+ void swap_endianness(dtype *ptr, size_t n_els) noexcept
58
333
  {
59
- std::stringstream ss;
334
+ #ifndef __GNUC__
335
+ if (std::is_same<dtype, size_t>::value)
336
+ {
337
+ for (size_t ix = 0; ix < n_els; ix++)
338
+ endian_swap_size_t((char*)&ptr[ix]);
339
+ return;
340
+ }
341
+
342
+ else if (std::is_same<dtype, int>::value)
60
343
  {
61
- cereal::BinaryOutputArchive archive(ss);
62
- archive(obj);
344
+ for (size_t ix = 0; ix < n_els; ix++)
345
+ endian_swap_int((char*)&ptr[ix]);
346
+ return;
63
347
  }
64
- return ss.str();
348
+ #endif
349
+
350
+ for (size_t ix = 0; ix < n_els; ix++)
351
+ endian_swap(ptr[ix]);
65
352
  }
66
- template <class T, class I>
67
- void deserialize_obj(T &output, I &serialized)
353
+
354
+ const char* set_return_position(const char *in) noexcept
68
355
  {
69
- cereal::BinaryInputArchive archive(serialized);
70
- archive(output);
356
+ return in;
71
357
  }
72
- template <class T>
73
- void deserialize_obj(T &output, std::string &serialized, bool move_str)
358
+
359
+ char* set_return_position(char *in) noexcept
74
360
  {
75
- std::stringstream ss;
76
- if (move_str)
77
- ss.str(std::move(serialized));
78
- else
79
- /* Bug with GCC4 not implementing the move method for stringstreams
80
- https://stackoverflow.com/questions/50926506/deleted-function-std-basic-stringstream-in-linux-with-g
81
- https://github.com/david-cortes/isotree/issues/7 */
82
- // ss = std::stringstream(serialized); /* <- fails with GCC4, CRAN complains */
83
- {
84
- std::string str_copy = serialized;
85
- ss.str(str_copy);
86
- }
87
- deserialize_obj(output, ss);
361
+ return in;
88
362
  }
89
363
 
364
+ fpos_t_ set_return_position(FILE *in)
365
+ {
366
+ return ftell_(in);
367
+ }
90
368
 
91
- /* Serialization and de-serialization functions using Cereal
92
- *
93
- * Parameters
94
- * ==========
95
- * - model (in)
96
- * A model object to serialize, after being fitted through function 'fit_iforest'.
97
- * - imputer (in)
98
- * An imputer object to serialize, after being fitted through function 'fit_iforest'
99
- * with 'build_imputer=true'.
100
- * - output_obj (out)
101
- * An already-allocated object into which a serialized object of the same class will
102
- * be de-serialized. The contents of this object will be overwritten. Should be initialized
103
- * through the default constructor (e.g. 'new ExtIsoForest' or 'ExtIsoForest()').
104
- * - output (out)
105
- * An output stream (any type will do) in which to save/persist/serialize the
106
- * model or imputer object using the cereal library. In the functions that do not
107
- * take this parameter, it will be returned as a string containing the raw bytes.
108
- * - serialized (in)
109
- * The input stream which contains the serialized/saved/persisted model or imputer object,
110
- * which will be de-serialized into 'output'.
111
- * - output_file_path
112
- * File name into which to write the serialized model or imputer object as raw bytes.
113
- * Note that, on Windows, passing non-ASCII characters will fail, and in such case,
114
- * you might instead want to use the versions that take 'wchar_t', which are
115
- * only available in the MSVC compiler (it uses 'std::ofstream' internally, which as
116
- * of C++20, is not required by the standard to accept 'wchar_t' in its constructor).
117
- * Be aware that it will only write raw bytes, thus metadata such as CPU endianness
118
- * will be lost. If you need to transfer files between e.g. an x86 computer and a SPARC
119
- * server, you'll have to use other methods.
120
- * This functionality is intended for being easily wrapped into scripting languages
121
- * without having to copy the contents to some intermediate language.
122
- * - input_file_path
123
- * File name from which to read a serialized model or imputer object as raw bytes.
124
- * See the description for 'output_file_path' for more details.
125
- * - move_str
126
- * Whether to move ('std::move') the contents of the string passed as input in order
127
- * to speed things up and avoid making a redundant copy of the raw bytes. If passing
128
- * 'true', the input string will be rendered empty afterwards.
129
- */
130
- void serialize_isoforest(IsoForest &model, std::ostream &output)
369
+ #define pos_type_istream decltype(std::declval<std::istream>().tellg())
370
+
371
+ pos_type_istream set_return_position(std::istream &in)
372
+ {
373
+ return in.tellg();
374
+ }
375
+
376
+ pos_type_istream set_return_position(std::ostream &in)
131
377
  {
132
- serialize_obj(model, output);
378
+ return in.tellp();
133
379
  }
134
- void serialize_isoforest(IsoForest &model, const char *output_file_path)
380
+
381
+ void return_to_position(const char *&in, const char *saved_position) noexcept
382
+ {
383
+ in = saved_position;
384
+ }
385
+
386
+ void return_to_position(char *&in, char *saved_position) noexcept
135
387
  {
136
- std::ofstream output(output_file_path);
137
- serialize_obj(model, output);
388
+ in = saved_position;
138
389
  }
139
- std::string serialize_isoforest(IsoForest &model)
390
+
391
+ void return_to_position(FILE *&in, fpos_t_ saved_position)
140
392
  {
141
- return serialize_obj(model);
393
+ fseek_(in, saved_position, SEEK_SET);
142
394
  }
143
- void deserialize_isoforest(IsoForest &output_obj, std::istream &serialized)
395
+
396
+ void return_to_position(std::istream &in, pos_type_istream saved_position)
144
397
  {
145
- deserialize_obj(output_obj, serialized);
398
+ in.seekg(saved_position);
146
399
  }
147
- void deserialize_isoforest(IsoForest &output_obj, const char *input_file_path)
400
+
401
+ void return_to_position(std::ostream &in, pos_type_istream saved_position)
148
402
  {
149
- std::ifstream serialized(input_file_path);
150
- deserialize_obj(output_obj, serialized);
403
+ in.seekp(saved_position);
151
404
  }
152
- void deserialize_isoforest(IsoForest &output_obj, std::string &serialized, bool move_str)
405
+
406
+
407
+ bool has_wchar_t_file_serializers() noexcept
153
408
  {
154
- deserialize_obj(output_obj, serialized, move_str);
409
+ #ifdef WCHAR_T_FUNS
410
+ return true;
411
+ #else
412
+ return false;
413
+ #endif
155
414
  }
156
415
 
416
+ void throw_errno()
417
+ {
418
+ throw std::runtime_error("Error " + std::to_string(errno) + " " + strerror(errno) + "\n");
419
+ }
157
420
 
421
+ void throw_ferror(FILE *file)
422
+ {
423
+ if (!errno) fflush(file);
424
+ throw_errno();
425
+ }
158
426
 
159
- void serialize_ext_isoforest(ExtIsoForest &model, std::ostream &output)
427
+ void throw_feoferr()
160
428
  {
161
- serialize_obj(model, output);
429
+ throw std::runtime_error("Error: file ended unexpectedly.\n");
162
430
  }
163
- void serialize_ext_isoforest(ExtIsoForest &model, const char *output_file_path)
431
+
432
+ template <class dtype, class saved_type>
433
+ void convert_dtype(void *ptr_write_, std::vector<char> &buffer, size_t n_els)
164
434
  {
165
- std::ofstream output(output_file_path);
166
- serialize_obj(model, output);
435
+ dtype *ptr_write = (dtype*)ptr_write_;
436
+ saved_type *ptr_read = (saved_type*)buffer.data();
437
+
438
+ if ((sizeof(dtype) <= sizeof(saved_type)) &&
439
+ (saved_type)std::numeric_limits<dtype>::max() < std::numeric_limits<saved_type>::max())
440
+ {
441
+ const saved_type maxval = (saved_type) std::numeric_limits<dtype>::max();
442
+ for (size_t el = 0; el < n_els; el++)
443
+ if (unlikely(ptr_read[el] > maxval))
444
+ throw std::runtime_error("Error: serialized model has values too large for the current machine's types.\n");
445
+ }
446
+
447
+ for (size_t el = 0; el < n_els; el++)
448
+ ptr_write[el] = (dtype)ptr_read[el];
167
449
  }
168
- std::string serialize_ext_isoforest(ExtIsoForest &model)
450
+
451
+ template <class dtype>
452
+ void write_bytes(const void *ptr, const size_t n_els, char *&out) noexcept
169
453
  {
170
- return serialize_obj(model);
454
+ if (n_els == 0) return;
455
+ memcpy(out, ptr, n_els * sizeof(dtype));
456
+ out += n_els * sizeof(dtype);
171
457
  }
172
- void deserialize_ext_isoforest(ExtIsoForest &output_obj, std::istream &serialized)
458
+
459
+ template <class dtype>
460
+ void write_bytes(const void *ptr, const size_t n_els, std::ostream &out)
173
461
  {
174
- deserialize_obj(output_obj, serialized);
462
+ if (n_els == 0) return;
463
+ out.write((char*)ptr, n_els * sizeof(dtype));
464
+ if (unlikely(out.bad())) throw_errno();
175
465
  }
176
- void deserialize_ext_isoforest(ExtIsoForest &output_obj, const char *input_file_path)
466
+
467
+ template <class dtype>
468
+ void write_bytes(const void *ptr, const size_t n_els, FILE *&out)
177
469
  {
178
- std::ifstream serialized(input_file_path);
179
- deserialize_obj(output_obj, serialized);
470
+ if (n_els == 0) return;
471
+ size_t n_written = fwrite(ptr, sizeof(dtype), n_els, out);
472
+ if (n_written != n_els || ferror(out)) throw_ferror(out);
180
473
  }
181
- void deserialize_ext_isoforest(ExtIsoForest &output_obj, std::string &serialized, bool move_str)
474
+
475
+ template <class dtype>
476
+ void read_bytes(void *ptr, const size_t n_els, const char *&in) noexcept
182
477
  {
183
- deserialize_obj(output_obj, serialized, move_str);
478
+ if (n_els == 0) return;
479
+ memcpy(ptr, in, n_els * sizeof(dtype));
480
+ in += n_els * sizeof(dtype);
184
481
  }
185
482
 
483
+ template <class dtype, class saved_type>
484
+ void read_bytes(void *ptr, const size_t n_els, const char *&in, std::vector<char> &buffer, const bool diff_endian)
485
+ {
486
+ if (std::is_same<dtype, saved_type>::value)
487
+ {
488
+ read_bytes<dtype>(ptr, n_els, in);
489
+ if (unlikely(diff_endian)) swap_endianness((dtype*)ptr, n_els);
490
+ return;
491
+ }
492
+ if (n_els == 0) return;
493
+ if (unlikely(buffer.size() < n_els * sizeof(saved_type)))
494
+ buffer.resize((size_t)2 * n_els * sizeof(saved_type));
495
+ memcpy(buffer.data(), in, n_els * sizeof(saved_type));
496
+ in += n_els * sizeof(saved_type);
186
497
 
498
+ if (unlikely(diff_endian)) swap_endianness((saved_type*)buffer.data(), n_els);
499
+ convert_dtype<dtype, saved_type>(ptr, buffer, n_els);
500
+ }
187
501
 
502
+ template <class dtype>
503
+ void read_bytes(void *ptr, const size_t n_els, char *&in) noexcept
504
+ {
505
+ if (n_els == 0) return;
506
+ memcpy(ptr, in, n_els * sizeof(dtype));
507
+ in += n_els * sizeof(dtype);
508
+ }
188
509
 
189
- void serialize_imputer(Imputer &imputer, std::ostream &output)
510
+ template <class dtype, class saved_type>
511
+ void read_bytes(void *ptr, const size_t n_els, char *&in, std::vector<char> &buffer, const bool diff_endian)
190
512
  {
191
- serialize_obj(imputer, output);
513
+ if (std::is_same<dtype, saved_type>::value)
514
+ {
515
+ read_bytes<dtype>(ptr, n_els, in);
516
+ if (unlikely(diff_endian)) swap_endianness((dtype*)ptr, n_els);
517
+ return;
518
+ }
519
+ if (n_els == 0) return;
520
+ if (unlikely(buffer.size() < n_els * sizeof(saved_type)))
521
+ buffer.resize((size_t)2 * n_els * sizeof(saved_type));
522
+ memcpy(buffer.data(), in, n_els * sizeof(saved_type));
523
+ in += n_els * sizeof(saved_type);
524
+
525
+ if (unlikely(diff_endian)) swap_endianness((saved_type*)buffer.data(), n_els);
526
+ convert_dtype<dtype, saved_type>(ptr, buffer, n_els);
192
527
  }
193
- void serialize_imputer(Imputer &imputer, const char *output_file_path)
528
+
529
+ template <class dtype>
530
+ void read_bytes(void *ptr, const size_t n_els, std::istream &in)
194
531
  {
195
- std::ofstream output(output_file_path);
196
- serialize_obj(imputer, output);
532
+ if (n_els == 0) return;
533
+ in.read((char*)ptr, n_els * sizeof(dtype));
534
+ if (unlikely(in.bad())) throw_errno();
197
535
  }
198
- std::string serialize_imputer(Imputer &imputer)
536
+
537
+ template <class dtype, class saved_type>
538
+ void read_bytes(void *ptr, const size_t n_els, std::istream &in, std::vector<char> &buffer, const bool diff_endian)
199
539
  {
200
- return serialize_obj(imputer);
540
+ if (std::is_same<dtype, saved_type>::value)
541
+ {
542
+ read_bytes<dtype>(ptr, n_els, in);
543
+ if (unlikely(diff_endian)) swap_endianness((dtype*)ptr, n_els);
544
+ return;
545
+ }
546
+ if (n_els == 0) return;
547
+ if (unlikely(buffer.size() < n_els * sizeof(saved_type)))
548
+ buffer.resize((size_t)2 * n_els * sizeof(saved_type));
549
+ in.read((char*)buffer.data(), n_els * sizeof(saved_type));
550
+ if (unlikely(in.bad())) throw_errno();
551
+
552
+ if (unlikely(diff_endian)) swap_endianness((saved_type*)buffer.data(), n_els);
553
+ convert_dtype<dtype, saved_type>(ptr, buffer, n_els);
201
554
  }
202
- void deserialize_imputer(Imputer &output_obj, std::istream &serialized)
555
+
556
+ template <class dtype>
557
+ void read_bytes(void *ptr, const size_t n_els, FILE *&in)
203
558
  {
204
- deserialize_obj(output_obj, serialized);
559
+ if (n_els == 0) return;
560
+ if (unlikely(feof(in))) throw_feoferr();
561
+ size_t n_read = fread(ptr, sizeof(dtype), n_els, in);
562
+ if (unlikely(n_read != n_els || ferror(in))) throw_ferror(in);
205
563
  }
206
- void deserialize_imputer(Imputer &output_obj, const char *input_file_path)
564
+
565
+ template <class dtype, class saved_type>
566
+ void read_bytes(void *ptr, const size_t n_els, FILE *&in, std::vector<char> &buffer, const bool diff_endian)
207
567
  {
208
- std::ifstream serialized(input_file_path);
209
- deserialize_obj(output_obj, serialized);
568
+ if (std::is_same<dtype, saved_type>::value)
569
+ {
570
+ read_bytes<dtype>(ptr, n_els, in);
571
+ if (unlikely(diff_endian)) swap_endianness((dtype*)ptr, n_els);
572
+ return;
573
+ }
574
+ if (n_els == 0) return;
575
+ if (unlikely(feof(in))) throw_feoferr();
576
+ if (unlikely(buffer.size() < n_els * sizeof(saved_type)))
577
+ buffer.resize((size_t)2 * n_els * sizeof(saved_type));
578
+ size_t n_read = fread(buffer.data(), sizeof(saved_type), n_els, in);
579
+ if (unlikely(n_read != n_els || ferror(in))) throw_ferror(in);
580
+
581
+ if (unlikely(diff_endian)) swap_endianness((saved_type*)buffer.data(), n_els);
582
+ convert_dtype<dtype, saved_type>(ptr, buffer, n_els);
210
583
  }
211
- void deserialize_imputer(Imputer &output_obj, std::string &serialized, bool move_str)
584
+
585
/* Fills 'vec' with 'n_els' elements of type 'dtype' taken from the in-memory
   cursor 'in', shrinks its capacity to fit, and advances the cursor past the
   bytes consumed. With 'n_els' equal to zero the vector ends up empty and the
   cursor does not move.

   The bytes are transferred with 'memcpy' rather than by dereferencing
   '(dtype*)in': a serialized byte stream gives no guarantee that 'in' is
   suitably aligned for 'dtype', and reading through a misaligned typed
   pointer is undefined behavior. */
template <class dtype>
void read_bytes(std::vector<dtype> &vec, const size_t n_els, const char *&in)
{
    vec.resize(n_els);
    vec.shrink_to_fit();

    const size_t n_bytes = n_els * sizeof(dtype);
    if (n_els)
        memcpy(vec.data(), in, n_bytes);
    in += n_bytes;
}
215
595
 
596
+ template <class dtype, class saved_type>
597
+ void read_bytes(std::vector<dtype> &vec, const size_t n_els, const char *&in, std::vector<char> &buffer, const bool diff_endian)
598
+ {
599
+ if (std::is_same<dtype, saved_type>::value)
600
+ {
601
+ read_bytes<dtype>(vec, n_els, in);
602
+ if (unlikely(diff_endian)) swap_endianness(vec.data(), n_els);
603
+ return;
604
+ }
605
+ if (n_els) {
606
+ if (unlikely(buffer.size() < n_els * sizeof(saved_type)))
607
+ buffer.resize((size_t)2 * n_els * sizeof(saved_type));
608
+ read_bytes<saved_type>(buffer.data(), n_els, in);
609
+ vec.resize(n_els);
610
+ vec.shrink_to_fit();
611
+
612
+ if (unlikely(diff_endian)) swap_endianness((saved_type*)buffer.data(), n_els);
613
+ convert_dtype<dtype, saved_type>(vec.data(), buffer, n_els);
614
+ }
615
+
616
+ else {
617
+ vec.clear();
618
+ vec.shrink_to_fit();
619
+ }
216
620
 
217
- #ifdef _MSC_VER
218
- void serialize_isoforest(IsoForest &model, const wchar_t *output_file_path)
621
+ in += n_els * sizeof(saved_type);
622
+ }
623
+
624
+ template <class dtype>
625
+ void read_bytes(std::vector<dtype> &vec, const size_t n_els, std::istream &in)
219
626
  {
220
- std::ofstream output(output_file_path);
221
- serialize_obj(model, output);
627
+ vec.resize(n_els);
628
+ vec.shrink_to_fit();
629
+
630
+ if (n_els) {
631
+ in.read((char*)vec.data(), n_els * sizeof(dtype));
632
+ if (unlikely(in.bad())) throw_errno();
633
+ }
222
634
  }
223
- void deserialize_isoforest(IsoForest &output_obj, const wchar_t *input_file_path)
635
+
636
+ template <class dtype, class saved_type>
637
+ void read_bytes(std::vector<dtype> &vec, const size_t n_els, std::istream &in, std::vector<char> &buffer, const bool diff_endian)
224
638
  {
225
- std::ifstream serialized(input_file_path);
226
- deserialize_obj(output_obj, serialized);
639
+ if (std::is_same<dtype, saved_type>::value)
640
+ {
641
+ read_bytes<dtype>(vec, n_els, in);
642
+ if (unlikely(diff_endian)) swap_endianness(vec.data(), n_els);
643
+ return;
644
+ }
645
+ vec.resize(n_els);
646
+ vec.shrink_to_fit();
647
+
648
+ if (n_els) {
649
+ if (unlikely(buffer.size() < n_els * sizeof(saved_type)))
650
+ buffer.resize((size_t)2 * n_els * sizeof(saved_type));
651
+ in.read(buffer.data(), n_els * sizeof(saved_type));
652
+ if (unlikely(in.bad())) throw_errno();
653
+
654
+ if (unlikely(diff_endian)) swap_endianness((saved_type*)buffer.data(), n_els);
655
+ convert_dtype<dtype, saved_type>(vec.data(), buffer, n_els);
656
+ }
227
657
  }
228
- void serialize_ext_isoforest(ExtIsoForest &model, const wchar_t *output_file_path)
658
+
659
+ template <class dtype>
660
+ void read_bytes(std::vector<dtype> &vec, const size_t n_els, FILE *&in)
229
661
  {
230
- std::ofstream output(output_file_path);
231
- serialize_obj(model, output);
662
+ vec.resize(n_els);
663
+ vec.shrink_to_fit();
664
+
665
+ if (n_els) {
666
+ if (unlikely(feof(in))) throw_feoferr();
667
+ size_t n_read = fread(vec.data(), sizeof(dtype), n_els, in);
668
+ if (unlikely(n_read != n_els || ferror(in))) throw_ferror(in);
669
+ }
232
670
  }
233
- void deserialize_ext_isoforest(ExtIsoForest &output_obj, const wchar_t *input_file_path)
671
+
672
+ template <class dtype, class saved_type>
673
+ void read_bytes(std::vector<dtype> &vec, const size_t n_els, FILE *&in, std::vector<char> &buffer, const bool diff_endian)
234
674
  {
235
- std::ifstream serialized(input_file_path);
236
- deserialize_obj(output_obj, serialized);
675
+ if (std::is_same<dtype, saved_type>::value)
676
+ {
677
+ read_bytes<dtype>(vec, n_els, in);
678
+ if (unlikely(diff_endian)) swap_endianness(vec.data(), n_els);
679
+ return;
680
+ }
681
+ vec.resize(n_els);
682
+ vec.shrink_to_fit();
683
+
684
+ if (n_els) {
685
+ if (unlikely(feof(in))) throw_feoferr();
686
+ if (unlikely(buffer.size() < n_els * sizeof(saved_type)))
687
+ buffer.resize((size_t)2 * n_els * sizeof(saved_type));
688
+
689
+ size_t n_read = fread(buffer.data(), sizeof(saved_type), n_els, in);
690
+ if (unlikely(n_read != n_els || ferror(in))) throw_ferror(in);
691
+
692
+ if (unlikely(diff_endian)) swap_endianness((saved_type*)buffer.data(), n_els);
693
+ convert_dtype<dtype, saved_type>(vec.data(), buffer, n_els);
694
+ }
237
695
  }
238
- void serialize_imputer(Imputer &imputer, const wchar_t *output_file_path)
696
+
697
+ size_t get_size_node(const IsoTree &node) noexcept
239
698
  {
240
- std::ofstream output(output_file_path);
241
- serialize_obj(imputer, output);
699
+ size_t n_bytes = 0;
700
+ n_bytes += sizeof(uint8_t);
701
+ n_bytes += sizeof(int);
702
+ n_bytes += sizeof(double) * 6;
703
+ n_bytes += sizeof(size_t) * 4;
704
+ n_bytes += sizeof(signed char) * node.cat_split.size();
705
+ return n_bytes;
242
706
  }
243
- void deserialize_imputer(Imputer &output_obj, const wchar_t *input_file_path)
707
+
708
+ template <class otype>
709
+ void serialize_node(const IsoTree &node, otype &out)
244
710
  {
245
- std::ifstream serialized(input_file_path);
246
- deserialize_obj(output_obj, serialized);
711
+ if (interrupt_switch) return;
712
+
713
+ uint8_t data_en = (uint8_t)node.col_type;
714
+ write_bytes<uint8_t>((void*)&data_en, (size_t)1, out);
715
+
716
+ write_bytes<int>((void*)&node.chosen_cat, (size_t)1, out);
717
+
718
+ double data_doubles[] = {
719
+ node.num_split,
720
+ node.pct_tree_left,
721
+ node.score,
722
+ node.range_low,
723
+ node.range_high,
724
+ node.remainder
725
+ };
726
+ write_bytes<double>((void*)data_doubles, (size_t)6, out);
727
+
728
+ size_t data_sizets[] = {
729
+ node.col_num,
730
+ node.tree_left,
731
+ node.tree_right,
732
+ node.cat_split.size()
733
+ };
734
+ write_bytes<size_t>((void*)data_sizets, (size_t)4, out);
735
+
736
+ if (node.cat_split.size())
737
+ write_bytes<signed char>((void*)node.cat_split.data(), node.cat_split.size(), out);
247
738
  }
248
- bool has_msvc()
739
+
740
+ template <class itype>
741
+ void deserialize_node(IsoTree &node, itype &in)
249
742
  {
250
- return true;
743
+ if (interrupt_switch) return;
744
+
745
+ uint8_t data_en;
746
+ read_bytes<uint8_t>((void*)&data_en, (size_t)1, in);
747
+ node.col_type = (ColType)data_en;
748
+
749
+ read_bytes<int>((void*)&node.chosen_cat, (size_t)1, in);
750
+
751
+ double data_doubles[6];
752
+ read_bytes<double>((void*)data_doubles, (size_t)6, in);
753
+ node.num_split = data_doubles[0];
754
+ node.pct_tree_left = data_doubles[1];
755
+ node.score = data_doubles[2];
756
+ node.range_low = data_doubles[3];
757
+ node.range_high = data_doubles[4];
758
+ node.remainder = data_doubles[5];
759
+
760
+ size_t data_sizets[4];
761
+ read_bytes<size_t>((void*)data_sizets, (size_t)4, in);
762
+ node.col_num = data_sizets[0];
763
+ node.tree_left = data_sizets[1];
764
+ node.tree_right = data_sizets[2];
765
+ read_bytes<signed char>(node.cat_split, data_sizets[3], in);
251
766
  }
252
767
 
253
- #else
254
- bool has_msvc()
768
+ template <class itype, class saved_int_t, class saved_size_t>
769
+ void deserialize_node(IsoTree &node, itype &in, std::vector<char> &buffer, const bool diff_endian)
255
770
  {
256
- return false;
771
+ if (interrupt_switch) return;
772
+
773
+ uint8_t data_en;
774
+ read_bytes<uint8_t>((void*)&data_en, (size_t)1, in);
775
+ node.col_type = (ColType)data_en;
776
+
777
+ read_bytes<int, saved_int_t>((void*)&node.chosen_cat, (size_t)1, in, buffer, diff_endian);
778
+
779
+ double data_doubles[6];
780
+ read_bytes<double, double>((void*)data_doubles, (size_t)6, in, buffer, diff_endian);
781
+ node.num_split = data_doubles[0];
782
+ node.pct_tree_left = data_doubles[1];
783
+ node.score = data_doubles[2];
784
+ node.range_low = data_doubles[3];
785
+ node.range_high = data_doubles[4];
786
+ node.remainder = data_doubles[5];
787
+
788
+ size_t data_sizets[4];
789
+ read_bytes<size_t, saved_size_t>((void*)data_sizets, (size_t)4, in, buffer, diff_endian);
790
+ node.col_num = data_sizets[0];
791
+ node.tree_left = data_sizets[1];
792
+ node.tree_right = data_sizets[2];
793
+ read_bytes<signed char, signed char>(node.cat_split, data_sizets[3], in, buffer, diff_endian);
794
+ }
795
+
796
+ size_t get_size_node(const IsoHPlane &node) noexcept
797
+ {
798
+ size_t n_bytes = 0;
799
+ n_bytes += sizeof(double) * 5;
800
+ n_bytes += sizeof(size_t) * 10;
801
+ n_bytes += sizeof(size_t) * node.col_num.size();
802
+ if (node.col_type.size()) {
803
+ n_bytes += sizeof(uint8_t)*node.col_type.size();
804
+ }
805
+ n_bytes += sizeof(double)*node.coef.size();
806
+ n_bytes += sizeof(double)*node.mean.size();
807
+ if (node.cat_coef.size()) {
808
+ for (const auto &vec : node.cat_coef) {
809
+ n_bytes += sizeof(size_t);
810
+ n_bytes += sizeof(double) * vec.size();
811
+ }
812
+ }
813
+ n_bytes += sizeof(int)*node.chosen_cat.size();
814
+ n_bytes += sizeof(double)*node.fill_val.size();
815
+ n_bytes += sizeof(double)*node.fill_new.size();
816
+ return n_bytes;
817
+ }
818
+
819
+ template <class otype>
820
+ void serialize_node(const IsoHPlane &node, otype &out, std::vector<uint8_t> &buffer)
821
+ {
822
+ if (interrupt_switch) return;
823
+
824
+ double data_doubles[] = {
825
+ node.split_point,
826
+ node.score,
827
+ node.range_low,
828
+ node.range_high,
829
+ node.remainder
830
+ };
831
+ write_bytes<double>((void*)data_doubles, (size_t)5, out);
832
+
833
+ size_t data_sizets[] = {
834
+ node.hplane_left,
835
+ node.hplane_right,
836
+ node.col_num.size(),
837
+ node.col_type.size(),
838
+ node.coef.size(),
839
+ node.mean.size(),
840
+ node.cat_coef.size(),
841
+ node.chosen_cat.size(),
842
+ node.fill_val.size(),
843
+ node.fill_new.size()
844
+ };
845
+ write_bytes<size_t>((void*)data_sizets, (size_t)10, out);
846
+
847
+ write_bytes<size_t>((void*)node.col_num.data(), node.col_num.size(), out);
848
+
849
+ if (node.col_type.size()) {
850
+ if (buffer.size() < node.col_type.size())
851
+ buffer.resize((size_t)2 * node.col_type.size());
852
+ for (size_t ix = 0; ix < node.col_type.size(); ix++)
853
+ buffer[ix] = (uint8_t)node.col_type[ix];
854
+ write_bytes<uint8_t>((void*)buffer.data(), node.col_type.size(), out);
855
+ }
856
+
857
+ write_bytes<double>((void*)node.coef.data(), node.coef.size(), out);
858
+
859
+ write_bytes<double>((void*)node.mean.data(), node.mean.size(), out);
860
+
861
+ if (node.cat_coef.size()) {
862
+ size_t veclen;
863
+ for (const auto &vec : node.cat_coef) {
864
+ veclen = vec.size();
865
+ write_bytes<size_t>((void*)&veclen, (size_t)1, out);
866
+ write_bytes<double>((void*)vec.data(), vec.size(), out);
867
+ }
868
+ }
869
+
870
+ write_bytes<int>((void*)node.chosen_cat.data(), node.chosen_cat.size(), out);
871
+
872
+ write_bytes<double>((void*)node.fill_val.data(), node.fill_val.size(), out);
873
+
874
+ write_bytes<double>((void*)node.fill_new.data(), node.fill_new.size(), out);
875
+ }
876
+
877
+ template <class itype>
878
+ void deserialize_node(IsoHPlane &node, itype &in, std::vector<uint8_t> &buffer)
879
+ {
880
+ if (interrupt_switch) return;
881
+
882
+ double data_doubles[5];
883
+ read_bytes<double>((void*)data_doubles, (size_t)5, in);
884
+ node.split_point = data_doubles[0];
885
+ node.score = data_doubles[1];
886
+ node.range_low = data_doubles[2];
887
+ node.range_high = data_doubles[3];
888
+ node.remainder = data_doubles[4];
889
+
890
+ size_t data_sizets[10];
891
+ read_bytes<size_t>((void*)data_sizets, (size_t)10, in);
892
+
893
+ node.hplane_left = data_sizets[0];
894
+ node.hplane_right = data_sizets[1];
895
+
896
+ read_bytes<size_t>(node.col_num, data_sizets[2], in);
897
+
898
+ if (data_sizets[3]) {
899
+ node.col_type.resize(data_sizets[3]);
900
+ node.col_type.shrink_to_fit();
901
+ if (buffer.size() < data_sizets[3])
902
+ buffer.resize((size_t)2 * data_sizets[3]);
903
+ read_bytes<uint8_t>((void*)buffer.data(), data_sizets[3], in);
904
+ for (size_t ix = 0; ix < data_sizets[3]; ix++)
905
+ node.col_type[ix] = (ColType)buffer[ix];
906
+ }
907
+
908
+ read_bytes<double>(node.coef, data_sizets[4], in);
909
+
910
+ read_bytes<double>(node.mean, data_sizets[5], in);
911
+
912
+ if (data_sizets[6]) {
913
+ node.cat_coef.resize(data_sizets[6]);
914
+ node.cat_coef.shrink_to_fit();
915
+ size_t veclen;
916
+ for (auto &vec : node.cat_coef) {
917
+ read_bytes<size_t>((void*)&veclen, (size_t)1, in);
918
+ read_bytes<double>(vec, veclen, in);
919
+ }
920
+ }
921
+
922
+ read_bytes<int>(node.chosen_cat, data_sizets[7], in);
923
+
924
+ read_bytes<double>(node.fill_val, data_sizets[8], in);
925
+
926
+ read_bytes<double>(node.fill_new, data_sizets[9], in);
927
+ }
928
+
929
+ template <class itype, class saved_int_t, class saved_size_t>
930
+ void deserialize_node(IsoHPlane &node, itype &in, std::vector<uint8_t> &buffer, std::vector<char> &buffer2, const bool diff_endian)
931
+ {
932
+ if (interrupt_switch) return;
933
+
934
+ double data_doubles[5];
935
+ read_bytes<double, double>((void*)data_doubles, (size_t)5, in, buffer2, diff_endian);
936
+ node.split_point = data_doubles[0];
937
+ node.score = data_doubles[1];
938
+ node.range_low = data_doubles[2];
939
+ node.range_high = data_doubles[3];
940
+ node.remainder = data_doubles[4];
941
+
942
+ size_t data_sizets[10];
943
+ read_bytes<size_t, saved_size_t>((void*)data_sizets, (size_t)10, in, buffer2, diff_endian);
944
+
945
+ node.hplane_left = data_sizets[0];
946
+ node.hplane_right = data_sizets[1];
947
+
948
+ read_bytes<size_t, saved_size_t>(node.col_num, data_sizets[2], in, buffer2, diff_endian);
949
+
950
+ if (data_sizets[3]) {
951
+ node.col_type.resize(data_sizets[3]);
952
+ node.col_type.shrink_to_fit();
953
+ if (buffer.size() < data_sizets[3])
954
+ buffer.resize((size_t)2 * data_sizets[3]);
955
+ read_bytes<uint8_t>((void*)buffer.data(), data_sizets[3], in);
956
+ for (size_t ix = 0; ix < data_sizets[3]; ix++)
957
+ node.col_type[ix] = (ColType)buffer[ix];
958
+ }
959
+
960
+ read_bytes<double, double>(node.coef, data_sizets[4], in, buffer2, diff_endian);
961
+
962
+ read_bytes<double, double>(node.mean, data_sizets[5], in, buffer2, diff_endian);
963
+
964
+ if (data_sizets[6]) {
965
+ node.cat_coef.resize(data_sizets[6]);
966
+ node.cat_coef.shrink_to_fit();
967
+ size_t veclen;
968
+ for (auto &vec : node.cat_coef) {
969
+ read_bytes<size_t, saved_size_t>((void*)&veclen, (size_t)1, in, buffer2, diff_endian);
970
+ read_bytes<double, double>(vec, veclen, in, buffer2, diff_endian);
971
+ }
972
+ }
973
+
974
+ read_bytes<int, saved_int_t>(node.chosen_cat, data_sizets[7], in, buffer2, diff_endian);
975
+
976
+ read_bytes<double, double>(node.fill_val, data_sizets[8], in, buffer2, diff_endian);
977
+
978
+ read_bytes<double, double>(node.fill_new, data_sizets[9], in, buffer2, diff_endian);
979
+ }
980
+
981
+ size_t get_size_node(const ImputeNode &node) noexcept
982
+ {
983
+ size_t n_bytes = 0;
984
+ n_bytes += sizeof(size_t) * 5;
985
+ n_bytes += sizeof(double) * node.num_sum.size();
986
+ n_bytes += sizeof(double) * node.num_weight.size();
987
+ if (node.cat_sum.size()) {
988
+ for (const auto &v : node.cat_sum) {
989
+ n_bytes += sizeof(size_t);
990
+ n_bytes += sizeof(double) * v.size();
991
+ }
992
+ }
993
+ n_bytes += sizeof(double) * node.cat_weight.size();
994
+ return n_bytes;
257
995
  }
258
996
 
259
- #endif /* ifdef _MSC_VER */
997
+ template <class otype>
998
+ void serialize_node(const ImputeNode &node, otype &out)
999
+ {
1000
+ if (interrupt_switch) return;
1001
+
1002
+ size_t data_sizets[] = {
1003
+ node.parent,
1004
+ node.num_sum.size(),
1005
+ node.num_weight.size(),
1006
+ node.cat_sum.size(),
1007
+ node.cat_weight.size(),
1008
+ };
1009
+ write_bytes<size_t>((void*)data_sizets, (size_t)5, out);
1010
+
1011
+ write_bytes<double>((void*)node.num_sum.data(), node.num_sum.size(), out);
1012
+
1013
+ write_bytes<double>((void*)node.num_weight.data(), node.num_weight.size(), out);
1014
+
1015
+ if (node.cat_sum.size()) {
1016
+ size_t veclen;
1017
+ for (const auto &v : node.cat_sum) {
1018
+ veclen = v.size();
1019
+ write_bytes<size_t>((void*)&veclen, (size_t)1, out);
1020
+ write_bytes<double>((void*)v.data(), veclen, out);
1021
+ }
1022
+ }
1023
+
1024
+ write_bytes<double>((void*)node.cat_weight.data(), node.cat_weight.size(), out);
1025
+ }
1026
+
1027
+ template <class itype>
1028
+ void deserialize_node(ImputeNode &node, itype &in)
1029
+ {
1030
+ if (interrupt_switch) return;
1031
+
1032
+ size_t data_sizets[5];
1033
+ read_bytes<size_t>((void*)data_sizets, (size_t)5, in);
1034
+ node.parent = data_sizets[0];
1035
+
1036
+ read_bytes<double>(node.num_sum, data_sizets[1], in);
1037
+
1038
+ read_bytes<double>(node.num_weight, data_sizets[2], in);
1039
+
1040
+ node.cat_sum.resize(data_sizets[3]);
1041
+ if (data_sizets[3]) {
1042
+ size_t veclen;
1043
+ for (auto &v : node.cat_sum) {
1044
+ read_bytes<size_t>((void*)&veclen, (size_t)1, in);
1045
+ read_bytes<double>(v, veclen, in);
1046
+ }
1047
+ }
1048
+ node.cat_sum.shrink_to_fit();
1049
+
1050
+ read_bytes<double>(node.cat_weight, data_sizets[4], in);
1051
+ }
1052
+
1053
+ template <class itype, class saved_int_t, class saved_size_t>
1054
+ void deserialize_node(ImputeNode &node, itype &in, std::vector<char> &buffer, const bool diff_endian)
1055
+ {
1056
+ if (interrupt_switch) return;
1057
+
1058
+ size_t data_sizets[5];
1059
+ read_bytes<size_t, saved_size_t>((void*)data_sizets, (size_t)5, in, buffer, diff_endian);
1060
+ node.parent = data_sizets[0];
1061
+
1062
+ read_bytes<double, double>(node.num_sum, data_sizets[1], in, buffer, diff_endian);
1063
+
1064
+ read_bytes<double, double>(node.num_weight, data_sizets[2], in, buffer, diff_endian);
260
1065
 
1066
+ node.cat_sum.resize(data_sizets[3]);
1067
+ if (data_sizets[3]) {
1068
+ size_t veclen;
1069
+ for (auto &v : node.cat_sum) {
1070
+ read_bytes<size_t, saved_size_t>((void*)&veclen, (size_t)1, in, buffer, diff_endian);
1071
+ read_bytes<double, double>(v, veclen, in, buffer, diff_endian);
1072
+ }
1073
+ }
1074
+ node.cat_sum.shrink_to_fit();
1075
+
1076
+ read_bytes<double, double>(node.cat_weight, data_sizets[4], in, buffer, diff_endian);
1077
+ }
261
1078
 
262
- #endif /* _ENABLE_CEREAL */
1079
+ size_t get_size_node(const SingleTreeIndex &node) noexcept
1080
+ {
1081
+ size_t n_bytes = 0;
1082
+ n_bytes += sizeof(size_t);
1083
+ n_bytes += node.terminal_node_mappings.size() * sizeof(size_t);
1084
+ n_bytes += sizeof(size_t);
1085
+ n_bytes += node.node_distances.size() * sizeof(double);
1086
+ n_bytes += sizeof(size_t);
1087
+ n_bytes += node.node_depths.size() * sizeof(double);
1088
+ n_bytes += sizeof(size_t);
1089
+ n_bytes += node.reference_points.size() * sizeof(size_t);
1090
+ n_bytes += sizeof(size_t);
1091
+ n_bytes += node.reference_indptr.size() * sizeof(size_t);
1092
+ n_bytes += sizeof(size_t);
1093
+ n_bytes += node.reference_mapping.size() * sizeof(size_t);
1094
+ n_bytes += sizeof(size_t);
1095
+ return n_bytes;
1096
+ }
1097
+
1098
+ template <class otype>
1099
+ void serialize_node(const SingleTreeIndex &node, otype &out)
1100
+ {
1101
+ if (interrupt_switch) return;
1102
+
1103
+ size_t vec_size = node.terminal_node_mappings.size();
1104
+ write_bytes<size_t>((void*)&vec_size, (size_t)1, out);
1105
+ if (vec_size)
1106
+ write_bytes<size_t>((void*)node.terminal_node_mappings.data(), vec_size, out);
1107
+
1108
+ vec_size = node.node_distances.size();
1109
+ write_bytes<size_t>((void*)&vec_size, (size_t)1, out);
1110
+ if (vec_size)
1111
+ write_bytes<double>((void*)node.node_distances.data(), vec_size, out);
1112
+
1113
+ vec_size = node.node_depths.size();
1114
+ write_bytes<size_t>((void*)&vec_size, (size_t)1, out);
1115
+ if (vec_size)
1116
+ write_bytes<double>((void*)node.node_depths.data(), vec_size, out);
1117
+
1118
+ vec_size = node.reference_points.size();
1119
+ write_bytes<size_t>((void*)&vec_size, (size_t)1, out);
1120
+ if (vec_size)
1121
+ write_bytes<size_t>((void*)node.reference_points.data(), vec_size, out);
1122
+
1123
+ vec_size = node.reference_indptr.size();
1124
+ write_bytes<size_t>((void*)&vec_size, (size_t)1, out);
1125
+ if (vec_size)
1126
+ write_bytes<size_t>((void*)node.reference_indptr.data(), vec_size, out);
1127
+
1128
+ vec_size = node.reference_mapping.size();
1129
+ write_bytes<size_t>((void*)&vec_size, (size_t)1, out);
1130
+ if (vec_size)
1131
+ write_bytes<size_t>((void*)node.reference_mapping.data(), vec_size, out);
1132
+
1133
+ vec_size = node.n_terminal;
1134
+ write_bytes<size_t>((void*)&vec_size, (size_t)1, out);
1135
+ }
1136
+
1137
+ template <class itype>
1138
+ void deserialize_node(SingleTreeIndex &node, itype &in)
1139
+ {
1140
+ if (interrupt_switch) return;
1141
+
1142
+ size_t vec_size;
1143
+ read_bytes<size_t>((void*)&vec_size, (size_t)1, in);
1144
+ read_bytes<size_t>(node.terminal_node_mappings, vec_size, in);
1145
+
1146
+ read_bytes<size_t>((void*)&vec_size, (size_t)1, in);
1147
+ read_bytes<double>(node.node_distances, vec_size, in);
1148
+
1149
+ read_bytes<size_t>((void*)&vec_size, (size_t)1, in);
1150
+ read_bytes<double>(node.node_depths, vec_size, in);
1151
+
1152
+ read_bytes<size_t>((void*)&vec_size, (size_t)1, in);
1153
+ read_bytes<size_t>(node.reference_points, vec_size, in);
1154
+
1155
+ read_bytes<size_t>((void*)&vec_size, (size_t)1, in);
1156
+ read_bytes<size_t>(node.reference_indptr, vec_size, in);
1157
+
1158
+ read_bytes<size_t>((void*)&vec_size, (size_t)1, in);
1159
+ read_bytes<size_t>(node.reference_mapping, vec_size, in);
1160
+
1161
+ read_bytes<size_t>((void*)&node.n_terminal, (size_t)1, in);
1162
+ }
1163
+
1164
+ template <class itype, class saved_int_t, class saved_size_t>
1165
+ void deserialize_node(SingleTreeIndex &node, itype &in, std::vector<char> &buffer, const bool diff_endian)
1166
+ {
1167
+ if (interrupt_switch) return;
1168
+
1169
+ size_t vec_size;
1170
+ read_bytes<size_t, saved_size_t>((void*)&vec_size, (size_t)1, in, buffer, diff_endian);
1171
+ read_bytes<size_t, saved_size_t>(node.terminal_node_mappings, vec_size, in, buffer, diff_endian);
1172
+
1173
+ read_bytes<size_t, saved_size_t>((void*)&vec_size, (size_t)1, in, buffer, diff_endian);
1174
+ read_bytes<double, double>(node.node_distances, vec_size, in, buffer, diff_endian);
1175
+
1176
+ read_bytes<size_t, saved_size_t>((void*)&vec_size, (size_t)1, in, buffer, diff_endian);
1177
+ read_bytes<double, double>(node.node_depths, vec_size, in, buffer, diff_endian);
1178
+
1179
+ read_bytes<size_t, saved_size_t>((void*)&vec_size, (size_t)1, in, buffer, diff_endian);
1180
+ read_bytes<size_t, saved_size_t>(node.reference_points, vec_size, in, buffer, diff_endian);
1181
+
1182
+ read_bytes<size_t, saved_size_t>((void*)&vec_size, (size_t)1, in, buffer, diff_endian);
1183
+ read_bytes<size_t, saved_size_t>(node.reference_indptr, vec_size, in, buffer, diff_endian);
1184
+
1185
+ read_bytes<size_t, saved_size_t>((void*)&vec_size, (size_t)1, in, buffer, diff_endian);
1186
+ read_bytes<size_t, saved_size_t>(node.reference_mapping, vec_size, in, buffer, diff_endian);
1187
+
1188
+ read_bytes<size_t, saved_size_t>((void*)&node.n_terminal, (size_t)1, in, buffer, diff_endian);
1189
+ }
1190
+
1191
+ size_t get_size_model(const IsoForest &model) noexcept
1192
+ {
1193
+ size_t n_bytes = 0;
1194
+ n_bytes += sizeof(uint8_t) * 5;
1195
+ n_bytes += sizeof(double) * 2;
1196
+ n_bytes += sizeof(size_t) * 2;
1197
+ for (const auto &tree : model.trees) {
1198
+ n_bytes += sizeof(size_t);
1199
+ for (const auto &node : tree)
1200
+ n_bytes += get_size_node(node);
1201
+ }
1202
+ return n_bytes;
1203
+ }
1204
+
1205
+ template <class otype>
1206
+ void serialize_model(const IsoForest &model, otype &out)
1207
+ {
1208
+ if (interrupt_switch) return;
1209
+
1210
+ uint8_t data_en[] = {
1211
+ (uint8_t)model.new_cat_action,
1212
+ (uint8_t)model.cat_split_type,
1213
+ (uint8_t)model.missing_action,
1214
+ (uint8_t)model.has_range_penalty,
1215
+ (uint8_t)model.scoring_metric,
1216
+ };
1217
+ write_bytes<uint8_t>((void*)data_en, (size_t)5, out);
1218
+
1219
+ double data_doubles[] = {
1220
+ model.exp_avg_depth,
1221
+ model.exp_avg_sep
1222
+ };
1223
+ write_bytes<double>((void*)data_doubles, (size_t)2, out);
1224
+
1225
+ size_t data_sizets[] = {
1226
+ model.orig_sample_size,
1227
+ model.trees.size()
1228
+ };
1229
+ write_bytes<size_t>((void*)data_sizets, (size_t)2, out);
1230
+
1231
+ size_t veclen;
1232
+ for (const auto &tree : model.trees) {
1233
+ veclen = tree.size();
1234
+ write_bytes<size_t>((void*)&veclen, (size_t)1, out);
1235
+ for (const auto &node : tree)
1236
+ serialize_node(node, out);
1237
+ }
1238
+ }
1239
+
1240
+ template <class itype>
1241
+ void deserialize_model(IsoForest &model, itype &in)
1242
+ {
1243
+ if (interrupt_switch) return;
1244
+
1245
+ uint8_t data_en[5];
1246
+ read_bytes<uint8_t>((void*)data_en, (size_t)5, in);
1247
+ model.new_cat_action = (NewCategAction)data_en[0];
1248
+ model.cat_split_type = (CategSplit)data_en[1];
1249
+ model.missing_action = (MissingAction)data_en[2];
1250
+ model.has_range_penalty = (bool)data_en[3];
1251
+ model.scoring_metric = (ScoringMetric)data_en[4];
1252
+
1253
+ double data_doubles[2];
1254
+ read_bytes<double>((void*)data_doubles, (size_t)2, in);
1255
+ model.exp_avg_depth = data_doubles[0];
1256
+ model.exp_avg_sep = data_doubles[1];
1257
+
1258
+ size_t data_sizets[2];
1259
+ read_bytes<size_t>((void*)data_sizets, (size_t)2, in);
1260
+ model.orig_sample_size = data_sizets[0];
1261
+ model.trees.resize(data_sizets[1]);
1262
+ model.trees.shrink_to_fit();
1263
+
1264
+ size_t veclen;
1265
+ for (auto &tree : model.trees) {
1266
+ read_bytes<size_t>((void*)&veclen, (size_t)1, in);
1267
+ tree.resize(veclen);
1268
+ tree.shrink_to_fit();
1269
+ for (auto &node : tree)
1270
+ deserialize_node(node, in);
1271
+ }
1272
+ }
1273
+
1274
+ template <class itype, class saved_int_t, class saved_size_t>
1275
+ void deserialize_model(IsoForest &model, itype &in, std::vector<char> &buffer,
1276
+ const bool diff_endian, const bool lacks_range_penalty,
1277
+ const bool lacks_scoring_metric)
1278
+ {
1279
+ if (interrupt_switch) return;
1280
+
1281
+ if (lacks_range_penalty)
1282
+ {
1283
+ uint8_t data_en[3];
1284
+ read_bytes<uint8_t>((void*)data_en, (size_t)3, in);
1285
+ model.new_cat_action = (NewCategAction)data_en[0];
1286
+ model.cat_split_type = (CategSplit)data_en[1];
1287
+ model.missing_action = (MissingAction)data_en[2];
1288
+ }
1289
+
1290
+ else
1291
+ {
1292
+ uint8_t data_en[4];
1293
+ read_bytes<uint8_t>((void*)data_en, (size_t)4, in);
1294
+ model.new_cat_action = (NewCategAction)data_en[0];
1295
+ model.cat_split_type = (CategSplit)data_en[1];
1296
+ model.missing_action = (MissingAction)data_en[2];
1297
+ model.has_range_penalty = (bool)data_en[3];
1298
+ }
1299
+
1300
+ if (lacks_scoring_metric)
1301
+ {
1302
+ model.scoring_metric = Depth;
1303
+ }
1304
+
1305
+ else
1306
+ {
1307
+ uint8_t data_en;
1308
+ read_bytes<uint8_t>((void*)&data_en, (size_t)1, in);
1309
+ model.scoring_metric = (ScoringMetric)data_en;
1310
+ }
1311
+
1312
+ double data_doubles[2];
1313
+ read_bytes<double, double>((void*)data_doubles, (size_t)2, in, buffer, diff_endian);
1314
+ model.exp_avg_depth = data_doubles[0];
1315
+ model.exp_avg_sep = data_doubles[1];
1316
+
1317
+ size_t data_sizets[2];
1318
+ read_bytes<size_t, saved_size_t>((void*)data_sizets, (size_t)2, in, buffer, diff_endian);
1319
+ model.orig_sample_size = data_sizets[0];
1320
+ model.trees.resize(data_sizets[1]);
1321
+ model.trees.shrink_to_fit();
1322
+
1323
+ size_t veclen;
1324
+ for (auto &tree : model.trees) {
1325
+ read_bytes<size_t, saved_size_t>((void*)&veclen, (size_t)1, in, buffer, diff_endian);
1326
+ tree.resize(veclen);
1327
+ tree.shrink_to_fit();
1328
+ for (auto &node : tree)
1329
+ deserialize_node<itype, saved_int_t, saved_size_t>(node, in, buffer, diff_endian);
1330
+ }
1331
+ }
1332
+
1333
+ template <class otype>
1334
+ void serialize_additional_trees(const IsoForest &model, otype &out, size_t trees_prev)
1335
+ {
1336
+ size_t veclen;
1337
+ for (size_t ix = trees_prev; ix < model.trees.size(); ix++) {
1338
+ veclen = model.trees[ix].size();
1339
+ write_bytes<size_t>((void*)&veclen, (size_t)1, out);
1340
+ for (const auto &node : model.trees[ix])
1341
+ serialize_node(node, out);
1342
+ }
1343
+ }
1344
+
1345
+ size_t determine_serialized_size_additional_trees(const IsoForest &model, size_t old_ntrees) noexcept
1346
+ {
1347
+ size_t n_bytes = 0;
1348
+ for (size_t ix = 0; ix < model.trees.size(); ix++) {
1349
+ n_bytes += sizeof(size_t);
1350
+ for (const auto &node : model.trees[ix])
1351
+ n_bytes += get_size_node(node);
1352
+ }
1353
+ return n_bytes;
1354
+ }
1355
+
1356
+ size_t get_size_model(const ExtIsoForest &model) noexcept
1357
+ {
1358
+ size_t n_bytes = 0;
1359
+ n_bytes += sizeof(uint8_t) * 5;
1360
+ n_bytes += sizeof(double) * 2;
1361
+ n_bytes += sizeof(size_t) * 2;
1362
+ for (const auto &tree : model.hplanes) {
1363
+ n_bytes += sizeof(size_t);
1364
+ for (const auto &node : tree)
1365
+ n_bytes += get_size_node(node);
1366
+ }
1367
+ return n_bytes;
1368
+ }
1369
+
1370
+ template <class otype>
1371
+ void serialize_model(const ExtIsoForest &model, otype &out)
1372
+ {
1373
+ if (interrupt_switch) return;
1374
+
1375
+ uint8_t data_en[] = {
1376
+ (uint8_t)model.new_cat_action,
1377
+ (uint8_t)model.cat_split_type,
1378
+ (uint8_t)model.missing_action,
1379
+ (uint8_t)model.has_range_penalty,
1380
+ (uint8_t)model.scoring_metric
1381
+ };
1382
+ write_bytes<uint8_t>((void*)data_en, (size_t)5, out);
1383
+
1384
+ double data_doubles[] = {
1385
+ model.exp_avg_depth,
1386
+ model.exp_avg_sep
1387
+ };
1388
+ write_bytes<double>((void*)data_doubles, (size_t)2, out);
1389
+
1390
+ size_t data_sizets[] = {
1391
+ model.orig_sample_size,
1392
+ model.hplanes.size()
1393
+ };
1394
+ write_bytes<size_t>((void*)data_sizets, (size_t)2, out);
1395
+
1396
+ std::vector<uint8_t> buffer;
1397
+ size_t veclen;
1398
+ for (const auto &tree : model.hplanes) {
1399
+ veclen = tree.size();
1400
+ write_bytes<size_t>((void*)&veclen, (size_t)1, out);
1401
+ for (const auto &node : tree)
1402
+ serialize_node(node, out, buffer);
1403
+ }
1404
+ }
1405
+
1406
+ template <class itype>
1407
+ void deserialize_model(ExtIsoForest &model, itype &in)
1408
+ {
1409
+ if (interrupt_switch) return;
1410
+
1411
+ uint8_t data_en[5];
1412
+ read_bytes<uint8_t>((void*)data_en, (size_t)5, in);
1413
+ model.new_cat_action = (NewCategAction)data_en[0];
1414
+ model.cat_split_type = (CategSplit)data_en[1];
1415
+ model.missing_action = (MissingAction)data_en[2];
1416
+ model.has_range_penalty = (bool)data_en[3];
1417
+ model.scoring_metric = (ScoringMetric)data_en[4];
1418
+
1419
+ double data_doubles[2];
1420
+ read_bytes<double>((void*)data_doubles, (size_t)2, in);
1421
+ model.exp_avg_depth = data_doubles[0];
1422
+ model.exp_avg_sep = data_doubles[1];
1423
+
1424
+ size_t data_sizets[2];
1425
+ read_bytes<size_t>((void*)data_sizets, (size_t)2, in);
1426
+ model.orig_sample_size = data_sizets[0];
1427
+ model.hplanes.resize(data_sizets[1]);
1428
+ model.hplanes.shrink_to_fit();
1429
+
1430
+ size_t veclen;
1431
+ std::vector<uint8_t> buffer;
1432
+ for (auto &tree : model.hplanes) {
1433
+ read_bytes<size_t>((void*)&veclen, (size_t)1, in);
1434
+ tree.resize(veclen);
1435
+ tree.shrink_to_fit();
1436
+ for (auto &node : tree)
1437
+ deserialize_node(node, in, buffer);
1438
+ }
1439
+ }
1440
+
1441
+ template <class itype, class saved_int_t, class saved_size_t>
1442
+ void deserialize_model(ExtIsoForest &model, itype &in, std::vector<char> &buffer,
1443
+ const bool diff_endian, const bool lacks_range_penalty,
1444
+ const bool lacks_scoring_metric)
1445
+ {
1446
+ if (interrupt_switch) return;
1447
+
1448
+ if (lacks_range_penalty)
1449
+ {
1450
+ uint8_t data_en[3];
1451
+ read_bytes<uint8_t>((void*)data_en, (size_t)3, in);
1452
+ model.new_cat_action = (NewCategAction)data_en[0];
1453
+ model.cat_split_type = (CategSplit)data_en[1];
1454
+ model.missing_action = (MissingAction)data_en[2];
1455
+ }
1456
+
1457
+ else
1458
+ {
1459
+ uint8_t data_en[4];
1460
+ read_bytes<uint8_t>((void*)data_en, (size_t)4, in);
1461
+ model.new_cat_action = (NewCategAction)data_en[0];
1462
+ model.cat_split_type = (CategSplit)data_en[1];
1463
+ model.missing_action = (MissingAction)data_en[2];
1464
+ model.has_range_penalty = (bool)data_en[3];
1465
+ }
1466
+
1467
+ if (lacks_scoring_metric)
1468
+ {
1469
+ model.scoring_metric = Depth;
1470
+ }
1471
+
1472
+ else
1473
+ {
1474
+ uint8_t data_en;
1475
+ read_bytes<uint8_t>((void*)&data_en, (size_t)1, in);
1476
+ model.scoring_metric = (ScoringMetric)data_en;
1477
+ }
1478
+
1479
+ double data_doubles[2];
1480
+ read_bytes<double, double>((void*)data_doubles, (size_t)2, in, buffer, diff_endian);
1481
+ model.exp_avg_depth = data_doubles[0];
1482
+ model.exp_avg_sep = data_doubles[1];
1483
+
1484
+ size_t data_sizets[2];
1485
+ read_bytes<size_t, saved_size_t>((void*)data_sizets, (size_t)2, in, buffer, diff_endian);
1486
+ model.orig_sample_size = data_sizets[0];
1487
+ model.hplanes.resize(data_sizets[1]);
1488
+ model.hplanes.shrink_to_fit();
1489
+
1490
+ size_t veclen;
1491
+ std::vector<uint8_t> buffer_;
1492
+ for (auto &tree : model.hplanes) {
1493
+ read_bytes<size_t, saved_size_t>((void*)&veclen, (size_t)1, in, buffer, diff_endian);
1494
+ tree.resize(veclen);
1495
+ tree.shrink_to_fit();
1496
+ for (auto &node : tree)
1497
+ deserialize_node<itype, saved_int_t, saved_size_t>(node, in, buffer_, buffer, diff_endian);
1498
+ }
1499
+ }
1500
+
1501
+ template <class otype>
1502
+ void serialize_additional_trees(const ExtIsoForest &model, otype &out, size_t trees_prev)
1503
+ {
1504
+ if (interrupt_switch) return;
1505
+
1506
+ std::vector<uint8_t> buffer;
1507
+ size_t veclen;
1508
+ for (size_t ix = trees_prev; ix < model.hplanes.size(); ix++) {
1509
+ veclen = model.hplanes[ix].size();
1510
+ write_bytes<size_t>((void*)&veclen, (size_t)1, out);
1511
+ for (const auto &node : model.hplanes[ix])
1512
+ serialize_node(node, out, buffer);
1513
+ }
1514
+ }
1515
+
1516
+ size_t determine_serialized_size_additional_trees(const ExtIsoForest &model, size_t old_ntrees) noexcept
1517
+ {
1518
+ size_t n_bytes = 0;
1519
+ for (size_t ix = 0; ix < model.hplanes.size(); ix++) {
1520
+ n_bytes += sizeof(size_t);
1521
+ for (const auto &node : model.hplanes[ix])
1522
+ n_bytes += get_size_node(node);
1523
+ }
1524
+ return n_bytes;
1525
+ }
1526
+
1527
+ size_t get_size_model(const Imputer &model) noexcept
1528
+ {
1529
+ size_t n_bytes = 0;
1530
+ n_bytes += sizeof(size_t) * 6;
1531
+ n_bytes += sizeof(int) * model.ncat.size();
1532
+ n_bytes += sizeof(double) * model.col_means.size();
1533
+ n_bytes += sizeof(int) * model.col_modes.size();
1534
+ for (const auto &tree : model.imputer_tree) {
1535
+ n_bytes += sizeof(size_t);
1536
+ for (const auto &node : tree)
1537
+ n_bytes += get_size_node(node);
1538
+ }
1539
+ return n_bytes;
1540
+ }
1541
+
1542
+ template <class otype>
1543
+ void serialize_model(const Imputer &model, otype &out)
1544
+ {
1545
+ if (interrupt_switch) return;
1546
+
1547
+ size_t data_sizets[] = {
1548
+ model.ncols_numeric,
1549
+ model.ncols_categ,
1550
+ model.ncat.size(),
1551
+ model.imputer_tree.size(),
1552
+ model.col_means.size(),
1553
+ model.col_modes.size()
1554
+ };
1555
+ write_bytes<size_t>((void*)data_sizets, (size_t)6, out);
1556
+
1557
+ write_bytes<int>((void*)model.ncat.data(), model.ncat.size(), out);
1558
+
1559
+ write_bytes<double>((void*)model.col_means.data(), model.col_means.size(), out);
1560
+
1561
+ write_bytes<int>((void*)model.col_modes.data(), model.col_modes.size(), out);
1562
+
1563
+ size_t veclen;
1564
+ for (const auto &tree : model.imputer_tree) {
1565
+ veclen = tree.size();
1566
+ write_bytes<size_t>((void*)&veclen, (size_t)1, out);
1567
+ for (const auto &node : tree)
1568
+ serialize_node(node, out);
1569
+ }
1570
+ }
1571
+
1572
+ template <class itype>
1573
+ void deserialize_model(Imputer &model, itype &in)
1574
+ {
1575
+ if (interrupt_switch) return;
1576
+
1577
+ size_t data_sizets[6];
1578
+ read_bytes<size_t>((void*)data_sizets, (size_t)6, in);
1579
+ model.ncols_numeric = data_sizets[0];
1580
+ model.ncols_categ = data_sizets[1];
1581
+ model.ncat.resize(data_sizets[2]);
1582
+ model.imputer_tree.resize(data_sizets[3]);
1583
+ model.col_means.resize(data_sizets[4]);
1584
+ model.col_modes.resize(data_sizets[5]);
1585
+
1586
+ model.ncat.shrink_to_fit();
1587
+ model.imputer_tree.shrink_to_fit();
1588
+ model.col_means.shrink_to_fit();
1589
+ model.col_modes.shrink_to_fit();
1590
+
1591
+ read_bytes<int>(model.ncat, model.ncat.size(), in);
1592
+
1593
+ read_bytes<double>(model.col_means, model.col_means.size(), in);
1594
+
1595
+ read_bytes<int>(model.col_modes, model.col_modes.size(), in);
1596
+
1597
+ size_t veclen;
1598
+ for (auto &tree : model.imputer_tree) {
1599
+ read_bytes<size_t>((void*)&veclen, (size_t)1, in);
1600
+ tree.resize(veclen);
1601
+ tree.shrink_to_fit();
1602
+ for (auto &node : tree)
1603
+ deserialize_node(node, in);
1604
+ }
1605
+ }
1606
+
1607
+ template <class itype, class saved_int_t, class saved_size_t>
1608
+ void deserialize_model(Imputer &model, itype &in, std::vector<char> &buffer,
1609
+ const bool diff_endian, const bool lacks_range_penalty,
1610
+ const bool lacks_scoring_metric)
1611
+ {
1612
+ if (interrupt_switch) return;
1613
+
1614
+ size_t data_sizets[6];
1615
+ read_bytes<size_t, saved_size_t>((void*)data_sizets, (size_t)6, in, buffer, diff_endian);
1616
+ model.ncols_numeric = data_sizets[0];
1617
+ model.ncols_categ = data_sizets[1];
1618
+ model.ncat.resize(data_sizets[2]);
1619
+ model.imputer_tree.resize(data_sizets[3]);
1620
+ model.col_means.resize(data_sizets[4]);
1621
+ model.col_modes.resize(data_sizets[5]);
1622
+
1623
+ model.ncat.shrink_to_fit();
1624
+ model.imputer_tree.shrink_to_fit();
1625
+ model.col_means.shrink_to_fit();
1626
+ model.col_modes.shrink_to_fit();
1627
+
1628
+ read_bytes<int, saved_int_t>(model.ncat, model.ncat.size(), in, buffer, diff_endian);
1629
+
1630
+ read_bytes<double, double>(model.col_means, model.col_means.size(), in, buffer, diff_endian);
1631
+
1632
+ read_bytes<int, saved_int_t>(model.col_modes, model.col_modes.size(), in, buffer, diff_endian);
1633
+
1634
+ size_t veclen;
1635
+ for (auto &tree : model.imputer_tree) {
1636
+ read_bytes<size_t, saved_size_t>((void*)&veclen, (size_t)1, in, buffer, diff_endian);
1637
+ tree.resize(veclen);
1638
+ tree.shrink_to_fit();
1639
+ for (auto &node : tree)
1640
+ deserialize_node<itype, saved_int_t, saved_size_t>(node, in, buffer, diff_endian);
1641
+ }
1642
+ }
1643
+
1644
+ template <class otype>
1645
+ void serialize_additional_trees(const Imputer &model, otype &out, size_t trees_prev)
1646
+ {
1647
+ size_t veclen;
1648
+ for (size_t ix = trees_prev; ix < model.imputer_tree.size(); ix++) {
1649
+ veclen = model.imputer_tree[ix].size();
1650
+ write_bytes<size_t>((void*)&veclen, (size_t)1, out);
1651
+ for (const auto &node : model.imputer_tree[ix])
1652
+ serialize_node(node, out);
1653
+ }
1654
+ }
1655
+
1656
+ size_t determine_serialized_size_additional_trees(const Imputer &model, size_t old_ntrees) noexcept
1657
+ {
1658
+ size_t n_bytes = 0;
1659
+ for (size_t ix = 0; ix < model.imputer_tree.size(); ix++) {
1660
+ n_bytes += sizeof(size_t);
1661
+ for (const auto &node : model.imputer_tree[ix])
1662
+ n_bytes += get_size_node(node);
1663
+ }
1664
+ return n_bytes;
1665
+ }
1666
+
1667
+ size_t get_size_model(const TreesIndexer &model) noexcept
1668
+ {
1669
+ size_t n_bytes = 0;
1670
+ n_bytes += sizeof(size_t);
1671
+ for (const auto &node : model.indices)
1672
+ n_bytes += get_size_node(node);
1673
+ return n_bytes;
1674
+ }
1675
+
1676
+ template <class otype>
1677
+ void serialize_model(const TreesIndexer &model, otype &out)
1678
+ {
1679
+ if (interrupt_switch) return;
1680
+
1681
+ size_t vec_size = model.indices.size();
1682
+ write_bytes<size_t>((void*)&vec_size, (size_t)1, out);
1683
+
1684
+ for (const auto &tree : model.indices)
1685
+ serialize_node(tree, out);
1686
+ }
1687
+
1688
+ template <class itype>
1689
+ void deserialize_model(TreesIndexer &model, itype &in)
1690
+ {
1691
+ if (interrupt_switch) return;
1692
+
1693
+ size_t vec_size;
1694
+ read_bytes<size_t>(&vec_size, (size_t)1, in);
1695
+ model.indices.resize(vec_size);
1696
+ model.indices.shrink_to_fit();
1697
+ for (auto &tree : model.indices)
1698
+ deserialize_node(tree, in);
1699
+ }
1700
+
1701
+ template <class itype, class saved_int_t, class saved_size_t>
1702
+ void deserialize_model(TreesIndexer &model, itype &in, std::vector<char> &buffer,
1703
+ const bool diff_endian, const bool lacks_range_penalty,
1704
+ const bool lacks_scoring_metric)
1705
+ {
1706
+ if (interrupt_switch) return;
1707
+
1708
+ size_t vec_size;
1709
+ read_bytes<size_t, saved_size_t>(&vec_size, (size_t)1, in, buffer, diff_endian);
1710
+ model.indices.resize(vec_size);
1711
+ model.indices.shrink_to_fit();
1712
+ for (auto &tree : model.indices)
1713
+ deserialize_node<itype, saved_int_t, saved_size_t>(tree, in, buffer, diff_endian);
1714
+ }
1715
+
1716
+ template <class otype>
1717
+ void serialize_additional_trees(const TreesIndexer &model, otype &out, size_t trees_prev)
1718
+ {
1719
+ for (size_t ix = trees_prev; ix < model.indices.size(); ix++)
1720
+ serialize_node(model.indices[ix], out);
1721
+ }
1722
+
1723
+ size_t determine_serialized_size_additional_trees(const TreesIndexer &model, size_t old_ntrees) noexcept
1724
+ {
1725
+ size_t n_bytes = 0;
1726
+ for (size_t ix = 0; ix < model.indices.size(); ix++)
1727
+ n_bytes += get_size_node(model.indices[ix]);
1728
+ return n_bytes;
1729
+ }
1730
+
1731
/* Tells whether the current platform stores the least-significant byte of
   an 'int' at its lowest address, by inspecting the first byte of the
   value 1. */
bool get_is_little_endian() noexcept
{
    const int probe = 1;
    const unsigned char *first_byte = (const unsigned char*)&probe;
    return first_byte[0] != 0;
}
1736
+
1737
+ size_t get_size_setup_info() noexcept
1738
+ {
1739
+ size_t n_bytes = 0;
1740
+ n_bytes += sizeof(unsigned char) * SIZE_WATERMARK;
1741
+ n_bytes += sizeof(uint8_t) * 9;
1742
+ return n_bytes;
1743
+ }
1744
+
1745
+ template <class otype>
1746
+ void add_setup_info(otype &out, bool full_watermark)
1747
+ {
1748
+ write_bytes<unsigned char>((void*)(full_watermark? watermark: incomplete_watermark), SIZE_WATERMARK, out);
1749
+ /*
1750
+ 0 : endianness
1751
+ 1-3: isotree version
1752
+ 4: double type
1753
+ 5: size_t limit
1754
+ 6: sizeof(int)
1755
+ 7: sizeof(size_t)
1756
+ 8: sizeof(double)
1757
+ */
1758
+ uint8_t setup_info[] = {
1759
+ (uint8_t)get_is_little_endian(),
1760
+ (uint8_t)ISOTREE_VERSION_MAJOR,
1761
+ (uint8_t)ISOTREE_VERSION_MINOR,
1762
+ (uint8_t)ISOTREE_VERSION_PATCH,
1763
+ #if defined(HAS_IEEE_DOUBLE)
1764
+ (uint8_t)IsNormalDouble,
1765
+ #else
1766
+ (uint8_t)IsAbnormalDouble,
1767
+ #endif
1768
+ #if SIZE_MAX == UINT32_MAX
1769
+ (uint8_t)Is32Bit,
1770
+ #elif SIZE_MAX == UINT64_MAX
1771
+ (uint8_t)Is64Bit,
1772
+ #else
1773
+ (uint8_t)IsOther,
1774
+ #endif
1775
+ (uint8_t)sizeof(int),
1776
+ (uint8_t)sizeof(size_t),
1777
+ (uint8_t)sizeof(double)
1778
+ };
1779
+ write_bytes<uint8_t>((void*)setup_info, (size_t)9, out);
1780
+ }
1781
+
1782
+ template <class otype>
1783
+ void add_full_watermark(otype &out)
1784
+ {
1785
+ write_bytes<unsigned char>((void*)watermark, SIZE_WATERMARK, out);
1786
+ }
1787
+
1788
+ template <class itype>
1789
+ void check_setup_info
1790
+ (
1791
+ itype &in,
1792
+ bool &has_watermark,
1793
+ bool &has_incomplete_watermark,
1794
+ bool &has_same_double,
1795
+ bool &has_same_int_size,
1796
+ bool &has_same_size_t_size,
1797
+ bool &has_same_endianness,
1798
+ PlatformSize &saved_int_t,
1799
+ PlatformSize &saved_size_t,
1800
+ PlatformEndianness &saved_endian,
1801
+ bool &is_deserializable,
1802
+ bool &lacks_range_penalty,
1803
+ bool &lacks_scoring_metric,
1804
+ bool &lacks_indexer
1805
+ )
1806
+ {
1807
+ is_deserializable = false;
1808
+ has_incomplete_watermark = false;
1809
+ lacks_range_penalty = false;
1810
+ lacks_scoring_metric = false;
1811
+ lacks_indexer = false;
1812
+
1813
+ unsigned char watermark_in[SIZE_WATERMARK];
1814
+ read_bytes<unsigned char>((void*)watermark_in, SIZE_WATERMARK, in);
1815
+ if (memcmp(watermark_in, (unsigned char*)watermark, SIZE_WATERMARK)) {
1816
+ has_watermark = false;
1817
+ if (!memcmp(watermark_in, (unsigned char*)incomplete_watermark, SIZE_WATERMARK))
1818
+ has_incomplete_watermark = true;
1819
+ return;
1820
+ }
1821
+ else {
1822
+ has_watermark = true;
1823
+ }
1824
+
1825
+ uint8_t setup_info[9];
1826
+ read_bytes<uint8_t>((void*)setup_info, (size_t)9, in);
1827
+
1828
+ bool is_little_endian = get_is_little_endian();
1829
+ if ((bool)is_little_endian != (bool)setup_info[0]) {
1830
+ has_same_endianness = false;
1831
+ saved_endian = is_little_endian? PlatformLittleEndian : PlatformBigEndian;
1832
+ }
1833
+ else {
1834
+ has_same_endianness = true;
1835
+ }
1836
+
1837
+ if (setup_info[1] == 0 && setup_info[2] == 3 && setup_info[3] == 0) {
1838
+ lacks_range_penalty = true;
1839
+ }
1840
+
1841
+ if (setup_info[1] == 0 && setup_info[2] < 4) {
1842
+ lacks_scoring_metric = true;
1843
+ }
1844
+
1845
+ if (setup_info[1] == 0 && setup_info[2] < 5) {
1846
+ lacks_indexer = true;
1847
+ }
1848
+
1849
+ if (setup_info[4] == (uint8_t)IsAbnormalDouble)
1850
+ fprintf(stderr, "Warning: input model uses non-standard numeric type, might read correctly.\n");
1851
+
1852
+ switch(setup_info[6])
1853
+ {
1854
+ case 16: {saved_int_t = Is16Bit; break;}
1855
+ case 32: {saved_int_t = Is32Bit; break;}
1856
+ case 64: {saved_int_t = Is64Bit; break;}
1857
+ default: {saved_int_t = IsOther; break;}
1858
+ }
1859
+ if ((uint8_t)sizeof(int) != setup_info[6]) {
1860
+ has_same_int_size = false;
1861
+ if (sizeof(uint8_t) != 1) return;
1862
+ if (saved_int_t == IsOther) return;
1863
+ }
1864
+ else {
1865
+ has_same_int_size = true;
1866
+ }
1867
+
1868
+
1869
+ if ((uint8_t)sizeof(size_t) != setup_info[7]) {
1870
+ has_same_size_t_size = false;
1871
+ if (sizeof(uint8_t) != 1) return;
1872
+ }
1873
+ else {
1874
+ has_same_size_t_size = true;
1875
+ }
1876
+
1877
+
1878
+ if ((uint8_t)sizeof(double) != setup_info[8]) {
1879
+ has_same_double = false;
1880
+ return;
1881
+ }
1882
+ else {
1883
+ has_same_double = true;
1884
+ }
1885
+
1886
+ saved_size_t = (PlatformSize)setup_info[5];
1887
+ #if SIZE_MAX == UINT32_MAX
1888
+ if (setup_info[5] != (uint8_t)Is32Bit)
1889
+ #elif SIZE_MAX == UINT64_MAX
1890
+ if (setup_info[5] != (uint8_t)Is64Bit)
1891
+ #else
1892
+ if (setup_info[5] != (uint8_t)IsOther)
1893
+ #endif
1894
+ {
1895
+ has_same_size_t_size = false;
1896
+ if (saved_size_t == IsOther)
1897
+ return;
1898
+ }
1899
+
1900
+ else {
1901
+ has_same_size_t_size = true;
1902
+ }
1903
+
1904
+ is_deserializable = true;
1905
+ }
1906
+
1907
+ template <class itype>
1908
+ void check_setup_info(itype &in)
1909
+ {
1910
+ bool has_watermark = false;
1911
+ bool has_incomplete_watermark = false;
1912
+ bool has_same_double = false;
1913
+ bool has_same_int_size = false;
1914
+ bool has_same_size_t_size = false;
1915
+ bool has_same_endianness = false;
1916
+ PlatformSize saved_int_t;
1917
+ PlatformSize saved_size_t;
1918
+ PlatformEndianness saved_endian;
1919
+ bool is_deserializable = false;
1920
+ bool lacks_range_penalty = false;
1921
+ bool lacks_scoring_metric = false;
1922
+ bool lacks_indexer = false;
1923
+
1924
+ check_setup_info(
1925
+ in,
1926
+ has_watermark,
1927
+ has_incomplete_watermark,
1928
+ has_same_double,
1929
+ has_same_int_size,
1930
+ has_same_size_t_size,
1931
+ has_same_endianness,
1932
+ saved_int_t,
1933
+ saved_size_t,
1934
+ saved_endian,
1935
+ is_deserializable,
1936
+ lacks_range_penalty,
1937
+ lacks_scoring_metric,
1938
+ lacks_indexer
1939
+ );
1940
+
1941
+ if (!has_watermark) {
1942
+ if (has_incomplete_watermark)
1943
+ throw std::runtime_error("Error: serialized model is incomplete.\n");
1944
+ else
1945
+ throw std::runtime_error("Error: input is not an isotree model.\n");
1946
+ }
1947
+ if (!has_same_double)
1948
+ throw std::runtime_error("Error: input model was saved in a machine with different 'double' type.\n");
1949
+ if (!has_same_int_size)
1950
+ throw std::runtime_error("Error: input model was saved in a machine with different integer type.\n");
1951
+ if (!has_same_size_t_size)
1952
+ throw std::runtime_error("Error: input model was saved in a machine with different 'size_t' type.\n");
1953
+ if (!has_same_endianness)
1954
+ throw std::runtime_error("Error: input model was saved in a machine with different endianness.\n");
1955
+ if (lacks_range_penalty || lacks_scoring_metric || lacks_indexer)
1956
+ throw std::runtime_error("Error: input model was produced with an incompatible earlier version, needs to be re-serialized.\n");
1957
+ }
1958
+
1959
+ template <class itype>
1960
+ void check_setup_info
1961
+ (
1962
+ itype &in,
1963
+ bool &has_same_int_size,
1964
+ bool &has_same_size_t_size,
1965
+ bool &has_same_endianness,
1966
+ PlatformSize &saved_int_t,
1967
+ PlatformSize &saved_size_t,
1968
+ PlatformEndianness &saved_endian,
1969
+ bool &lacks_range_penalty,
1970
+ bool &lacks_scoring_metric,
1971
+ bool &lacks_indexer
1972
+ )
1973
+ {
1974
+ bool has_watermark = false;
1975
+ bool has_incomplete_watermark = false;
1976
+ bool has_same_double = false;
1977
+ bool is_deserializable = false;
1978
+
1979
+ check_setup_info(
1980
+ in,
1981
+ has_watermark,
1982
+ has_incomplete_watermark,
1983
+ has_same_double,
1984
+ has_same_int_size,
1985
+ has_same_size_t_size,
1986
+ has_same_endianness,
1987
+ saved_int_t,
1988
+ saved_size_t,
1989
+ saved_endian,
1990
+ is_deserializable,
1991
+ lacks_range_penalty,
1992
+ lacks_scoring_metric,
1993
+ lacks_indexer
1994
+ );
1995
+
1996
+ if (!has_watermark) {
1997
+ if (has_incomplete_watermark)
1998
+ throw std::runtime_error("Error: serialized model is incomplete.\n");
1999
+ else
2000
+ throw std::runtime_error("Error: input is not an isotree model.\n");
2001
+ }
2002
+ if (!has_same_double)
2003
+ throw std::runtime_error("Error: input model was saved in a machine with different 'double' type.\n");
2004
+ if (!is_deserializable)
2005
+ throw std::runtime_error("Error: input format is incompatible.\n");
2006
+ }
2007
+
2008
/* Number of bytes taken by the metadata placed after a serialized model:
   one 'uint8_t' followed by one 'size_t'. */
size_t get_size_ending_metadata() noexcept
{
    return sizeof(uint8_t) + sizeof(size_t);
}
2015
+
2016
+ template <class Model>
2017
+ size_t determine_serialized_size(const Model &model) noexcept
2018
+ {
2019
+ size_t n_bytes = 0;
2020
+ n_bytes += get_size_setup_info();
2021
+ n_bytes += sizeof(uint8_t);
2022
+ n_bytes += sizeof(size_t);
2023
+ n_bytes += get_size_model(model);
2024
+ n_bytes += get_size_ending_metadata();
2025
+ return n_bytes;
2026
+ }
2027
+
2028
+ uint8_t get_model_code(const IsoForest &model) noexcept
2029
+ {
2030
+ return IsoForestModel;
2031
+ }
2032
+
2033
+ uint8_t get_model_code(const ExtIsoForest &model) noexcept
2034
+ {
2035
+ return ExtIsoForestModel;
2036
+ }
2037
+
2038
+ uint8_t get_model_code(const Imputer &model) noexcept
2039
+ {
2040
+ return ImputerModel;
2041
+ }
2042
+
2043
+ uint8_t get_model_code(const TreesIndexer &model) noexcept
2044
+ {
2045
+ return IndexerModel;
2046
+ }
2047
+
2048
+ template <class Model, class otype>
2049
+ void serialization_pipeline(const Model &model, otype &out)
2050
+ {
2051
+ SignalSwitcher ss = SignalSwitcher();
2052
+
2053
+ auto pos_watermark = set_return_position(out);
2054
+
2055
+ add_setup_info(out, false);
2056
+ uint8_t model_type = get_model_code(model);
2057
+ write_bytes<uint8_t>((void*)&model_type, (size_t)1, out);
2058
+ size_t size_model = get_size_model(model);
2059
+ write_bytes<size_t>((void*)&size_model, (size_t)1, out);
2060
+ serialize_model(model, out);
2061
+ check_interrupt_switch(ss);
2062
+
2063
+ /* This last bit will be left open in order to signal if anything follows,
2064
+ in case it's decided to change the format in the future or to add
2065
+ something additional, along with a 'size_t' slot in case it would need
2066
+ to jump ahead or something like that. */
2067
+ uint8_t ending_type = (uint8_t)EndsHere;
2068
+ write_bytes<uint8_t>((void*)&ending_type, (size_t)1, out);
2069
+ size_t jump_ahead = 0;
2070
+ write_bytes<size_t>((void*)&jump_ahead, (size_t)1, out);
2071
+
2072
+ auto end_pos = set_return_position(out);
2073
+ return_to_position(out, pos_watermark);
2074
+ add_full_watermark(out);
2075
+ return_to_position(out, end_pos);
2076
+ }
2077
+
2078
+ template <class Model, class itype>
2079
+ void deserialization_pipeline(Model &model, itype &in)
2080
+ {
2081
+ SignalSwitcher ss = SignalSwitcher();
2082
+
2083
+ bool has_same_int_size;
2084
+ bool has_same_size_t_size;
2085
+ bool has_same_endianness;
2086
+ PlatformSize saved_int_t;
2087
+ PlatformSize saved_size_t;
2088
+ PlatformEndianness saved_endian;
2089
+ bool lacks_range_penalty;
2090
+ bool lacks_scoring_metric;
2091
+ bool lacks_indexer; /* <- ignored */
2092
+
2093
+ check_setup_info(
2094
+ in,
2095
+ has_same_int_size,
2096
+ has_same_size_t_size,
2097
+ has_same_endianness,
2098
+ saved_int_t,
2099
+ saved_size_t,
2100
+ saved_endian,
2101
+ lacks_range_penalty,
2102
+ lacks_scoring_metric,
2103
+ lacks_indexer
2104
+ );
2105
+
2106
+ uint8_t model_type = get_model_code(model);
2107
+ uint8_t model_in;
2108
+ read_bytes<uint8_t>((void*)&model_in, (size_t)1, in);
2109
+ if (model_type != model_in)
2110
+ throw std::runtime_error("Object to de-serialize does not match with the supplied type.\n");
2111
+
2112
+ size_t size_model;
2113
+ if (has_same_int_size && has_same_size_t_size && has_same_endianness && !lacks_range_penalty && !lacks_scoring_metric)
2114
+ {
2115
+ read_bytes<size_t>((void*)&size_model, (size_t)1, in);
2116
+ deserialize_model(model, in);
2117
+ }
2118
+
2119
+ else
2120
+ {
2121
+ std::vector<char> buffer;
2122
+ const bool diff_endian = !has_same_endianness;
2123
+
2124
+ if (saved_int_t == Is16Bit && saved_size_t == Is32Bit)
2125
+ {
2126
+ read_bytes<size_t, uint32_t>((void*)&size_model, (size_t)1, in, buffer, diff_endian);
2127
+ deserialize_model<itype, int16_t, uint32_t>(model, in, buffer, diff_endian, lacks_range_penalty, lacks_scoring_metric);
2128
+ }
2129
+
2130
+ else if (saved_int_t == Is32Bit && saved_size_t == Is32Bit)
2131
+ {
2132
+ read_bytes<size_t, uint32_t>((void*)&size_model, (size_t)1, in, buffer, diff_endian);
2133
+ deserialize_model<itype, int32_t, uint32_t>(model, in, buffer, diff_endian, lacks_range_penalty, lacks_scoring_metric);
2134
+ }
2135
+
2136
+ else if (saved_int_t == Is64Bit && saved_size_t == Is32Bit)
2137
+ {
2138
+ read_bytes<size_t, uint32_t>((void*)&size_model, (size_t)1, in, buffer, diff_endian);
2139
+ deserialize_model<itype, int64_t, uint32_t>(model, in, buffer, diff_endian, lacks_range_penalty, lacks_scoring_metric);
2140
+ }
2141
+
2142
+ else if (saved_int_t == Is16Bit && saved_size_t == Is64Bit)
2143
+ {
2144
+ read_bytes<size_t, uint64_t>((void*)&size_model, (size_t)1, in, buffer, diff_endian);
2145
+ deserialize_model<itype, int16_t, uint64_t>(model, in, buffer, diff_endian, lacks_range_penalty, lacks_scoring_metric);
2146
+ }
2147
+
2148
+ else if (saved_int_t == Is32Bit && saved_size_t == Is64Bit)
2149
+ {
2150
+ read_bytes<size_t, uint64_t>((void*)&size_model, (size_t)1, in, buffer, diff_endian);
2151
+ deserialize_model<itype, int32_t, uint64_t>(model, in, buffer, diff_endian, lacks_range_penalty, lacks_scoring_metric);
2152
+ }
2153
+
2154
+ else if (saved_int_t == Is64Bit && saved_size_t == Is64Bit)
2155
+ {
2156
+ read_bytes<size_t, uint64_t>((void*)&size_model, (size_t)1, in, buffer, diff_endian);
2157
+ deserialize_model<itype, int64_t, uint64_t>(model, in, buffer, diff_endian, lacks_range_penalty, lacks_scoring_metric);
2158
+ }
2159
+
2160
+ else
2161
+ {
2162
+ unexpected_error();
2163
+ }
2164
+ }
2165
+
2166
+ check_interrupt_switch(ss);
2167
+
2168
+ if (lacks_range_penalty)
2169
+ {
2170
+ add_range_penalty(model);
2171
+ check_interrupt_switch(ss);
2172
+ }
2173
+
2174
+ /* Not currently used, but left in case the format changes */
2175
+ uint8_t ending_type;
2176
+ read_bytes<uint8_t>((void*)&ending_type, (size_t)1, in);
2177
+ size_t jump_ahead;
2178
+ read_bytes<size_t>((void*)&jump_ahead, (size_t)1, in);
2179
+ }
2180
+
2181
+ void re_serialization_pipeline(const IsoForest &model, char *&out)
2182
+ {
2183
+ SignalSwitcher ss = SignalSwitcher();
2184
+
2185
+ check_setup_info(out);
2186
+
2187
+ uint8_t model_in;
2188
+ memcpy(&model_in, out, sizeof(uint8_t));
2189
+ out += sizeof(uint8_t);
2190
+ if (model_in != get_model_code(model))
2191
+ throw std::runtime_error("Object to incrementally-serialize does not match with the supplied type.\n");
2192
+
2193
+ char *pos_size = out;
2194
+ size_t old_size;
2195
+ memcpy(&old_size, out, sizeof(size_t));
2196
+ out += sizeof(size_t);
2197
+
2198
+ char *old_end = out + old_size;
2199
+ uint8_t old_ending_type;
2200
+ memcpy(&old_ending_type, old_end, sizeof(uint8_t));
2201
+ size_t old_jump_ahead;
2202
+ memcpy(&old_jump_ahead, old_end + sizeof(uint8_t), sizeof(size_t));
2203
+
2204
+ size_t new_size = get_size_model(model);
2205
+ size_t new_ntrees = model.trees.size();
2206
+
2207
+ try
2208
+ {
2209
+ out += sizeof(uint8_t) * 3;
2210
+ if (model.has_range_penalty)
2211
+ {
2212
+ uint8_t has_range_penalty;
2213
+ memcpy(&has_range_penalty, out, sizeof(uint8_t));
2214
+ if (!has_range_penalty)
2215
+ memcpy(out, &has_range_penalty, sizeof(uint8_t));
2216
+ }
2217
+ out += sizeof(uint8_t);
2218
+ out += sizeof(double) * 2;
2219
+ out += sizeof(size_t);
2220
+
2221
+ char *pos_ntrees = out;
2222
+ size_t old_ntrees;
2223
+ memcpy(&old_ntrees, out, sizeof(size_t));
2224
+
2225
+ serialize_additional_trees(model, old_end, old_ntrees);
2226
+
2227
+ out = old_end;
2228
+ uint8_t ending_type = (uint8_t)EndsHere;
2229
+ memcpy(out, &ending_type, sizeof(uint8_t));
2230
+ out += sizeof(uint8_t);
2231
+ size_t jump_ahead = 0;
2232
+ memcpy(out, &jump_ahead, sizeof(size_t));
2233
+ out += sizeof(size_t);
2234
+
2235
+ /* Leave this for the end in case something fails, so as not to
2236
+ render the serialized bytes unusable. */
2237
+ memcpy(pos_size, &new_size, sizeof(size_t));
2238
+ memcpy(pos_ntrees, &new_ntrees, sizeof(size_t));
2239
+ }
2240
+
2241
+ catch(...)
2242
+ {
2243
+ memcpy(out, &old_ending_type, sizeof(uint8_t));
2244
+ memcpy(out + sizeof(uint8_t), &old_jump_ahead, sizeof(size_t));
2245
+ throw;
2246
+ }
2247
+
2248
+ check_interrupt_switch(ss);
2249
+ }
2250
+
2251
+ void re_serialization_pipeline(const ExtIsoForest &model, char *&out)
2252
+ {
2253
+ SignalSwitcher ss = SignalSwitcher();
2254
+
2255
+ check_setup_info(out);
2256
+
2257
+ uint8_t model_in;
2258
+ memcpy(&model_in, out, sizeof(uint8_t));
2259
+ out += sizeof(uint8_t);
2260
+ if (model_in != get_model_code(model))
2261
+ throw std::runtime_error("Object to incrementally-serialize does not match with the supplied type.\n");
2262
+
2263
+ char *pos_size = out;
2264
+ size_t old_size;
2265
+ memcpy(&old_size, out, sizeof(size_t));
2266
+ out += sizeof(size_t);
2267
+
2268
+ char *old_end = out + old_size;
2269
+ uint8_t old_ending_type;
2270
+ memcpy(&old_ending_type, old_end, sizeof(uint8_t));
2271
+ size_t old_jump_ahead;
2272
+ memcpy(&old_jump_ahead, old_end + sizeof(uint8_t), sizeof(size_t));
2273
+
2274
+ size_t new_size = get_size_model(model);
2275
+ size_t new_ntrees = model.hplanes.size();
2276
+
2277
+ try
2278
+ {
2279
+ out += sizeof(uint8_t) * 3;
2280
+ if (model.has_range_penalty)
2281
+ {
2282
+ uint8_t has_range_penalty;
2283
+ memcpy(&has_range_penalty, out, sizeof(uint8_t));
2284
+ if (!has_range_penalty)
2285
+ memcpy(out, &has_range_penalty, sizeof(uint8_t));
2286
+ }
2287
+ out += sizeof(uint8_t);
2288
+ out += sizeof(double) * 2;
2289
+ out += sizeof(size_t);
2290
+ char *pos_ntrees = out;
2291
+ size_t old_ntrees;
2292
+ memcpy(&old_ntrees, out, sizeof(size_t));
2293
+ out += sizeof(size_t);
2294
+
2295
+ serialize_additional_trees(model, old_end, old_ntrees);
2296
+
2297
+ out = old_end;
2298
+ uint8_t ending_type = (uint8_t)EndsHere;
2299
+ memcpy(out, &ending_type, sizeof(uint8_t));
2300
+ out += sizeof(uint8_t);
2301
+ size_t jump_ahead = 0;
2302
+ memcpy(out, &jump_ahead, sizeof(size_t));
2303
+ out += sizeof(size_t);
2304
+
2305
+ /* Leave this for the end in case something fails, so as not to
2306
+ render the serialized bytes unusable. */
2307
+ memcpy(pos_size, &new_size, sizeof(size_t));
2308
+ memcpy(pos_ntrees, &new_ntrees, sizeof(size_t));
2309
+ }
2310
+
2311
+ catch(...)
2312
+ {
2313
+ memcpy(out, &old_ending_type, sizeof(uint8_t));
2314
+ memcpy(out + sizeof(uint8_t), &old_jump_ahead, sizeof(size_t));
2315
+ throw;
2316
+ }
2317
+
2318
+ check_interrupt_switch(ss);
2319
+ }
2320
+
2321
+ void re_serialization_pipeline(const Imputer &model, char *&out)
2322
+ {
2323
+ SignalSwitcher ss = SignalSwitcher();
2324
+
2325
+ check_setup_info(out);
2326
+
2327
+ uint8_t model_in;
2328
+ memcpy(&model_in, out, sizeof(uint8_t));
2329
+ out += sizeof(uint8_t);
2330
+ if (model_in != get_model_code(model))
2331
+ throw std::runtime_error("Object to incrementally-serialize does not match with the supplied type.\n");
2332
+
2333
+ char *pos_size = out;
2334
+ size_t old_size;
2335
+ memcpy(&old_size, out, sizeof(size_t));
2336
+ out += sizeof(size_t);
2337
+
2338
+ char *old_end = out + old_size;
2339
+ uint8_t old_ending_type;
2340
+ memcpy(&old_ending_type, old_end, sizeof(uint8_t));
2341
+ size_t old_jump_ahead;
2342
+ memcpy(&old_jump_ahead, old_end + sizeof(uint8_t), sizeof(size_t));
2343
+
2344
+ size_t new_size = get_size_model(model);
2345
+ size_t new_ntrees = model.imputer_tree.size();
2346
+
2347
+ try
2348
+ {
2349
+ out += sizeof(size_t) * 3;
2350
+
2351
+ char *pos_ntrees = out;
2352
+ size_t old_ntrees;
2353
+ memcpy(&old_ntrees, out, sizeof(size_t));
2354
+
2355
+ serialize_additional_trees(model, old_end, old_ntrees);
2356
+
2357
+ out = old_end;
2358
+ uint8_t ending_type = (uint8_t)EndsHere;
2359
+ memcpy(out, &ending_type, sizeof(uint8_t));
2360
+ out += sizeof(uint8_t);
2361
+ size_t jump_ahead = 0;
2362
+ memcpy(out, &jump_ahead, sizeof(size_t));
2363
+ out += sizeof(size_t);
2364
+
2365
+ /* Leave this for the end in case something fails, so as not to
2366
+ render the serialized bytes unusable. */
2367
+ memcpy(pos_size, &new_size, sizeof(size_t));
2368
+ memcpy(pos_ntrees, &new_ntrees, sizeof(size_t));
2369
+ }
2370
+
2371
+ catch(...)
2372
+ {
2373
+ memcpy(out, &old_ending_type, sizeof(uint8_t));
2374
+ memcpy(out + sizeof(uint8_t), &old_jump_ahead, sizeof(size_t));
2375
+ throw;
2376
+ }
2377
+
2378
+ check_interrupt_switch(ss);
2379
+ }
2380
+
2381
+ void re_serialization_pipeline(const TreesIndexer &model, char *&out)
2382
+ {
2383
+ SignalSwitcher ss = SignalSwitcher();
2384
+
2385
+ check_setup_info(out);
2386
+
2387
+ uint8_t model_in;
2388
+ memcpy(&model_in, out, sizeof(uint8_t));
2389
+ out += sizeof(uint8_t);
2390
+ if (model_in != get_model_code(model))
2391
+ throw std::runtime_error("Object to incrementally-serialize does not match with the supplied type.\n");
2392
+
2393
+ char *pos_size = out;
2394
+ size_t old_size;
2395
+ memcpy(&old_size, out, sizeof(size_t));
2396
+ out += sizeof(size_t);
2397
+
2398
+ char *old_end = out + old_size;
2399
+ uint8_t old_ending_type;
2400
+ memcpy(&old_ending_type, old_end, sizeof(uint8_t));
2401
+ size_t old_jump_ahead;
2402
+ memcpy(&old_jump_ahead, old_end + sizeof(uint8_t), sizeof(size_t));
2403
+
2404
+ size_t new_size = get_size_model(model);
2405
+ size_t new_ntrees = model.indices.size();
2406
+
2407
+ try
2408
+ {
2409
+ char *pos_ntrees = out;
2410
+ size_t old_ntrees;
2411
+ memcpy(&old_ntrees, out, sizeof(size_t));
2412
+
2413
+ serialize_additional_trees(model, old_end, old_ntrees);
2414
+
2415
+ out = old_end;
2416
+ uint8_t ending_type = (uint8_t)EndsHere;
2417
+ memcpy(out, &ending_type, sizeof(uint8_t));
2418
+ out += sizeof(uint8_t);
2419
+ size_t jump_ahead = 0;
2420
+ memcpy(out, &jump_ahead, sizeof(size_t));
2421
+ out += sizeof(size_t);
2422
+
2423
+ /* Leave this for the end in case something fails, so as not to
2424
+ render the serialized bytes unusable. */
2425
+ memcpy(pos_size, &new_size, sizeof(size_t));
2426
+ memcpy(pos_ntrees, &new_ntrees, sizeof(size_t));
2427
+ }
2428
+
2429
+ catch(...)
2430
+ {
2431
+ memcpy(out, &old_ending_type, sizeof(uint8_t));
2432
+ memcpy(out + sizeof(uint8_t), &old_jump_ahead, sizeof(size_t));
2433
+ throw;
2434
+ }
2435
+
2436
+ check_interrupt_switch(ss);
2437
+ }
2438
+
2439
+ void incremental_serialize_IsoForest(const IsoForest &model, char *old_bytes_reallocated)
2440
+ {
2441
+ char *out = old_bytes_reallocated;
2442
+ re_serialization_pipeline(model, out);
2443
+ }
2444
+
2445
+ void incremental_serialize_ExtIsoForest(const ExtIsoForest &model, char *old_bytes_reallocated)
2446
+ {
2447
+ char *out = old_bytes_reallocated;
2448
+ re_serialization_pipeline(model, out);
2449
+ }
2450
+
2451
+ void incremental_serialize_Imputer(const Imputer &model, char *old_bytes_reallocated)
2452
+ {
2453
+ char *out = old_bytes_reallocated;
2454
+ re_serialization_pipeline(model, out);
2455
+ }
2456
+
2457
+ void incremental_serialize_Indexer(const TreesIndexer &model, char *old_bytes_reallocated)
2458
+ {
2459
+ char *out = old_bytes_reallocated;
2460
+ re_serialization_pipeline(model, out);
2461
+ }
2462
+
2463
+ template <class Model>
2464
+ void incremental_serialize_string(const Model &model, std::string &old_bytes)
2465
+ {
2466
+ size_t new_size = determine_serialized_size(model);
2467
+ if (old_bytes.size() > new_size)
2468
+ throw std::runtime_error("'old_bytes' is not a subset of 'model'.\n");
2469
+ if (!new_size)
2470
+ unexpected_error();
2471
+ old_bytes.resize(new_size);
2472
+ char *out = &old_bytes[0];
2473
+ re_serialization_pipeline(model, out);
2474
+ }
2475
+
2476
+ void incremental_serialize_IsoForest(const IsoForest &model, std::string &old_bytes)
2477
+ {
2478
+ incremental_serialize_string(model, old_bytes);
2479
+ }
2480
+
2481
+ void incremental_serialize_ExtIsoForest(const ExtIsoForest &model, std::string &old_bytes)
2482
+ {
2483
+ incremental_serialize_string(model, old_bytes);
2484
+ }
2485
+
2486
+ void incremental_serialize_Imputer(const Imputer &model, std::string &old_bytes)
2487
+ {
2488
+ incremental_serialize_string(model, old_bytes);
2489
+ }
2490
+
2491
+ void incremental_serialize_Indexer(const TreesIndexer &model, std::string &old_bytes)
2492
+ {
2493
+ incremental_serialize_string(model, old_bytes);
2494
+ }
2495
+
2496
/* Serializes 'model' into a newly-allocated 'std::string' and returns it.

   The string is sized with 'determine_serialized_size', i.e. the full size of
   the serialized object. 'get_size_model' must not be used here: it only
   covers the model payload, whereas the serialized bytes also carry the setup
   information, the model-type byte, the payload size and the ending metadata,
   so a buffer of that size would be written past its end. */
template <class Model>
std::string serialization_pipeline(const Model &model)
{
    std::string serialized;
    serialized.resize(determine_serialized_size(model));
    /* The pointer overload takes its cursor by reference. */
    char *ptr = &serialized[0];
    serialization_pipeline(model, ptr);
    return serialized;
}
2505
+
2506
+ template <class Model>
2507
+ void serialization_pipeline_ToFile(const Model &model, const char *fname)
2508
+ {
2509
+ FileHandle f(fname, "wb");
2510
+ serialization_pipeline(model, f.handle);
2511
+ }
2512
+
2513
+ #ifdef WCHAR_T_FUNS
2514
+ template <class Model>
2515
+ void serialization_pipeline_ToFile(const Model &model, const wchar_t *fname)
2516
+ {
2517
+ WFileHandle f(fname, L"wb");
2518
+ serialization_pipeline(model, f.handle);
2519
+ }
2520
+ #endif
2521
+
2522
+ size_t determine_serialized_size(const IsoForest &model) noexcept
2523
+ {
2524
+ return determine_serialized_size<IsoForest>(model);
2525
+ }
2526
+
2527
+ size_t determine_serialized_size(const ExtIsoForest &model) noexcept
2528
+ {
2529
+ return determine_serialized_size<ExtIsoForest>(model);
2530
+ }
2531
+
2532
+ size_t determine_serialized_size(const Imputer &model) noexcept
2533
+ {
2534
+ return determine_serialized_size<Imputer>(model);
2535
+ }
2536
+
2537
+ size_t determine_serialized_size(const TreesIndexer &model) noexcept
2538
+ {
2539
+ return determine_serialized_size<TreesIndexer>(model);
2540
+ }
2541
+
2542
+ void serialize_IsoForest(const IsoForest &model, char *out)
2543
+ {
2544
+ serialization_pipeline(model, out);
2545
+ }
2546
+
2547
+ void serialize_IsoForest(const IsoForest &model, FILE *out)
2548
+ {
2549
+ serialization_pipeline(model, out);
2550
+ }
2551
+
2552
+ void serialize_IsoForest(const IsoForest &model, std::ostream &out)
2553
+ {
2554
+ serialization_pipeline(model, out);
2555
+ }
2556
+
2557
+ std::string serialize_IsoForest(const IsoForest &model)
2558
+ {
2559
+ return serialization_pipeline(model);
2560
+ }
2561
+
2562
+ void serialize_IsoForest_ToFile(const IsoForest &model, const char *fname)
2563
+ {
2564
+ serialization_pipeline_ToFile(model, fname);
2565
+ }
2566
+
2567
+ #ifdef WCHAR_T_FUNS
2568
+ void serialize_IsoForest_ToFile(const IsoForest &model, const wchar_t *fname)
2569
+ {
2570
+ serialization_pipeline_ToFile(model, fname);
2571
+ }
2572
+ #endif
2573
+
2574
+ void deserialize_IsoForest(IsoForest &model, const char *in)
2575
+ {
2576
+ deserialization_pipeline(model, in);
2577
+ }
2578
+
2579
+ void deserialize_IsoForest(IsoForest &model, FILE *in)
2580
+ {
2581
+ deserialization_pipeline(model, in);
2582
+ }
2583
+
2584
+ void deserialize_IsoForest(IsoForest &model, std::istream &in)
2585
+ {
2586
+ deserialization_pipeline(model, in);
2587
+ }
2588
+
2589
+ void deserialize_IsoForest(IsoForest &model, const std::string &in)
2590
+ {
2591
+ if (!in.size())
2592
+ throw std::runtime_error("Invalid input model to deserialize.");
2593
+ const char *in_ = &in[0];
2594
+ deserialization_pipeline(model, in_);
2595
+ }
2596
+
2597
+ void deserialize_IsoForest_FromFile(IsoForest &model, const char *fname)
2598
+ {
2599
+ FileHandle f(fname, "rb");
2600
+ deserialize_IsoForest(model, f.handle);
2601
+ }
2602
+
2603
+ #ifdef WCHAR_T_FUNS
2604
+ void deserialize_IsoForest_FromFile(IsoForest &model, const wchar_t *fname)
2605
+ {
2606
+ WFileHandle f(fname, L"rb");
2607
+ deserialize_IsoForest(model, f.handle);
2608
+ }
2609
+ #endif
2610
+
2611
+ void serialize_ExtIsoForest(const ExtIsoForest &model, char *out)
2612
+ {
2613
+ serialization_pipeline(model, out);
2614
+ }
2615
+
2616
+ void serialize_ExtIsoForest(const ExtIsoForest &model, FILE *out)
2617
+ {
2618
+ serialization_pipeline(model, out);
2619
+ }
2620
+
2621
+ void serialize_ExtIsoForest(const ExtIsoForest &model, std::ostream &out)
2622
+ {
2623
+ serialization_pipeline(model, out);
2624
+ }
2625
+
2626
+ std::string serialize_ExtIsoForest(const ExtIsoForest &model)
2627
+ {
2628
+ return serialization_pipeline(model);
2629
+ }
2630
+
2631
+ void serialize_ExtIsoForest_ToFile(const ExtIsoForest &model, const char *fname)
2632
+ {
2633
+ serialization_pipeline_ToFile(model, fname);
2634
+ }
2635
+
2636
+ #ifdef WCHAR_T_FUNS
2637
+ void serialize_ExtIsoForest_ToFile(const ExtIsoForest &model, const wchar_t *fname)
2638
+ {
2639
+ serialization_pipeline_ToFile(model, fname);
2640
+ }
2641
+ #endif
2642
+
2643
+ void deserialize_ExtIsoForest(ExtIsoForest &model, const char *in)
2644
+ {
2645
+ deserialization_pipeline(model, in);
2646
+ }
2647
+
2648
+ void deserialize_ExtIsoForest(ExtIsoForest &model, FILE *in)
2649
+ {
2650
+ deserialization_pipeline(model, in);
2651
+ }
2652
+
2653
+ void deserialize_ExtIsoForest(ExtIsoForest &model, std::istream &in)
2654
+ {
2655
+ deserialization_pipeline(model, in);
2656
+ }
2657
+
2658
+ void deserialize_ExtIsoForest(ExtIsoForest &model, const std::string &in)
2659
+ {
2660
+ if (!in.size())
2661
+ throw std::runtime_error("Invalid input model to deserialize.");
2662
+ const char *in_ = &in[0];
2663
+ deserialization_pipeline(model, in_);
2664
+ }
2665
+
2666
+ void deserialize_ExtIsoForest_FromFile(ExtIsoForest &model, const char *fname)
2667
+ {
2668
+ FileHandle f(fname, "rb");
2669
+ deserialize_ExtIsoForest(model, f.handle);
2670
+ }
2671
+
2672
+ #ifdef WCHAR_T_FUNS
2673
+ void deserialize_ExtIsoForest_FromFile(ExtIsoForest &model, const wchar_t *fname)
2674
+ {
2675
+ WFileHandle f(fname, L"rb");
2676
+ deserialize_ExtIsoForest(model, f.handle);
2677
+ }
2678
+ #endif
2679
+
2680
+ void serialize_Imputer(const Imputer &model, char *out)
2681
+ {
2682
+ serialization_pipeline(model, out);
2683
+ }
2684
+
2685
+ void serialize_Imputer(const Imputer &model, FILE *out)
2686
+ {
2687
+ serialization_pipeline(model, out);
2688
+ }
2689
+
2690
+ void serialize_Imputer(const Imputer &model, std::ostream &out)
2691
+ {
2692
+ serialization_pipeline(model, out);
2693
+ }
2694
+
2695
+ std::string serialize_Imputer(const Imputer &model)
2696
+ {
2697
+ return serialization_pipeline(model);
2698
+ }
2699
+
2700
+ void serialize_Imputer_ToFile(const Imputer &model, const char *fname)
2701
+ {
2702
+ serialization_pipeline_ToFile(model, fname);
2703
+ }
2704
+
2705
+ #ifdef WCHAR_T_FUNS
2706
+ void serialize_Imputer_ToFile(const Imputer &model, const wchar_t *fname)
2707
+ {
2708
+ serialization_pipeline_ToFile(model, fname);
2709
+ }
2710
+ #endif
2711
+
2712
+ void deserialize_Imputer(Imputer &model, const char *in)
2713
+ {
2714
+ deserialization_pipeline(model, in);
2715
+ }
2716
+
2717
+ void deserialize_Imputer(Imputer &model, FILE *in)
2718
+ {
2719
+ deserialization_pipeline(model, in);
2720
+ }
2721
+
2722
+ void deserialize_Imputer(Imputer &model, std::istream &in)
2723
+ {
2724
+ deserialization_pipeline(model, in);
2725
+ }
2726
+
2727
+ void deserialize_Imputer(Imputer &model, const std::string &in)
2728
+ {
2729
+ if (!in.size())
2730
+ throw std::runtime_error("Invalid input model to deserialize.");
2731
+ const char *in_ = &in[0];
2732
+ deserialization_pipeline(model, in_);
2733
+ }
2734
+
2735
+ void deserialize_Imputer_FromFile(Imputer &model, const char *fname)
2736
+ {
2737
+ FileHandle f(fname, "rb");
2738
+ deserialize_Imputer(model, f.handle);
2739
+ }
2740
+
2741
+ #ifdef WCHAR_T_FUNS
2742
+ void deserialize_Imputer_FromFile(Imputer &model, const wchar_t *fname)
2743
+ {
2744
+ WFileHandle f(fname, L"rb");
2745
+ deserialize_Imputer(model, f.handle);
2746
+ }
2747
+ #endif
2748
+
2749
+ void serialize_Indexer(const TreesIndexer &model, char *out)
2750
+ {
2751
+ serialization_pipeline(model, out);
2752
+ }
2753
+
2754
+ void serialize_Indexer(const TreesIndexer &model, FILE *out)
2755
+ {
2756
+ serialization_pipeline(model, out);
2757
+ }
2758
+
2759
+ void serialize_Indexer(const TreesIndexer &model, std::ostream &out)
2760
+ {
2761
+ serialization_pipeline(model, out);
2762
+ }
2763
+
2764
+ std::string serialize_Indexer(const TreesIndexer &model)
2765
+ {
2766
+ return serialization_pipeline(model);
2767
+ }
2768
+
2769
+ void serialize_Indexer_ToFile(const TreesIndexer &model, const char *fname)
2770
+ {
2771
+ serialization_pipeline_ToFile(model, fname);
2772
+ }
2773
+
2774
+ #ifdef WCHAR_T_FUNS
2775
+ void serialize_Indexer_ToFile(const TreesIndexer &model, const wchar_t *fname)
2776
+ {
2777
+ serialization_pipeline_ToFile(model, fname);
2778
+ }
2779
+ #endif
2780
+
2781
+ void deserialize_Indexer(TreesIndexer &model, const char *in)
2782
+ {
2783
+ deserialization_pipeline(model, in);
2784
+ }
2785
+
2786
+ void deserialize_Indexer(TreesIndexer &model, FILE *in)
2787
+ {
2788
+ deserialization_pipeline(model, in);
2789
+ }
2790
+
2791
+ void deserialize_Indexer(TreesIndexer &model, std::istream &in)
2792
+ {
2793
+ deserialization_pipeline(model, in);
2794
+ }
2795
+
2796
+ void deserialize_Indexer(TreesIndexer &model, const std::string &in)
2797
+ {
2798
+ if (!in.size())
2799
+ throw std::runtime_error("Invalid input model to deserialize.");
2800
+ const char *in_ = &in[0];
2801
+ deserialization_pipeline(model, in_);
2802
+ }
2803
+
2804
+ void deserialize_Indexer_FromFile(TreesIndexer &model, const char *fname)
2805
+ {
2806
+ FileHandle f(fname, "rb");
2807
+ deserialize_Indexer(model, f.handle);
2808
+ }
2809
+
2810
+ #ifdef WCHAR_T_FUNS
2811
+ void deserialize_Indexer_FromFile(TreesIndexer &model, const wchar_t *fname)
2812
+ {
2813
+ WFileHandle f(fname, L"rb");
2814
+ deserialize_Indexer(model, f.handle);
2815
+ }
2816
+ #endif
2817
+
2818
+ /* Shorthands to use in templates (will be used in R) */
2819
+ void serialize_isotree(const IsoForest &model, char *out)
2820
+ {
2821
+ serialize_IsoForest(model, out);
2822
+ }
2823
+
2824
+ void serialize_isotree(const ExtIsoForest &model, char *out)
2825
+ {
2826
+ serialize_ExtIsoForest(model, out);
2827
+ }
2828
+
2829
+ void serialize_isotree(const Imputer &model, char *out)
2830
+ {
2831
+ serialize_Imputer(model, out);
2832
+ }
2833
+
2834
+ void serialize_isotree(const TreesIndexer &model, char *out)
2835
+ {
2836
+ serialize_Indexer(model, out);
2837
+ }
2838
+
2839
+ void deserialize_isotree(IsoForest &model, const char *in)
2840
+ {
2841
+ deserialize_IsoForest(model, in);
2842
+ }
2843
+
2844
+ void deserialize_isotree(ExtIsoForest &model, const char *in)
2845
+ {
2846
+ deserialize_ExtIsoForest(model, in);
2847
+ }
2848
+
2849
+ void deserialize_isotree(Imputer &model, const char *in)
2850
+ {
2851
+ deserialize_Imputer(model, in);
2852
+ }
2853
+
2854
+ void deserialize_isotree(TreesIndexer &model, const char *in)
2855
+ {
2856
+ deserialize_Indexer(model, in);
2857
+ }
2858
+
2859
+ void incremental_serialize_isotree(const IsoForest &model, char *old_bytes_reallocated)
2860
+ {
2861
+ incremental_serialize_IsoForest(model, old_bytes_reallocated);
2862
+ }
2863
+
2864
+ void incremental_serialize_isotree(const ExtIsoForest &model, char *old_bytes_reallocated)
2865
+ {
2866
+ incremental_serialize_ExtIsoForest(model, old_bytes_reallocated);
2867
+ }
2868
+
2869
+ void incremental_serialize_isotree(const Imputer &model, char *old_bytes_reallocated)
2870
+ {
2871
+ incremental_serialize_Imputer(model, old_bytes_reallocated);
2872
+ }
2873
+
2874
+ void incremental_serialize_isotree(const TreesIndexer &model, char *old_bytes_reallocated)
2875
+ {
2876
+ incremental_serialize_Indexer(model, old_bytes_reallocated);
2877
+ }
2878
+
2879
+ template <class itype>
2880
+ void read_bytes_size_t(void *ptr, const size_t n_els, itype &in, const PlatformSize saved_size_t, const bool has_same_endianness)
2881
+ {
2882
+ std::vector<char> buffer;
2883
+ switch(saved_size_t)
2884
+ {
2885
+ case Is32Bit:
2886
+ {
2887
+ read_bytes<size_t, uint32_t>(ptr, n_els, in, buffer, !has_same_endianness);
2888
+ break;
2889
+ }
2890
+
2891
+ case Is64Bit:
2892
+ {
2893
+ read_bytes<size_t, uint64_t>(ptr, n_els, in, buffer, !has_same_endianness);
2894
+ break;
2895
+ }
2896
+
2897
+ default:
2898
+ {
2899
+ unexpected_error();
2900
+ }
2901
+ }
2902
+ }
2903
+
2904
+ template <class itype>
2905
+ void inspect_serialized_object
2906
+ (
2907
+ itype &serialized_bytes,
2908
+ bool &is_isotree_model,
2909
+ bool &is_compatible,
2910
+ bool &has_combined_objects,
2911
+ bool &has_IsoForest,
2912
+ bool &has_ExtIsoForest,
2913
+ bool &has_Imputer,
2914
+ bool &has_Indexer,
2915
+ bool &has_metadata,
2916
+ size_t &size_metadata,
2917
+ bool &has_same_int_size,
2918
+ bool &has_same_size_t_size,
2919
+ bool &has_same_endianness,
2920
+ bool &lacks_range_penalty,
2921
+ bool &lacks_scoring_metric
2922
+ )
2923
+ {
2924
+ auto saved_position = set_return_position(serialized_bytes);
2925
+
2926
+ is_isotree_model = false;
2927
+ is_compatible = false;
2928
+ has_combined_objects = false;
2929
+ has_IsoForest = false;
2930
+ has_ExtIsoForest = false;
2931
+ has_Imputer = false;
2932
+ has_Indexer = false;
2933
+ has_metadata = false;
2934
+ size_metadata = 0;
2935
+
2936
+ bool lacks_indexer = false;
2937
+
2938
+ bool has_same_double = false;
2939
+ bool has_incomplete_watermark = false;
2940
+ PlatformSize saved_int_t;
2941
+ PlatformSize saved_size_t;
2942
+ PlatformEndianness saved_endian;
2943
+ check_setup_info(
2944
+ serialized_bytes,
2945
+ is_isotree_model,
2946
+ has_incomplete_watermark,
2947
+ has_same_double,
2948
+ has_same_int_size,
2949
+ has_same_size_t_size,
2950
+ has_same_endianness,
2951
+ saved_int_t,
2952
+ saved_size_t,
2953
+ saved_endian,
2954
+ is_compatible,
2955
+ lacks_range_penalty,
2956
+ lacks_scoring_metric,
2957
+ lacks_indexer
2958
+ );
2959
+
2960
+ if (!is_isotree_model || !is_compatible)
2961
+ return;
2962
+
2963
+ uint8_t model_type;
2964
+ read_bytes<uint8_t>((void*)&model_type, (size_t)1, serialized_bytes);
2965
+
2966
+ switch(model_type)
2967
+ {
2968
+ case IsoForestModel:
2969
+ {
2970
+ has_IsoForest = true;
2971
+ break;
2972
+ }
2973
+
2974
+ case ExtIsoForestModel:
2975
+ {
2976
+ has_ExtIsoForest = true;
2977
+ break;
2978
+ }
2979
+
2980
+ case ImputerModel:
2981
+ {
2982
+ has_Imputer = true;
2983
+ break;
2984
+ }
2985
+
2986
+ case IndexerModel:
2987
+ {
2988
+ has_Indexer = true;
2989
+ }
2990
+
2991
+ case AllObjectsCombined:
2992
+ {
2993
+ has_combined_objects = true;
2994
+ break;
2995
+ }
2996
+
2997
+ default:
2998
+ {
2999
+
3000
+ }
3001
+ }
3002
+
3003
+ if (has_combined_objects)
3004
+ {
3005
+ size_t size_model[4] = {0};
3006
+
3007
+ read_bytes<uint8_t>((void*)&model_type, (size_t)1, serialized_bytes);
3008
+ switch(model_type)
3009
+ {
3010
+ case HasSingleVarModelNext:
3011
+ {
3012
+ has_IsoForest = true;
3013
+ break;
3014
+ }
3015
+ case HasExtModelNext:
3016
+ {
3017
+ has_ExtIsoForest = true;
3018
+ break;
3019
+ }
3020
+ case HasSingleVarModelPlusImputerNext:
3021
+ {
3022
+ has_IsoForest = true;
3023
+ has_Imputer = true;
3024
+ break;
3025
+ }
3026
+ case HasSingleVarModelPlusIndexerNext:
3027
+ {
3028
+ has_IsoForest = true;
3029
+ has_Indexer = true;
3030
+ break;
3031
+ }
3032
+ case HasSingleVarModelPlusImputerPlusIndexerNext:
3033
+ {
3034
+ has_IsoForest = true;
3035
+ has_Imputer = true;
3036
+ has_Indexer = true;
3037
+ break;
3038
+ }
3039
+ case HasExtModelPlusImputerNext:
3040
+ {
3041
+ has_ExtIsoForest = true;
3042
+ has_Imputer = true;
3043
+ break;
3044
+ }
3045
+ case HasExtModelPlusIndexerNext:
3046
+ {
3047
+ has_ExtIsoForest = true;
3048
+ has_Indexer = true;
3049
+ break;
3050
+ }
3051
+ case HasExtModelPlusImputerPlusIndexerNext:
3052
+ {
3053
+ has_ExtIsoForest = true;
3054
+ has_Imputer = true;
3055
+ has_Indexer = true;
3056
+ break;
3057
+ }
3058
+ case HasSingleVarModelPlusMetadataNext:
3059
+ {
3060
+ has_IsoForest = true;
3061
+ has_metadata = true;
3062
+ read_bytes_size_t(size_model, (size_t)(3+!lacks_indexer), serialized_bytes, saved_size_t, has_same_endianness);
3063
+ size_metadata = size_model[2+!lacks_indexer];
3064
+ break;
3065
+ }
3066
+ case HasSingleVarModelPlusIndexerPlusMetadataNext:
3067
+ {
3068
+ has_IsoForest = true;
3069
+ has_Indexer = true;
3070
+ has_metadata = true;
3071
+ read_bytes_size_t(size_model, (size_t)4, serialized_bytes, saved_size_t, has_same_endianness);
3072
+ size_metadata = size_model[3];
3073
+ break;
3074
+ }
3075
+ case HasExtModelPlusMetadataNext:
3076
+ {
3077
+ has_ExtIsoForest = true;
3078
+ has_metadata = true;
3079
+ read_bytes_size_t(size_model, (size_t)(3+!lacks_indexer), serialized_bytes, saved_size_t, has_same_endianness);
3080
+ size_metadata = size_model[2+!lacks_indexer];
3081
+ break;
3082
+ }
3083
+ case HasExtModelPlusIndexerPlusMetadataNext:
3084
+ {
3085
+ has_ExtIsoForest = true;
3086
+ has_Indexer = true;
3087
+ has_metadata = true;
3088
+ read_bytes_size_t(size_model, (size_t)4, serialized_bytes, saved_size_t, has_same_endianness);
3089
+ size_metadata = size_model[3];
3090
+ break;
3091
+ break;
3092
+ }
3093
+ case HasSingleVarModelPlusImputerPlusMetadataNext:
3094
+ {
3095
+ has_IsoForest = true;
3096
+ has_Imputer = true;
3097
+ has_metadata = true;
3098
+ read_bytes_size_t(size_model, (size_t)(3+!lacks_indexer), serialized_bytes, saved_size_t, has_same_endianness);
3099
+ size_metadata = size_model[2+!lacks_indexer];
3100
+ break;
3101
+ }
3102
+ case HasSingleVarModelPlusImputerPlusIndexerPlusMetadataNext:
3103
+ {
3104
+ has_IsoForest = true;
3105
+ has_Imputer = true;
3106
+ has_Indexer = true;
3107
+ has_metadata = true;
3108
+ read_bytes_size_t(size_model, (size_t)4, serialized_bytes, saved_size_t, has_same_endianness);
3109
+ size_metadata = size_model[3];
3110
+ break;
3111
+ }
3112
+ case HasExtModelPlusImputerPlusMetadataNext:
3113
+ {
3114
+ has_ExtIsoForest = true;
3115
+ has_Imputer = true;
3116
+ has_metadata = true;
3117
+ read_bytes_size_t(size_model, (size_t)(3+!lacks_indexer), serialized_bytes, saved_size_t, has_same_endianness);
3118
+ size_metadata = size_model[2+!lacks_indexer];
3119
+ break;
3120
+ }
3121
+ case HasExtModelPlusImputerPlusIndexerPlusMetadataNext:
3122
+ {
3123
+ has_ExtIsoForest = true;
3124
+ has_Imputer = true;
3125
+ has_Indexer = true;
3126
+ has_metadata = true;
3127
+ read_bytes_size_t(size_model, (size_t)4, serialized_bytes, saved_size_t, has_same_endianness);
3128
+ size_metadata = size_model[3];
3129
+ break;
3130
+ }
3131
+
3132
+ default:
3133
+ {
3134
+
3135
+ }
3136
+ }
3137
+ }
3138
+
3139
+ return_to_position(serialized_bytes, saved_position);
3140
+ }
3141
+
3142
/* Shorter form of 'inspect_serialized_object' for callers that do not need
   the platform-compatibility details: it forwards to the full version and
   discards the five extra outputs (integer size, 'size_t' size, endianness,
   range penalty and scoring metric indicators). */
template <class itype>
void inspect_serialized_object
(
    itype &serialized_bytes,
    bool &is_isotree_model,
    bool &is_compatible,
    bool &has_combined_objects,
    bool &has_IsoForest,
    bool &has_ExtIsoForest,
    bool &has_Imputer,
    bool &has_Indexer,
    bool &has_metadata,
    size_t &size_metadata
)
{
    /* Placeholders for the outputs that are not passed back. */
    bool unused_same_int_size;
    bool unused_same_size_t_size;
    bool unused_same_endianness;
    bool unused_lacks_range_penalty;
    bool unused_lacks_scoring_metric;

    inspect_serialized_object(
        serialized_bytes,
        is_isotree_model,
        is_compatible,
        has_combined_objects,
        has_IsoForest,
        has_ExtIsoForest,
        has_Imputer,
        has_Indexer,
        has_metadata,
        size_metadata,
        unused_same_int_size,
        unused_same_size_t_size,
        unused_same_endianness,
        unused_lacks_range_penalty,
        unused_lacks_scoring_metric
    );
}
3176
+
3177
+ void inspect_serialized_object
3178
+ (
3179
+ const char *serialized_bytes,
3180
+ bool &is_isotree_model,
3181
+ bool &is_compatible,
3182
+ bool &has_combined_objects,
3183
+ bool &has_IsoForest,
3184
+ bool &has_ExtIsoForest,
3185
+ bool &has_Imputer,
3186
+ bool &has_Indexer,
3187
+ bool &has_metadata,
3188
+ size_t &size_metadata
3189
+ )
3190
+ {
3191
+ const char *in = serialized_bytes;
3192
+ inspect_serialized_object<const char*>(
3193
+ in,
3194
+ is_isotree_model,
3195
+ is_compatible,
3196
+ has_combined_objects,
3197
+ has_IsoForest,
3198
+ has_ExtIsoForest,
3199
+ has_Imputer,
3200
+ has_Indexer,
3201
+ has_metadata,
3202
+ size_metadata
3203
+ );
3204
+ }
3205
+
3206
+ void inspect_serialized_object
3207
+ (
3208
+ const std::string &serialized_bytes,
3209
+ bool &is_isotree_model,
3210
+ bool &is_compatible,
3211
+ bool &has_combined_objects,
3212
+ bool &has_IsoForest,
3213
+ bool &has_ExtIsoForest,
3214
+ bool &has_Imputer,
3215
+ bool &has_Indexer,
3216
+ bool &has_metadata,
3217
+ size_t &size_metadata
3218
+ )
3219
+ {
3220
+ if (!serialized_bytes.size()) {
3221
+ is_isotree_model = false;
3222
+ is_compatible = false;
3223
+ has_IsoForest = false;
3224
+ has_ExtIsoForest = false;
3225
+ has_Imputer = false;
3226
+ has_Indexer = false;
3227
+ has_metadata = false;
3228
+ return;
3229
+ }
3230
+ const char *in = &serialized_bytes[0];
3231
+ inspect_serialized_object<const char*>(
3232
+ in,
3233
+ is_isotree_model,
3234
+ is_compatible,
3235
+ has_combined_objects,
3236
+ has_IsoForest,
3237
+ has_ExtIsoForest,
3238
+ has_Imputer,
3239
+ has_Indexer,
3240
+ has_metadata,
3241
+ size_metadata
3242
+ );
3243
+ }
3244
+
3245
+ void inspect_serialized_object
3246
+ (
3247
+ FILE *serialized_bytes,
3248
+ bool &is_isotree_model,
3249
+ bool &is_compatible,
3250
+ bool &has_combined_objects,
3251
+ bool &has_IsoForest,
3252
+ bool &has_ExtIsoForest,
3253
+ bool &has_Imputer,
3254
+ bool &has_Indexer,
3255
+ bool &has_metadata,
3256
+ size_t &size_metadata
3257
+ )
3258
+ {
3259
+ FILE *in = serialized_bytes;
3260
+ inspect_serialized_object<FILE*>(
3261
+ in,
3262
+ is_isotree_model,
3263
+ is_compatible,
3264
+ has_combined_objects,
3265
+ has_IsoForest,
3266
+ has_ExtIsoForest,
3267
+ has_Imputer,
3268
+ has_Indexer,
3269
+ has_metadata,
3270
+ size_metadata
3271
+ );
3272
+ }
3273
+
3274
+ void inspect_serialized_object
3275
+ (
3276
+ std::istream &serialized_bytes,
3277
+ bool &is_isotree_model,
3278
+ bool &is_compatible,
3279
+ bool &has_combined_objects,
3280
+ bool &has_IsoForest,
3281
+ bool &has_ExtIsoForest,
3282
+ bool &has_Imputer,
3283
+ bool &has_Indexer,
3284
+ bool &has_metadata,
3285
+ size_t &size_metadata
3286
+ )
3287
+ {
3288
+ inspect_serialized_object<std::istream>(
3289
+ serialized_bytes,
3290
+ is_isotree_model,
3291
+ is_compatible,
3292
+ has_combined_objects,
3293
+ has_IsoForest,
3294
+ has_ExtIsoForest,
3295
+ has_Imputer,
3296
+ has_Indexer,
3297
+ has_metadata,
3298
+ size_metadata
3299
+ );
3300
+ }
3301
+
3302
+ template <class Model>
3303
+ bool prev_cols_match(const Model &model, const char *serialized_bytes)
3304
+ {
3305
+ return true;
3306
+ }
3307
+
3308
+ bool prev_cols_match(const Imputer &model, const char *serialized_bytes)
3309
+ {
3310
+ size_t prev[6];
3311
+ read_bytes<size_t>((void*)prev, (size_t)6, serialized_bytes);
3312
+ if (prev[0] != model.ncols_numeric ||
3313
+ prev[1] != model.ncols_categ ||
3314
+ prev[2] != model.ncat.size() ||
3315
+ prev[4] != model.col_means.size() ||
3316
+ prev[5] != model.col_modes.size())
3317
+ {
3318
+ return false;
3319
+ }
3320
+
3321
+ return true;
3322
+ }
3323
+
3324
+ template <class Model>
3325
+ bool check_can_undergo_incremental_serialization(const Model &model, const char *serialized_bytes)
3326
+ {
3327
+ const char *start = serialized_bytes;
3328
+ size_t curr_ntrees = get_ntrees(model);
3329
+
3330
+ bool is_isotree_model;
3331
+ bool is_compatible;
3332
+ bool has_combined_objects;
3333
+ bool has_IsoForest;
3334
+ bool has_ExtIsoForest;
3335
+ bool has_Imputer;
3336
+ bool has_Indexer;
3337
+ bool has_metadata;
3338
+ size_t size_metadata;
3339
+ bool has_same_int_size;
3340
+ bool has_same_size_t_size;
3341
+ bool has_same_endianness;
3342
+ bool lacks_range_penalty;
3343
+ bool lacks_scoring_metric;
3344
+
3345
+ inspect_serialized_object(
3346
+ serialized_bytes,
3347
+ is_isotree_model,
3348
+ is_compatible,
3349
+ has_combined_objects,
3350
+ has_IsoForest,
3351
+ has_ExtIsoForest,
3352
+ has_Imputer,
3353
+ has_Indexer,
3354
+ has_metadata,
3355
+ size_metadata,
3356
+ has_same_int_size,
3357
+ has_same_size_t_size,
3358
+ has_same_endianness,
3359
+ lacks_range_penalty,
3360
+ lacks_scoring_metric
3361
+ );
3362
+
3363
+ if (!is_isotree_model || !is_compatible || has_combined_objects ||
3364
+ !has_same_int_size || !has_same_size_t_size || !has_same_endianness ||
3365
+ lacks_range_penalty || lacks_scoring_metric)
3366
+ return false;
3367
+
3368
+ if (std::is_same<Model, IsoForest>::value) {
3369
+ if (!has_IsoForest || has_ExtIsoForest || has_Imputer || has_Indexer)
3370
+ return false;
3371
+ }
3372
+
3373
+ else if (std::is_same<Model, ExtIsoForest>::value) {
3374
+ if (has_IsoForest || !has_ExtIsoForest || has_Imputer || has_Indexer)
3375
+ return false;
3376
+ }
3377
+
3378
+ else if (std::is_same<Model, Imputer>::value) {
3379
+ if (has_IsoForest || has_ExtIsoForest || !has_Imputer || has_Indexer)
3380
+ return false;
3381
+ }
3382
+
3383
+ else if (std::is_same<Model, TreesIndexer>::value) {
3384
+ if (has_IsoForest || has_ExtIsoForest || has_Imputer || !has_Indexer)
3385
+ return false;
3386
+ }
3387
+
3388
+ else {
3389
+ assert(0);
3390
+ }
3391
+
3392
+ start += get_size_setup_info();
3393
+ start += sizeof(uint8_t);
3394
+ start += sizeof(size_t);
3395
+
3396
+ if (std::is_same<Model, IsoForest>::value) {
3397
+ start += sizeof(uint8_t) * 4;
3398
+ start += sizeof(double) * 2;
3399
+ start += sizeof(size_t);
3400
+ }
3401
+
3402
+ else if (std::is_same<Model, ExtIsoForest>::value) {
3403
+ start += sizeof(uint8_t) * 4;
3404
+ start += sizeof(double) * 2;
3405
+ start += sizeof(size_t);
3406
+ }
3407
+
3408
+ else if (std::is_same<Model, Imputer>::value) {
3409
+ if (!prev_cols_match(model, start))
3410
+ return false;
3411
+ start += sizeof(size_t) * 3;
3412
+ }
3413
+
3414
+ else if (std::is_same<Model, TreesIndexer>::value) {
3415
+ /* Nothing is required here */
3416
+ }
3417
+
3418
+ else {
3419
+ assert(0);
3420
+ }
3421
+
3422
+ size_t old_ntrees;
3423
+ memcpy(&old_ntrees, start, sizeof(size_t));
3424
+ if (old_ntrees > curr_ntrees)
3425
+ return false;
3426
+
3427
+ return true;
3428
+ }
3429
+
3430
+ bool check_can_undergo_incremental_serialization(const IsoForest &model, const char *serialized_bytes)
3431
+ {
3432
+ return check_can_undergo_incremental_serialization<IsoForest>(model, serialized_bytes);
3433
+ }
3434
+
3435
+ bool check_can_undergo_incremental_serialization(const ExtIsoForest &model, const char *serialized_bytes)
3436
+ {
3437
+ return check_can_undergo_incremental_serialization<ExtIsoForest>(model, serialized_bytes);
3438
+ }
3439
+
3440
+ bool check_can_undergo_incremental_serialization(const Imputer &model, const char *serialized_bytes)
3441
+ {
3442
+ return check_can_undergo_incremental_serialization<Imputer>(model, serialized_bytes);
3443
+ }
3444
+
3445
+ bool check_can_undergo_incremental_serialization(const TreesIndexer &model, const char *serialized_bytes)
3446
+ {
3447
+ return check_can_undergo_incremental_serialization<TreesIndexer>(model, serialized_bytes);
3448
+ }
3449
+
3450
+ size_t determine_serialized_size_combined
3451
+ (
3452
+ const IsoForest *model,
3453
+ const ExtIsoForest *model_ext,
3454
+ const Imputer *imputer,
3455
+ const TreesIndexer *indexer,
3456
+ const size_t size_optional_metadata
3457
+ ) noexcept
3458
+ {
3459
+ size_t n_bytes = get_size_setup_info();
3460
+ n_bytes += 3 * sizeof(uint8_t);
3461
+ n_bytes += 5 * sizeof(size_t);
3462
+
3463
+ if (model != NULL)
3464
+ n_bytes += get_size_model(*model);
3465
+ else
3466
+ n_bytes += get_size_model(*model_ext);
3467
+ if (imputer != NULL)
3468
+ n_bytes += get_size_model(*imputer);
3469
+ if (indexer != NULL)
3470
+ n_bytes += get_size_model(*indexer);
3471
+
3472
+ n_bytes += get_size_ending_metadata();
3473
+ return n_bytes;
3474
+ }
3475
+
3476
+ template <class otype>
3477
+ void serialize_combined
3478
+ (
3479
+ const IsoForest *model,
3480
+ const ExtIsoForest *model_ext,
3481
+ const Imputer *imputer,
3482
+ const TreesIndexer *indexer,
3483
+ const char *optional_metadata,
3484
+ const size_t size_optional_metadata,
3485
+ otype &out
3486
+ )
3487
+ {
3488
+ SignalSwitcher ss = SignalSwitcher();
3489
+
3490
+ auto pos_watermark = set_return_position(out);
3491
+
3492
+ add_setup_info(out, false);
3493
+ uint8_t model_type = AllObjectsCombined;
3494
+ write_bytes<uint8_t>((void*)&model_type, (size_t)1, out);
3495
+
3496
+ if (model != NULL)
3497
+ {
3498
+
3499
+ if (!size_optional_metadata)
3500
+ {
3501
+ if (imputer == NULL) {
3502
+ if (indexer == NULL)
3503
+ model_type = HasSingleVarModelNext;
3504
+ else
3505
+ model_type = HasSingleVarModelPlusIndexerNext;
3506
+ }
3507
+ else {
3508
+ if (indexer == NULL)
3509
+ model_type = HasSingleVarModelPlusImputerNext;
3510
+ else
3511
+ model_type = HasSingleVarModelPlusImputerPlusIndexerNext;
3512
+ }
3513
+ }
3514
+
3515
+ else
3516
+ {
3517
+ if (imputer == NULL) {
3518
+ if (indexer == NULL)
3519
+ model_type = HasSingleVarModelPlusMetadataNext;
3520
+ else
3521
+ model_type = HasSingleVarModelPlusIndexerPlusMetadataNext;
3522
+ }
3523
+ else {
3524
+ if (indexer == NULL)
3525
+ model_type = HasSingleVarModelPlusImputerPlusMetadataNext;
3526
+ else
3527
+ model_type = HasSingleVarModelPlusImputerPlusIndexerPlusMetadataNext;
3528
+ }
3529
+ }
3530
+
3531
+ }
3532
+
3533
+ else if (model_ext != NULL)
3534
+ {
3535
+
3536
+ if (!size_optional_metadata)
3537
+ {
3538
+ if (imputer == NULL) {
3539
+ if (indexer == NULL)
3540
+ model_type = HasExtModelNext;
3541
+ else
3542
+ model_type = HasExtModelPlusIndexerNext;
3543
+ }
3544
+ else {
3545
+ if (indexer == NULL)
3546
+ model_type = HasExtModelPlusImputerNext;
3547
+ else
3548
+ model_type = HasExtModelPlusImputerPlusIndexerNext;
3549
+ }
3550
+ }
3551
+
3552
+ else
3553
+ {
3554
+ if (imputer == NULL) {
3555
+ if (indexer == NULL)
3556
+ model_type = HasExtModelPlusMetadataNext;
3557
+ else
3558
+ model_type = HasExtModelPlusIndexerPlusMetadataNext;
3559
+ }
3560
+ else {
3561
+ if (indexer == NULL)
3562
+ model_type = HasExtModelPlusImputerPlusMetadataNext;
3563
+ else
3564
+ model_type = HasExtModelPlusImputerPlusIndexerPlusMetadataNext;
3565
+ }
3566
+ }
3567
+ }
3568
+
3569
+ else {
3570
+ throw std::runtime_error("Must pass one of 'model' or 'model_ext'.\n");
3571
+ }
3572
+
3573
+ write_bytes<uint8_t>((void*)&model_type, (size_t)1, out);
3574
+
3575
+ size_t size_model;
3576
+ if (model != NULL)
3577
+ size_model = get_size_model(*model);
3578
+ else
3579
+ size_model = get_size_model(*model_ext);
3580
+ write_bytes<size_t>((void*)&size_model, (size_t)1, out);
3581
+
3582
+ if (imputer != NULL)
3583
+ size_model = get_size_model(*imputer);
3584
+ else
3585
+ size_model = 0;
3586
+ write_bytes<size_t>((void*)&size_model, (size_t)1, out);
3587
+
3588
+ if (indexer != NULL)
3589
+ size_model = get_size_model(*indexer);
3590
+ else
3591
+ size_model = 0;
3592
+ write_bytes<size_t>((void*)&size_model, (size_t)1, out);
3593
+
3594
+ write_bytes<size_t>((void*)&size_optional_metadata, (size_t)1, out);
3595
+
3596
+
3597
+ check_interrupt_switch(ss);
3598
+
3599
+ if (model != NULL)
3600
+ serialize_model(*model, out);
3601
+ else
3602
+ serialize_model(*model_ext, out);
3603
+
3604
+ if (imputer != NULL)
3605
+ serialize_model(*imputer, out);
3606
+
3607
+ if (indexer != NULL)
3608
+ serialize_model(*indexer, out);
3609
+
3610
+ if (size_optional_metadata)
3611
+ write_bytes<char>((void*)optional_metadata, size_optional_metadata, out);
3612
+
3613
+ check_interrupt_switch(ss);
3614
+
3615
+ uint8_t ending_type = (uint8_t)EndsHere;
3616
+ write_bytes<uint8_t>((void*)&ending_type, (size_t)1, out);
3617
+ size_t jump_ahead = 0;
3618
+ write_bytes<size_t>((void*)&jump_ahead, (size_t)1, out);
3619
+
3620
+ auto end_pos = set_return_position(out);
3621
+ return_to_position(out, pos_watermark);
3622
+ add_full_watermark(out);
3623
+ return_to_position(out, end_pos);
3624
+ }
3625
+
3626
+ void serialize_combined
3627
+ (
3628
+ const IsoForest *model,
3629
+ const ExtIsoForest *model_ext,
3630
+ const Imputer *imputer,
3631
+ const TreesIndexer *indexer,
3632
+ const char *optional_metadata,
3633
+ const size_t size_optional_metadata,
3634
+ char *out
3635
+ )
3636
+ {
3637
+ serialize_combined<char*>(
3638
+ model,
3639
+ model_ext,
3640
+ imputer,
3641
+ indexer,
3642
+ optional_metadata,
3643
+ size_optional_metadata,
3644
+ out
3645
+ );
3646
+ }
3647
+
3648
+ void serialize_combined
3649
+ (
3650
+ const IsoForest *model,
3651
+ const ExtIsoForest *model_ext,
3652
+ const Imputer *imputer,
3653
+ const TreesIndexer *indexer,
3654
+ const char *optional_metadata,
3655
+ const size_t size_optional_metadata,
3656
+ FILE *out
3657
+ )
3658
+ {
3659
+ serialize_combined<FILE*>(
3660
+ model,
3661
+ model_ext,
3662
+ imputer,
3663
+ indexer,
3664
+ optional_metadata,
3665
+ size_optional_metadata,
3666
+ out
3667
+ );
3668
+ }
3669
+
3670
+ void serialize_combined
3671
+ (
3672
+ const IsoForest *model,
3673
+ const ExtIsoForest *model_ext,
3674
+ const Imputer *imputer,
3675
+ const TreesIndexer *indexer,
3676
+ const char *optional_metadata,
3677
+ const size_t size_optional_metadata,
3678
+ std::ostream &out
3679
+ )
3680
+ {
3681
+ serialize_combined<std::ostream>(
3682
+ model,
3683
+ model_ext,
3684
+ imputer,
3685
+ indexer,
3686
+ optional_metadata,
3687
+ size_optional_metadata,
3688
+ out
3689
+ );
3690
+ }
3691
+
3692
+ std::string serialize_combined
3693
+ (
3694
+ const IsoForest *model,
3695
+ const ExtIsoForest *model_ext,
3696
+ const Imputer *imputer,
3697
+ const TreesIndexer *indexer,
3698
+ const char *optional_metadata,
3699
+ const size_t size_optional_metadata
3700
+ )
3701
+ {
3702
+ std::string serialized;
3703
+ serialized.resize(determine_serialized_size_combined(model, model_ext, imputer, indexer, size_optional_metadata));
3704
+ char *ptr = &serialized[0];
3705
+ serialize_combined(model, model_ext, imputer, indexer, optional_metadata, size_optional_metadata, ptr);
3706
+ return serialized;
3707
+ }
3708
+
3709
+ size_t determine_serialized_size_combined
3710
+ (
3711
+ const char *serialized_model,
3712
+ const char *serialized_model_ext,
3713
+ const char *serialized_imputer,
3714
+ const char *serialized_indexer,
3715
+ const size_t size_optional_metadata
3716
+ ) noexcept
3717
+ {
3718
+ size_t n_bytes = get_size_setup_info();
3719
+ n_bytes += 3 * sizeof(uint8_t);
3720
+ n_bytes += 5 * sizeof(size_t);
3721
+
3722
+ size_t model_size;
3723
+
3724
+ if (serialized_model != NULL)
3725
+ memcpy(&model_size, serialized_model + get_size_setup_info() + sizeof(uint8_t), sizeof(size_t));
3726
+ else
3727
+ memcpy(&model_size, serialized_model_ext + get_size_setup_info() + sizeof(uint8_t), sizeof(size_t));
3728
+ n_bytes += model_size;
3729
+ if (serialized_imputer != NULL) {
3730
+ memcpy(&model_size, serialized_imputer + get_size_setup_info() + sizeof(uint8_t), sizeof(size_t));
3731
+ n_bytes += model_size;
3732
+ }
3733
+ if (serialized_indexer != NULL) {
3734
+ memcpy(&model_size, serialized_indexer + get_size_setup_info() + sizeof(uint8_t), sizeof(size_t));
3735
+ n_bytes += model_size;
3736
+ }
3737
+
3738
+ n_bytes += size_optional_metadata;
3739
+
3740
+ n_bytes += get_size_ending_metadata();
3741
+ return n_bytes;
3742
+ }
3743
+
3744
+ template <class otype>
3745
+ void serialize_combined
3746
+ (
3747
+ const char *serialized_model,
3748
+ const char *serialized_model_ext,
3749
+ const char *serialized_imputer,
3750
+ const char *serialized_indexer,
3751
+ const char *optional_metadata,
3752
+ const size_t size_optional_metadata,
3753
+ otype &out
3754
+ )
3755
+ {
3756
+ SignalSwitcher ss = SignalSwitcher();
3757
+
3758
+ std::unique_ptr<char[]> curr_setup(new char[get_size_setup_info()]);
3759
+ char *ptr_curr_setup = curr_setup.get();
3760
+ add_setup_info(ptr_curr_setup, true);
3761
+ auto pos_watermark = set_return_position(out);
3762
+ add_setup_info(out, false);
3763
+
3764
+ uint8_t model_type = AllObjectsCombined;
3765
+ write_bytes<uint8_t>((void*)&model_type, (size_t)1, out);
3766
+
3767
+ if (serialized_model != NULL)
3768
+ {
3769
+ if (!size_optional_metadata)
3770
+ {
3771
+ if (serialized_imputer == NULL) {
3772
+ if (serialized_indexer == NULL)
3773
+ model_type = HasSingleVarModelNext;
3774
+ else
3775
+ model_type = HasSingleVarModelPlusIndexerNext;
3776
+ }
3777
+ else {
3778
+ if (serialized_indexer == NULL)
3779
+ model_type = HasSingleVarModelPlusImputerNext;
3780
+ else
3781
+ model_type = HasSingleVarModelPlusImputerPlusIndexerNext;
3782
+ }
3783
+ }
3784
+
3785
+ else
3786
+ {
3787
+ if (serialized_imputer == NULL) {
3788
+ if (serialized_indexer == NULL)
3789
+ model_type = HasSingleVarModelPlusMetadataNext;
3790
+ else
3791
+ model_type = HasSingleVarModelPlusIndexerPlusMetadataNext;
3792
+ }
3793
+ else {
3794
+ if (serialized_indexer == NULL)
3795
+ model_type = HasSingleVarModelPlusImputerPlusMetadataNext;
3796
+ else
3797
+ model_type = HasSingleVarModelPlusImputerPlusIndexerPlusMetadataNext;
3798
+ }
3799
+ }
3800
+ }
3801
+
3802
+ else
3803
+ {
3804
+ if (!size_optional_metadata)
3805
+ {
3806
+ if (serialized_imputer == NULL) {
3807
+ if (serialized_indexer == NULL)
3808
+ model_type = HasExtModelNext;
3809
+ else
3810
+ model_type = HasExtModelPlusIndexerNext;
3811
+ }
3812
+ else {
3813
+ if (serialized_indexer == NULL)
3814
+ model_type = HasExtModelPlusImputerNext;
3815
+ else
3816
+ model_type = HasExtModelPlusImputerPlusIndexerNext;
3817
+ }
3818
+ }
3819
+
3820
+ else
3821
+ {
3822
+ if (serialized_imputer == NULL) {
3823
+ if (serialized_indexer == NULL)
3824
+ model_type = HasExtModelPlusMetadataNext;
3825
+ else
3826
+ model_type = HasExtModelPlusIndexerPlusMetadataNext;
3827
+ }
3828
+ else {
3829
+ if (serialized_indexer == NULL)
3830
+ model_type = HasExtModelPlusImputerPlusMetadataNext;
3831
+ else
3832
+ model_type = HasExtModelPlusImputerPlusIndexerPlusMetadataNext;
3833
+ }
3834
+ }
3835
+ }
3836
+
3837
+ write_bytes<uint8_t>((void*)&model_type, (size_t)1, out);
3838
+
3839
+ size_t model_size;
3840
+ size_t size_model1, size_model2, size_model3, size_model4;
3841
+
3842
+ std::unique_ptr<char[]> new_model;
3843
+ if (serialized_model != NULL)
3844
+ {
3845
+ if (memcmp(curr_setup.get(), serialized_model, get_size_setup_info()))
3846
+ {
3847
+ fprintf(stderr, "Warning: 'model' was serialized in a different setup, will need to convert.\n");
3848
+ IsoForest model;
3849
+ deserialization_pipeline(model, serialized_model);
3850
+ new_model = std::unique_ptr<char[]>(new char[get_size_model(model)]);
3851
+ char *ptr_new_model_ser = new_model.get();
3852
+ serialization_pipeline(model, ptr_new_model_ser);
3853
+ serialized_model = new_model.get();
3854
+ }
3855
+ serialized_model += get_size_setup_info() + sizeof(uint8_t);
3856
+ memcpy(&model_size, serialized_model, sizeof(size_t));
3857
+ serialized_model += sizeof(size_t);
3858
+ size_model1 = model_size;
3859
+ }
3860
+
3861
+ else
3862
+ {
3863
+ if (memcmp(curr_setup.get(), serialized_model_ext, get_size_setup_info()))
3864
+ {
3865
+ fprintf(stderr, "Warning: 'model_ext' was serialized in a different setup, will need to convert.\n");
3866
+ ExtIsoForest model;
3867
+ deserialization_pipeline(model, serialized_model_ext);
3868
+ new_model = std::unique_ptr<char[]>(new char[get_size_model(model)]);
3869
+ char *ptr_new_model_ser = new_model.get();
3870
+ serialization_pipeline(model, ptr_new_model_ser);
3871
+ serialized_model_ext = new_model.get();
3872
+ }
3873
+ serialized_model_ext += get_size_setup_info() + sizeof(uint8_t);
3874
+ memcpy(&model_size, serialized_model_ext, sizeof(size_t));
3875
+ serialized_model_ext += sizeof(size_t);
3876
+ size_model2 = model_size;
3877
+ }
3878
+
3879
+ check_interrupt_switch(ss);
3880
+
3881
+ write_bytes<size_t>((void*)&model_size, (size_t)1, out);
3882
+
3883
+ if (serialized_imputer != NULL)
3884
+ {
3885
+ if (memcmp(curr_setup.get(), serialized_imputer, get_size_setup_info()))
3886
+ {
3887
+ fprintf(stderr, "Warning: 'imputer' was serialized in a different setup, will need to convert.\n");
3888
+ Imputer model;
3889
+ deserialization_pipeline(model, serialized_imputer);
3890
+ new_model = std::unique_ptr<char[]>(new char[get_size_model(model)]);
3891
+ char *ptr_new_model_ser = new_model.get();
3892
+ serialization_pipeline(model, ptr_new_model_ser);
3893
+ serialized_imputer = new_model.get();
3894
+ }
3895
+ serialized_imputer += get_size_setup_info() + sizeof(uint8_t);
3896
+ memcpy(&model_size, serialized_imputer, sizeof(size_t));
3897
+ serialized_imputer += sizeof(size_t);
3898
+ size_model3 = model_size;
3899
+ }
3900
+
3901
+ else {
3902
+ model_size = 0;
3903
+ }
3904
+ write_bytes<size_t>((void*)&model_size, (size_t)1, out);
3905
+
3906
+ if (serialized_indexer != NULL)
3907
+ {
3908
+ if (memcmp(curr_setup.get(), serialized_indexer, get_size_setup_info()))
3909
+ {
3910
+ fprintf(stderr, "Warning: 'indexer' was serialized in a different setup, will need to convert.\n");
3911
+ TreesIndexer model;
3912
+ deserialization_pipeline(model, serialized_indexer);
3913
+ new_model = std::unique_ptr<char[]>(new char[get_size_model(model)]);
3914
+ char *ptr_new_model_ser = new_model.get();
3915
+ serialization_pipeline(model, ptr_new_model_ser);
3916
+ serialized_indexer = new_model.get();
3917
+ }
3918
+ serialized_indexer += get_size_setup_info() + sizeof(uint8_t);
3919
+ memcpy(&model_size, serialized_indexer, sizeof(size_t));
3920
+ serialized_indexer += sizeof(size_t);
3921
+ size_model4 = model_size;
3922
+ }
3923
+
3924
+ else {
3925
+ model_size = 0;
3926
+ }
3927
+ write_bytes<size_t>((void*)&model_size, (size_t)1, out);
3928
+
3929
+ check_interrupt_switch(ss);
3930
+
3931
+ write_bytes<size_t>((void*)&size_optional_metadata, (size_t)1, out);
3932
+
3933
+ if (serialized_model != NULL)
3934
+ write_bytes<char>((void*)serialized_model, size_model1, out);
3935
+ else
3936
+ write_bytes<char>((void*)serialized_model_ext, size_model2, out);
3937
+ if (serialized_imputer != NULL)
3938
+ write_bytes<char>((void*)serialized_imputer, size_model3, out);
3939
+ if (serialized_indexer != NULL)
3940
+ write_bytes<char>((void*)serialized_indexer, size_model4, out);
3941
+
3942
+ if (size_optional_metadata)
3943
+ write_bytes<char>((void*)optional_metadata, size_optional_metadata, out);
3944
+
3945
+ check_interrupt_switch(ss);
3946
+
3947
+ uint8_t ending_type = (uint8_t)EndsHere;
3948
+ write_bytes<uint8_t>((void*)&ending_type, (size_t)1, out);
3949
+ size_t jump_ahead = 0;
3950
+ write_bytes<size_t>((void*)&jump_ahead, (size_t)1, out);
3951
+
3952
+ auto end_pos = set_return_position(out);
3953
+ return_to_position(out, pos_watermark);
3954
+ add_full_watermark(out);
3955
+ return_to_position(out, end_pos);
3956
+ }
3957
+
3958
+ void serialize_combined
3959
+ (
3960
+ const char *serialized_model,
3961
+ const char *serialized_model_ext,
3962
+ const char *serialized_imputer,
3963
+ const char *serialized_indexer,
3964
+ const char *optional_metadata,
3965
+ const size_t size_optional_metadata,
3966
+ FILE *out
3967
+ )
3968
+ {
3969
+ serialize_combined<FILE*>(
3970
+ serialized_model,
3971
+ serialized_model_ext,
3972
+ serialized_imputer,
3973
+ serialized_indexer,
3974
+ optional_metadata,
3975
+ size_optional_metadata,
3976
+ out
3977
+ );
3978
+ }
3979
+
3980
+ void serialize_combined
3981
+ (
3982
+ const char *serialized_model,
3983
+ const char *serialized_model_ext,
3984
+ const char *serialized_imputer,
3985
+ const char *serialized_indexer,
3986
+ const char *optional_metadata,
3987
+ const size_t size_optional_metadata,
3988
+ std::ostream &out
3989
+ )
3990
+ {
3991
+ serialize_combined<std::ostream>(
3992
+ serialized_model,
3993
+ serialized_model_ext,
3994
+ serialized_imputer,
3995
+ serialized_indexer,
3996
+ optional_metadata,
3997
+ size_optional_metadata,
3998
+ out
3999
+ );
4000
+ }
4001
+
4002
+ std::string serialize_combined
4003
+ (
4004
+ const char *serialized_model,
4005
+ const char *serialized_model_ext,
4006
+ const char *serialized_imputer,
4007
+ const char *serialized_indexer,
4008
+ const char *optional_metadata,
4009
+ const size_t size_optional_metadata
4010
+ )
4011
+ {
4012
+ std::string serialized;
4013
+ serialized.resize(
4014
+ determine_serialized_size_combined(
4015
+ serialized_model,
4016
+ serialized_model_ext,
4017
+ serialized_imputer,
4018
+ serialized_indexer,
4019
+ size_optional_metadata
4020
+ )
4021
+ );
4022
+ char *ptr = &serialized[0];
4023
+ serialize_combined(
4024
+ serialized_model,
4025
+ serialized_model_ext,
4026
+ serialized_imputer,
4027
+ serialized_indexer,
4028
+ optional_metadata,
4029
+ size_optional_metadata,
4030
+ ptr
4031
+ );
4032
+ return serialized;
4033
+ }
4034
+
4035
+ template <class Model, class itype>
4036
+ void deserialize_model
4037
+ (
4038
+ Model &model,
4039
+ itype &in,
4040
+ const bool has_same_endianness,
4041
+ const bool has_same_int_size,
4042
+ const bool has_same_size_t_size,
4043
+ const PlatformSize saved_int_t,
4044
+ const PlatformSize saved_size_t,
4045
+ const bool lacks_range_penalty,
4046
+ const bool lacks_scoring_metric
4047
+ )
4048
+ {
4049
+ if (has_same_endianness && has_same_int_size && has_same_size_t_size && !lacks_range_penalty && !lacks_scoring_metric)
4050
+ {
4051
+ deserialize_model(model, in);
4052
+ return;
4053
+ }
4054
+
4055
+ std::vector<char> buffer;
4056
+
4057
+ if (saved_int_t == Is16Bit && saved_size_t == Is32Bit)
4058
+ {
4059
+ deserialize_model<itype, int16_t, uint32_t>(model, in, buffer, !has_same_endianness, lacks_range_penalty, lacks_scoring_metric);
4060
+ }
4061
+
4062
+ else if (saved_int_t == Is32Bit && saved_size_t == Is32Bit)
4063
+ {
4064
+ deserialize_model<itype, int32_t, uint32_t>(model, in, buffer, !has_same_endianness, lacks_range_penalty, lacks_scoring_metric);
4065
+ }
4066
+
4067
+ else if (saved_int_t == Is64Bit && saved_size_t == Is32Bit)
4068
+ {
4069
+ deserialize_model<itype, int64_t, uint32_t>(model, in, buffer, !has_same_endianness, lacks_range_penalty, lacks_scoring_metric);
4070
+ }
4071
+
4072
+ else if (saved_int_t == Is16Bit && saved_size_t == Is64Bit)
4073
+ {
4074
+ deserialize_model<itype, int16_t, uint64_t>(model, in, buffer, !has_same_endianness, lacks_range_penalty, lacks_scoring_metric);
4075
+ }
4076
+
4077
+ else if (saved_int_t == Is32Bit && saved_size_t == Is64Bit)
4078
+ {
4079
+ deserialize_model<itype, int32_t, uint64_t>(model, in, buffer, !has_same_endianness, lacks_range_penalty, lacks_scoring_metric);
4080
+ }
4081
+
4082
+ else if (saved_int_t == Is64Bit && saved_size_t == Is64Bit)
4083
+ {
4084
+ deserialize_model<itype, int16_t, uint64_t>(model, in, buffer, !has_same_endianness, lacks_range_penalty, lacks_scoring_metric);
4085
+ }
4086
+
4087
+ else
4088
+ {
4089
+ unexpected_error();
4090
+ }
4091
+ }
4092
+
4093
+ template <class itype>
4094
+ void deserialize_combined
4095
+ (
4096
+ itype &in,
4097
+ IsoForest *model,
4098
+ ExtIsoForest *model_ext,
4099
+ Imputer *imputer,
4100
+ TreesIndexer *indexer,
4101
+ char *optional_metadata
4102
+ )
4103
+ {
4104
+ SignalSwitcher ss = SignalSwitcher();
4105
+
4106
+ bool has_same_int_size;
4107
+ bool has_same_size_t_size;
4108
+ bool has_same_endianness;
4109
+ PlatformSize saved_int_t;
4110
+ PlatformSize saved_size_t;
4111
+ PlatformEndianness saved_endian;
4112
+ bool lacks_range_penalty;
4113
+ bool lacks_scoring_metric;
4114
+ bool lacks_indexer;
4115
+
4116
+ check_setup_info(
4117
+ in,
4118
+ has_same_int_size,
4119
+ has_same_size_t_size,
4120
+ has_same_endianness,
4121
+ saved_int_t,
4122
+ saved_size_t,
4123
+ saved_endian,
4124
+ lacks_range_penalty,
4125
+ lacks_scoring_metric,
4126
+ lacks_indexer
4127
+ );
4128
+
4129
+ uint8_t model_in;
4130
+ read_bytes<uint8_t>((void*)&model_in, (size_t)1, in);
4131
+ if (model_in != AllObjectsCombined)
4132
+ throw std::runtime_error("Object to de-serialize was not created through 'serialize_combined'.\n");
4133
+
4134
+ read_bytes<uint8_t>((void*)&model_in, (size_t)1, in);
4135
+
4136
+ size_t size_model[4];
4137
+ size_t size_metadata;
4138
+ if (!lacks_indexer)
4139
+ {
4140
+ read_bytes_size_t((void*)size_model, (size_t)4, in, saved_size_t, has_same_endianness);
4141
+ size_metadata = size_model[3];
4142
+ }
4143
+
4144
+ else
4145
+ {
4146
+ read_bytes_size_t((void*)size_model, (size_t)3, in, saved_size_t, has_same_endianness);
4147
+ size_metadata = size_model[2];
4148
+ size_model[2] = 0;
4149
+ size_model[3] = size_metadata;
4150
+ }
4151
+
4152
+ switch (model_in)
4153
+ {
4154
+ case HasSingleVarModelNext:
4155
+ {
4156
+ deserialize_model(*model, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4157
+ break;
4158
+ }
4159
+ case HasSingleVarModelPlusIndexerNext:
4160
+ {
4161
+ deserialize_model(*model, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4162
+ check_interrupt_switch(ss);
4163
+ deserialize_model(*indexer, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4164
+ break;
4165
+ }
4166
+ case HasExtModelNext:
4167
+ {
4168
+ deserialize_model(*model_ext, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4169
+ break;
4170
+ }
4171
+ case HasExtModelPlusIndexerNext:
4172
+ {
4173
+ deserialize_model(*model_ext, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4174
+ check_interrupt_switch(ss);
4175
+ deserialize_model(*indexer, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4176
+ break;
4177
+ }
4178
+ case HasSingleVarModelPlusImputerNext:
4179
+ {
4180
+ deserialize_model(*model, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4181
+ check_interrupt_switch(ss);
4182
+ deserialize_model(*imputer, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4183
+ break;
4184
+ }
4185
+ case HasSingleVarModelPlusImputerPlusIndexerNext:
4186
+ {
4187
+ deserialize_model(*model, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4188
+ check_interrupt_switch(ss);
4189
+ deserialize_model(*imputer, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4190
+ check_interrupt_switch(ss);
4191
+ deserialize_model(*indexer, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4192
+ break;
4193
+ }
4194
+ case HasExtModelPlusImputerNext:
4195
+ {
4196
+ deserialize_model(*model_ext, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4197
+ check_interrupt_switch(ss);
4198
+ deserialize_model(*imputer, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4199
+ break;
4200
+ }
4201
+ case HasExtModelPlusImputerPlusIndexerNext:
4202
+ {
4203
+ deserialize_model(*model_ext, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4204
+ check_interrupt_switch(ss);
4205
+ deserialize_model(*imputer, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4206
+ check_interrupt_switch(ss);
4207
+ deserialize_model(*indexer, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4208
+ break;
4209
+ }
4210
+ case HasSingleVarModelPlusMetadataNext:
4211
+ {
4212
+ deserialize_model(*model, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4213
+ check_interrupt_switch(ss);
4214
+ read_bytes<char>((void*)optional_metadata, size_metadata, in);
4215
+ break;
4216
+ }
4217
+ case HasSingleVarModelPlusIndexerPlusMetadataNext:
4218
+ {
4219
+ deserialize_model(*model, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4220
+ check_interrupt_switch(ss);
4221
+ deserialize_model(*indexer, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4222
+ check_interrupt_switch(ss);
4223
+ read_bytes<char>((void*)optional_metadata, size_metadata, in);
4224
+ break;
4225
+ }
4226
+ case HasExtModelPlusMetadataNext:
4227
+ {
4228
+ deserialize_model(*model_ext, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4229
+ check_interrupt_switch(ss);
4230
+ read_bytes<char>((void*)optional_metadata, size_metadata, in);
4231
+ break;
4232
+ }
4233
+ case HasExtModelPlusIndexerPlusMetadataNext:
4234
+ {
4235
+ deserialize_model(*model_ext, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4236
+ check_interrupt_switch(ss);
4237
+ deserialize_model(*indexer, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4238
+ check_interrupt_switch(ss);
4239
+ read_bytes<char>((void*)optional_metadata, size_metadata, in);
4240
+ break;
4241
+ }
4242
+ case HasSingleVarModelPlusImputerPlusMetadataNext:
4243
+ {
4244
+ deserialize_model(*model, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4245
+ check_interrupt_switch(ss);
4246
+ deserialize_model(*imputer, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4247
+ check_interrupt_switch(ss);
4248
+ read_bytes<char>((void*)optional_metadata, size_metadata, in);
4249
+ break;
4250
+ }
4251
+ case HasSingleVarModelPlusImputerPlusIndexerPlusMetadataNext:
4252
+ {
4253
+ deserialize_model(*model, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4254
+ check_interrupt_switch(ss);
4255
+ deserialize_model(*imputer, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4256
+ check_interrupt_switch(ss);
4257
+ deserialize_model(*indexer, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4258
+ check_interrupt_switch(ss);
4259
+ read_bytes<char>((void*)optional_metadata, size_metadata, in);
4260
+ break;
4261
+ }
4262
+ case HasExtModelPlusImputerPlusMetadataNext:
4263
+ {
4264
+ deserialize_model(*model_ext, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4265
+ check_interrupt_switch(ss);
4266
+ deserialize_model(*imputer, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4267
+ check_interrupt_switch(ss);
4268
+ read_bytes<char>((void*)optional_metadata, size_metadata, in);
4269
+ break;
4270
+ }
4271
+ case HasExtModelPlusImputerPlusIndexerPlusMetadataNext:
4272
+ {
4273
+ deserialize_model(*model_ext, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4274
+ check_interrupt_switch(ss);
4275
+ deserialize_model(*imputer, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4276
+ check_interrupt_switch(ss);
4277
+ deserialize_model(*indexer, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
4278
+ check_interrupt_switch(ss);
4279
+ read_bytes<char>((void*)optional_metadata, size_metadata, in);
4280
+ break;
4281
+ }
4282
+
4283
+ default:
4284
+ {
4285
+ throw std::runtime_error("Serialized format is incompatible.\n");
4286
+ }
4287
+ }
4288
+ }
4289
+
4290
+ void deserialize_combined
4291
+ (
4292
+ const char* in,
4293
+ IsoForest *model,
4294
+ ExtIsoForest *model_ext,
4295
+ Imputer *imputer,
4296
+ TreesIndexer *indexer,
4297
+ char *optional_metadata
4298
+ )
4299
+ {
4300
+ deserialize_combined<const char*>(
4301
+ in,
4302
+ model,
4303
+ model_ext,
4304
+ imputer,
4305
+ indexer,
4306
+ optional_metadata
4307
+ );
4308
+ }
4309
+
4310
+ void deserialize_combined
4311
+ (
4312
+ FILE* in,
4313
+ IsoForest *model,
4314
+ ExtIsoForest *model_ext,
4315
+ Imputer *imputer,
4316
+ TreesIndexer *indexer,
4317
+ char *optional_metadata
4318
+ )
4319
+ {
4320
+ deserialize_combined<FILE*>(
4321
+ in,
4322
+ model,
4323
+ model_ext,
4324
+ imputer,
4325
+ indexer,
4326
+ optional_metadata
4327
+ );
4328
+ }
4329
+
4330
+ void deserialize_combined
4331
+ (
4332
+ std::istream &in,
4333
+ IsoForest *model,
4334
+ ExtIsoForest *model_ext,
4335
+ Imputer *imputer,
4336
+ TreesIndexer *indexer,
4337
+ char *optional_metadata
4338
+ )
4339
+ {
4340
+ deserialize_combined<std::istream>(
4341
+ in,
4342
+ model,
4343
+ model_ext,
4344
+ imputer,
4345
+ indexer,
4346
+ optional_metadata
4347
+ );
4348
+ }
4349
+
4350
+ void deserialize_combined
4351
+ (
4352
+ const std::string &in,
4353
+ IsoForest *model,
4354
+ ExtIsoForest *model_ext,
4355
+ Imputer *imputer,
4356
+ TreesIndexer *indexer,
4357
+ char *optional_metadata
4358
+ )
4359
+ {
4360
+ const char *ptr = &in[0];
4361
+ deserialize_combined<const char*>(
4362
+ ptr,
4363
+ model,
4364
+ model_ext,
4365
+ imputer,
4366
+ indexer,
4367
+ optional_metadata
4368
+ );
4369
+ }
4370
+
4371
+ bool check_model_has_range_penalty(const IsoForest &model) noexcept
4372
+ {
4373
+ for (const auto &tree : model.trees)
4374
+ {
4375
+ for (const auto &node : tree)
4376
+ {
4377
+ if (node.score < 0 && node.col_type == Numeric)
4378
+ {
4379
+ if (node.range_low > -HUGE_VAL && node.range_high < HUGE_VAL)
4380
+ return true;
4381
+ }
4382
+ }
4383
+ }
4384
+
4385
+ return false;
4386
+ }
4387
+
4388
+ bool check_model_has_range_penalty(const ExtIsoForest &model) noexcept
4389
+ {
4390
+ for (const auto &tree : model.hplanes)
4391
+ {
4392
+ for (const auto &node : tree)
4393
+ {
4394
+ if (node.score < 0)
4395
+ {
4396
+ if (node.range_low > -HUGE_VAL && node.range_high < HUGE_VAL)
4397
+ return true;
4398
+ }
4399
+ }
4400
+ }
4401
+
4402
+ return false;
4403
+ }
4404
+
4405
+ void add_range_penalty(IsoForest &model) noexcept
4406
+ {
4407
+ model.has_range_penalty = check_model_has_range_penalty(model);
4408
+ }
4409
+
4410
+ void add_range_penalty(ExtIsoForest &model) noexcept
4411
+ {
4412
+ model.has_range_penalty = check_model_has_range_penalty(model);
4413
+ }
4414
+
4415
+ void add_range_penalty(Imputer &model) noexcept
4416
+ {
4417
+
4418
+ }
4419
+
4420
+ void add_range_penalty(TreesIndexer &model) noexcept
4421
+ {
4422
+
4423
+ }