isotree 0.2.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -1
- data/LICENSE.txt +2 -2
- data/README.md +32 -14
- data/ext/isotree/ext.cpp +144 -31
- data/ext/isotree/extconf.rb +7 -7
- data/lib/isotree/isolation_forest.rb +110 -30
- data/lib/isotree/version.rb +1 -1
- data/vendor/isotree/LICENSE +1 -1
- data/vendor/isotree/README.md +165 -27
- data/vendor/isotree/include/isotree.hpp +2111 -0
- data/vendor/isotree/include/isotree_oop.hpp +394 -0
- data/vendor/isotree/inst/COPYRIGHTS +62 -0
- data/vendor/isotree/src/RcppExports.cpp +525 -52
- data/vendor/isotree/src/Rwrapper.cpp +1931 -268
- data/vendor/isotree/src/c_interface.cpp +953 -0
- data/vendor/isotree/src/crit.hpp +4232 -0
- data/vendor/isotree/src/dist.hpp +1886 -0
- data/vendor/isotree/src/exp_depth_table.hpp +134 -0
- data/vendor/isotree/src/extended.hpp +1444 -0
- data/vendor/isotree/src/external_facing_generic.hpp +399 -0
- data/vendor/isotree/src/fit_model.hpp +2401 -0
- data/vendor/isotree/src/{dealloc.cpp → headers_joined.hpp} +38 -22
- data/vendor/isotree/src/helpers_iforest.hpp +813 -0
- data/vendor/isotree/src/{impute.cpp → impute.hpp} +353 -122
- data/vendor/isotree/src/indexer.cpp +515 -0
- data/vendor/isotree/src/instantiate_template_headers.cpp +118 -0
- data/vendor/isotree/src/instantiate_template_headers.hpp +240 -0
- data/vendor/isotree/src/isoforest.hpp +1659 -0
- data/vendor/isotree/src/isotree.hpp +1804 -392
- data/vendor/isotree/src/isotree_exportable.hpp +99 -0
- data/vendor/isotree/src/merge_models.cpp +159 -16
- data/vendor/isotree/src/mult.hpp +1321 -0
- data/vendor/isotree/src/oop_interface.cpp +842 -0
- data/vendor/isotree/src/oop_interface.hpp +278 -0
- data/vendor/isotree/src/other_helpers.hpp +219 -0
- data/vendor/isotree/src/predict.hpp +1932 -0
- data/vendor/isotree/src/python_helpers.hpp +134 -0
- data/vendor/isotree/src/ref_indexer.hpp +154 -0
- data/vendor/isotree/src/robinmap/LICENSE +21 -0
- data/vendor/isotree/src/robinmap/README.md +483 -0
- data/vendor/isotree/src/robinmap/include/tsl/robin_growth_policy.h +406 -0
- data/vendor/isotree/src/robinmap/include/tsl/robin_hash.h +1620 -0
- data/vendor/isotree/src/robinmap/include/tsl/robin_map.h +807 -0
- data/vendor/isotree/src/robinmap/include/tsl/robin_set.h +660 -0
- data/vendor/isotree/src/serialize.cpp +4300 -139
- data/vendor/isotree/src/sql.cpp +141 -59
- data/vendor/isotree/src/subset_models.cpp +174 -0
- data/vendor/isotree/src/utils.hpp +3808 -0
- data/vendor/isotree/src/xoshiro.hpp +467 -0
- data/vendor/isotree/src/ziggurat.hpp +405 -0
- metadata +38 -104
- data/vendor/cereal/LICENSE +0 -24
- data/vendor/cereal/README.md +0 -85
- data/vendor/cereal/include/cereal/access.hpp +0 -351
- data/vendor/cereal/include/cereal/archives/adapters.hpp +0 -163
- data/vendor/cereal/include/cereal/archives/binary.hpp +0 -169
- data/vendor/cereal/include/cereal/archives/json.hpp +0 -1019
- data/vendor/cereal/include/cereal/archives/portable_binary.hpp +0 -334
- data/vendor/cereal/include/cereal/archives/xml.hpp +0 -956
- data/vendor/cereal/include/cereal/cereal.hpp +0 -1089
- data/vendor/cereal/include/cereal/details/helpers.hpp +0 -422
- data/vendor/cereal/include/cereal/details/polymorphic_impl.hpp +0 -796
- data/vendor/cereal/include/cereal/details/polymorphic_impl_fwd.hpp +0 -65
- data/vendor/cereal/include/cereal/details/static_object.hpp +0 -127
- data/vendor/cereal/include/cereal/details/traits.hpp +0 -1411
- data/vendor/cereal/include/cereal/details/util.hpp +0 -84
- data/vendor/cereal/include/cereal/external/base64.hpp +0 -134
- data/vendor/cereal/include/cereal/external/rapidjson/allocators.h +0 -284
- data/vendor/cereal/include/cereal/external/rapidjson/cursorstreamwrapper.h +0 -78
- data/vendor/cereal/include/cereal/external/rapidjson/document.h +0 -2652
- data/vendor/cereal/include/cereal/external/rapidjson/encodedstream.h +0 -299
- data/vendor/cereal/include/cereal/external/rapidjson/encodings.h +0 -716
- data/vendor/cereal/include/cereal/external/rapidjson/error/en.h +0 -74
- data/vendor/cereal/include/cereal/external/rapidjson/error/error.h +0 -161
- data/vendor/cereal/include/cereal/external/rapidjson/filereadstream.h +0 -99
- data/vendor/cereal/include/cereal/external/rapidjson/filewritestream.h +0 -104
- data/vendor/cereal/include/cereal/external/rapidjson/fwd.h +0 -151
- data/vendor/cereal/include/cereal/external/rapidjson/internal/biginteger.h +0 -290
- data/vendor/cereal/include/cereal/external/rapidjson/internal/diyfp.h +0 -271
- data/vendor/cereal/include/cereal/external/rapidjson/internal/dtoa.h +0 -245
- data/vendor/cereal/include/cereal/external/rapidjson/internal/ieee754.h +0 -78
- data/vendor/cereal/include/cereal/external/rapidjson/internal/itoa.h +0 -308
- data/vendor/cereal/include/cereal/external/rapidjson/internal/meta.h +0 -186
- data/vendor/cereal/include/cereal/external/rapidjson/internal/pow10.h +0 -55
- data/vendor/cereal/include/cereal/external/rapidjson/internal/regex.h +0 -740
- data/vendor/cereal/include/cereal/external/rapidjson/internal/stack.h +0 -232
- data/vendor/cereal/include/cereal/external/rapidjson/internal/strfunc.h +0 -69
- data/vendor/cereal/include/cereal/external/rapidjson/internal/strtod.h +0 -290
- data/vendor/cereal/include/cereal/external/rapidjson/internal/swap.h +0 -46
- data/vendor/cereal/include/cereal/external/rapidjson/istreamwrapper.h +0 -128
- data/vendor/cereal/include/cereal/external/rapidjson/memorybuffer.h +0 -70
- data/vendor/cereal/include/cereal/external/rapidjson/memorystream.h +0 -71
- data/vendor/cereal/include/cereal/external/rapidjson/msinttypes/inttypes.h +0 -316
- data/vendor/cereal/include/cereal/external/rapidjson/msinttypes/stdint.h +0 -300
- data/vendor/cereal/include/cereal/external/rapidjson/ostreamwrapper.h +0 -81
- data/vendor/cereal/include/cereal/external/rapidjson/pointer.h +0 -1414
- data/vendor/cereal/include/cereal/external/rapidjson/prettywriter.h +0 -277
- data/vendor/cereal/include/cereal/external/rapidjson/rapidjson.h +0 -656
- data/vendor/cereal/include/cereal/external/rapidjson/reader.h +0 -2230
- data/vendor/cereal/include/cereal/external/rapidjson/schema.h +0 -2497
- data/vendor/cereal/include/cereal/external/rapidjson/stream.h +0 -223
- data/vendor/cereal/include/cereal/external/rapidjson/stringbuffer.h +0 -121
- data/vendor/cereal/include/cereal/external/rapidjson/writer.h +0 -709
- data/vendor/cereal/include/cereal/external/rapidxml/license.txt +0 -52
- data/vendor/cereal/include/cereal/external/rapidxml/manual.html +0 -406
- data/vendor/cereal/include/cereal/external/rapidxml/rapidxml.hpp +0 -2624
- data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_iterators.hpp +0 -175
- data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_print.hpp +0 -428
- data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_utils.hpp +0 -123
- data/vendor/cereal/include/cereal/macros.hpp +0 -154
- data/vendor/cereal/include/cereal/specialize.hpp +0 -139
- data/vendor/cereal/include/cereal/types/array.hpp +0 -79
- data/vendor/cereal/include/cereal/types/atomic.hpp +0 -55
- data/vendor/cereal/include/cereal/types/base_class.hpp +0 -203
- data/vendor/cereal/include/cereal/types/bitset.hpp +0 -176
- data/vendor/cereal/include/cereal/types/boost_variant.hpp +0 -164
- data/vendor/cereal/include/cereal/types/chrono.hpp +0 -72
- data/vendor/cereal/include/cereal/types/common.hpp +0 -129
- data/vendor/cereal/include/cereal/types/complex.hpp +0 -56
- data/vendor/cereal/include/cereal/types/concepts/pair_associative_container.hpp +0 -73
- data/vendor/cereal/include/cereal/types/deque.hpp +0 -62
- data/vendor/cereal/include/cereal/types/forward_list.hpp +0 -68
- data/vendor/cereal/include/cereal/types/functional.hpp +0 -43
- data/vendor/cereal/include/cereal/types/list.hpp +0 -62
- data/vendor/cereal/include/cereal/types/map.hpp +0 -36
- data/vendor/cereal/include/cereal/types/memory.hpp +0 -425
- data/vendor/cereal/include/cereal/types/optional.hpp +0 -66
- data/vendor/cereal/include/cereal/types/polymorphic.hpp +0 -483
- data/vendor/cereal/include/cereal/types/queue.hpp +0 -132
- data/vendor/cereal/include/cereal/types/set.hpp +0 -103
- data/vendor/cereal/include/cereal/types/stack.hpp +0 -76
- data/vendor/cereal/include/cereal/types/string.hpp +0 -61
- data/vendor/cereal/include/cereal/types/tuple.hpp +0 -123
- data/vendor/cereal/include/cereal/types/unordered_map.hpp +0 -36
- data/vendor/cereal/include/cereal/types/unordered_set.hpp +0 -99
- data/vendor/cereal/include/cereal/types/utility.hpp +0 -47
- data/vendor/cereal/include/cereal/types/valarray.hpp +0 -89
- data/vendor/cereal/include/cereal/types/variant.hpp +0 -109
- data/vendor/cereal/include/cereal/types/vector.hpp +0 -112
- data/vendor/cereal/include/cereal/version.hpp +0 -52
- data/vendor/isotree/src/Makevars +0 -4
- data/vendor/isotree/src/crit.cpp +0 -912
- data/vendor/isotree/src/dist.cpp +0 -749
- data/vendor/isotree/src/extended.cpp +0 -790
- data/vendor/isotree/src/fit_model.cpp +0 -1090
- data/vendor/isotree/src/helpers_iforest.cpp +0 -324
- data/vendor/isotree/src/isoforest.cpp +0 -771
- data/vendor/isotree/src/mult.cpp +0 -607
- data/vendor/isotree/src/predict.cpp +0 -853
- data/vendor/isotree/src/utils.cpp +0 -1566
|
@@ -18,11 +18,29 @@
|
|
|
18
18
|
* [5] https://sourceforge.net/projects/iforest/
|
|
19
19
|
* [6] https://math.stackexchange.com/questions/3388518/expected-number-of-paths-required-to-separate-elements-in-a-binary-tree
|
|
20
20
|
* [7] Quinlan, J. Ross. C4. 5: programs for machine learning. Elsevier, 2014.
|
|
21
|
-
* [8] Cortes, David.
|
|
22
|
-
*
|
|
21
|
+
* [8] Cortes, David.
|
|
22
|
+
* "Distance approximation using Isolation Forests."
|
|
23
|
+
* arXiv preprint arXiv:1910.12362 (2019).
|
|
24
|
+
* [9] Cortes, David.
|
|
25
|
+
* "Imputing missing values with unsupervised random trees."
|
|
26
|
+
* arXiv preprint arXiv:1911.06646 (2019).
|
|
27
|
+
* [10] https://math.stackexchange.com/questions/3333220/expected-average-depth-in-random-binary-tree-constructed-top-to-bottom
|
|
28
|
+
* [11] Cortes, David.
|
|
29
|
+
* "Revisiting randomized choices in isolation forests."
|
|
30
|
+
* arXiv preprint arXiv:2110.13402 (2021).
|
|
31
|
+
* [12] Guha, Sudipto, et al.
|
|
32
|
+
* "Robust random cut forest based anomaly detection on streams."
|
|
33
|
+
* International conference on machine learning. PMLR, 2016.
|
|
34
|
+
* [13] Cortes, David.
|
|
35
|
+
* "Isolation forests: looking beyond tree depth."
|
|
36
|
+
* arXiv preprint arXiv:2111.11639 (2021).
|
|
37
|
+
* [14] Ting, Kai Ming, Yue Zhu, and Zhi-Hua Zhou.
|
|
38
|
+
* "Isolation kernel and its effect on SVM"
|
|
39
|
+
* Proceedings of the 24th ACM SIGKDD
|
|
40
|
+
* International Conference on Knowledge Discovery & Data Mining. 2018.
|
|
23
41
|
*
|
|
24
42
|
* BSD 2-Clause License
|
|
25
|
-
* Copyright (c)
|
|
43
|
+
* Copyright (c) 2019-2022, David Cortes
|
|
26
44
|
* All rights reserved.
|
|
27
45
|
* Redistribution and use in source and binary forms, with or without
|
|
28
46
|
* modification, are permitted provided that the following conditions are met:
|
|
@@ -44,219 +62,4362 @@
|
|
|
44
62
|
*/
|
|
45
63
|
#include "isotree.hpp"
|
|
46
64
|
|
|
47
|
-
|
|
65
|
+
/* TODO: add option to serialize as JSON file */
|
|
48
66
|
|
|
67
|
+
using std::uint8_t;
|
|
68
|
+
using std::int8_t;
|
|
69
|
+
using std::uint16_t;
|
|
70
|
+
using std::int16_t;
|
|
71
|
+
using std::uint32_t;
|
|
72
|
+
using std::int32_t;
|
|
73
|
+
using std::uint64_t;
|
|
74
|
+
using std::int64_t;
|
|
49
75
|
|
|
50
|
-
|
|
51
|
-
|
|
76
|
+
/* https://stackoverflow.com/questions/16696297/ftell-at-a-position-past-2gb */
|
|
77
|
+
/* TODO: do CLANG and ICC have similar functionality? */
|
|
78
|
+
#if (defined(_WIN32) || defined(_WIN64) || defined(_MSC_VER)) && (SIZE_MAX >= UINT64_MAX)
|
|
79
|
+
# ifdef _MSC_VER
|
|
80
|
+
# include <stdio.h>
|
|
81
|
+
# define fseek_ _fseeki64
|
|
82
|
+
# define ftell_ _ftelli64
|
|
83
|
+
# define fpos_t_ __int64
|
|
84
|
+
# elif defined(__GNUG__) || defined(__GNUC__)
|
|
85
|
+
# ifndef _FILE_OFFSET_BITS
|
|
86
|
+
# define _FILE_OFFSET_BITS 64
|
|
87
|
+
# endif
|
|
88
|
+
# include <stdio.h>
|
|
89
|
+
# define fseek_ fseeko
|
|
90
|
+
# define ftell_ ftello
|
|
91
|
+
# define fpos_t_ off_t
|
|
92
|
+
# else
|
|
93
|
+
using std::feof;
|
|
94
|
+
using std::fwrite;
|
|
95
|
+
using std::fread;
|
|
96
|
+
using std::fopen;
|
|
97
|
+
using std::fclose;
|
|
98
|
+
using std::ftell;
|
|
99
|
+
using std::fseek;
|
|
100
|
+
# define fseek_ fseek
|
|
101
|
+
# define ftell_ ftell
|
|
102
|
+
# define fpos_t_ long /* <- might overflow with large files */
|
|
103
|
+
# endif
|
|
104
|
+
#else
|
|
105
|
+
using std::feof;
|
|
106
|
+
using std::fwrite;
|
|
107
|
+
using std::fread;
|
|
108
|
+
using std::fopen;
|
|
109
|
+
using std::fclose;
|
|
110
|
+
using std::ftell;
|
|
111
|
+
using std::fseek;
|
|
112
|
+
# define fseek_ fseek
|
|
113
|
+
# define ftell_ ftell
|
|
114
|
+
# define fpos_t_ long
|
|
115
|
+
#endif
|
|
116
|
+
|
|
117
|
+
#if defined(DBL_MANT_DIG) && (DBL_MANT_DIG == 53) && (FLT_RADIX == 2)
|
|
118
|
+
#define HAS_IEEE_DOUBLE
|
|
119
|
+
#endif
|
|
120
|
+
|
|
121
|
+
#if INT_MAX == INT16_MAX
|
|
122
|
+
#define HAS_INT16
|
|
123
|
+
#elif INT_MAX == INT32_MAX
|
|
124
|
+
#define HAS_INT32
|
|
125
|
+
#elif INT_MAX == INT64_MAX
|
|
126
|
+
#define HAS_INT64
|
|
127
|
+
#else
|
|
128
|
+
#define HAS_INT_OTHER
|
|
129
|
+
#endif
|
|
130
|
+
|
|
131
|
+
#if SIZE_MAX == UINT32_MAX
|
|
132
|
+
#define HAS_SIZE32
|
|
133
|
+
#elif SIZE_MAX == UINT64_MAX
|
|
134
|
+
#define HAS_SIZE64
|
|
135
|
+
#else
|
|
136
|
+
#define HAS_SIZE_OTHER
|
|
137
|
+
#endif
|
|
138
|
+
|
|
139
|
+
const char *watermark = "isotree_model";
|
|
140
|
+
const char *incomplete_watermark = "incomplete___";
|
|
141
|
+
static const size_t SIZE_WATERMARK = 13;
|
|
142
|
+
enum DoubleTypeStructure {IsNormalDouble=1, IsAbnormalDouble=2};
|
|
143
|
+
enum PlatformSize {Is16Bit=1, Is32Bit=2, Is64Bit=3, IsOther=4};
|
|
144
|
+
enum PlatformEndianness {PlatformLittleEndian=1, PlatformBigEndian=2};
|
|
145
|
+
/* Identifies which kind of object a serialized blob holds.
   Enumerators are listed in numeric order; the explicit values are kept
   exactly as they were (presumably they are persisted in serialized
   output, so they must not be renumbered - TODO confirm). */
enum ModelTypes {
    IsoForestModel     = 1,
    ExtIsoForestModel  = 2,
    ImputerModel       = 3,
    AllObjectsCombined = 4,
    IndexerModel       = 5
};
|
|
152
|
+
/* Marker describing what (if anything) follows the current object.
   Enumerators are listed in numeric order; every explicit value is
   unchanged (presumably these are persisted in serialized output, so
   they must not be renumbered - TODO confirm). */
enum EndingIndicator {
    EndsHere                                                = 0,
    HasSingleVarModelNext                                   = 1,
    HasExtModelNext                                         = 2,
    HasImputerNext                                          = 3,
    HasSingleVarModelPlusImputerNext                        = 4,
    HasExtModelPlusImputerNext                              = 5,
    HasSingleVarModelPlusMetadataNext                       = 6,
    HasExtModelPlusMetadataNext                             = 7,
    HasSingleVarModelPlusImputerPlusMetadataNext            = 8,
    HasExtModelPlusImputerPlusMetadataNext                  = 9,
    HasMoreTreesNext                                        = 10,
    HasIndexerNext                                          = 11,
    HasSingleVarModelPlusIndexerNext                        = 12,
    HasSingleVarModelPlusImputerPlusIndexerNext             = 13,
    HasExtModelPlusIndexerNext                              = 14,
    HasExtModelPlusImputerPlusIndexerNext                   = 15,
    HasSingleVarModelPlusIndexerPlusMetadataNext            = 16,
    HasExtModelPlusIndexerPlusMetadataNext                  = 17,
    HasSingleVarModelPlusImputerPlusIndexerPlusMetadataNext = 18,
    HasExtModelPlusImputerPlusIndexerPlusMetadataNext       = 19
};
|
|
174
|
+
|
|
175
|
+
#ifdef _MSC_VER
|
|
176
|
+
#include <stdlib.h>
|
|
177
|
+
void swap16b(char *bytes) noexcept
|
|
178
|
+
{
|
|
179
|
+
if (std::numeric_limits<unsigned short>::max() == UINT16_MAX) {
|
|
180
|
+
unsigned short temp;
|
|
181
|
+
memcpy(&temp, bytes, sizeof(unsigned short));
|
|
182
|
+
temp = _byteswap_ushort(temp);
|
|
183
|
+
memcpy(bytes, &temp, sizeof(unsigned short));
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
else {
|
|
187
|
+
std::swap(bytes[0], bytes[1]);
|
|
188
|
+
}
|
|
189
|
+
}
|
|
190
|
+
void swap32b(char *bytes) noexcept
|
|
191
|
+
{
|
|
192
|
+
if (std::numeric_limits<unsigned long>::max() == UINT32_MAX) {
|
|
193
|
+
unsigned long temp;
|
|
194
|
+
memcpy(&temp, bytes, sizeof(unsigned long));
|
|
195
|
+
temp = _byteswap_ulong(temp);
|
|
196
|
+
memcpy(bytes, &temp, sizeof(unsigned long));
|
|
197
|
+
}
|
|
198
|
+
|
|
199
|
+
else {
|
|
200
|
+
std::swap(bytes[0], bytes[3]);
|
|
201
|
+
std::swap(bytes[1], bytes[2]);
|
|
202
|
+
}
|
|
203
|
+
}
|
|
204
|
+
void swap64b(char *bytes) noexcept
|
|
205
|
+
{
|
|
206
|
+
unsigned __int64 temp;
|
|
207
|
+
memcpy(&temp, bytes, sizeof(unsigned __int64));
|
|
208
|
+
temp = _byteswap_uint64(temp);
|
|
209
|
+
memcpy(bytes, &temp, sizeof(unsigned __int64));
|
|
210
|
+
}
|
|
211
|
+
#elif defined(__GNUC__) && (__GNUC__ >= 5) && !defined(_WIN32)
|
|
212
|
+
/* Reverses the two bytes at 'bytes' in place using the
   '__builtin_bswap16' compiler builtin, going through a local copy. */
void swap16b(char *bytes) noexcept
{
    uint16_t value;
    memcpy(&value, bytes, sizeof(value));
    const uint16_t flipped = __builtin_bswap16(value);
    memcpy(bytes, &flipped, sizeof(flipped));
}
|
|
219
|
+
/* Reverses the four bytes at 'bytes' in place using the
   '__builtin_bswap32' compiler builtin, going through a local copy. */
void swap32b(char *bytes) noexcept
{
    uint32_t value;
    memcpy(&value, bytes, sizeof(value));
    const uint32_t flipped = __builtin_bswap32(value);
    memcpy(bytes, &flipped, sizeof(flipped));
}
|
|
226
|
+
/* Reverses the eight bytes at 'bytes' in place using the
   '__builtin_bswap64' compiler builtin, going through a local copy. */
void swap64b(char *bytes) noexcept
{
    uint64_t value;
    memcpy(&value, bytes, sizeof(value));
    const uint64_t flipped = __builtin_bswap64(value);
    memcpy(bytes, &flipped, sizeof(flipped));
}
|
|
233
|
+
#else
|
|
234
|
+
/* Portable fallback: exchanges the two bytes at 'bytes' in place. */
void swap16b(char *bytes) noexcept
{
    const char first = bytes[0];
    bytes[0] = bytes[1];
    bytes[1] = first;
}
|
|
238
|
+
/* Portable fallback: reverses the four bytes at 'bytes' in place by
   exchanging them pairwise from the outside in. */
void swap32b(char *bytes) noexcept
{
    for (int lo = 0, hi = 3; lo < hi; lo++, hi--)
        std::swap(bytes[lo], bytes[hi]);
}
|
|
243
|
+
/* Portable fallback: reverses the eight bytes at 'bytes' in place by
   exchanging them pairwise from the outside in. */
void swap64b(char *bytes) noexcept
{
    for (int lo = 0, hi = 7; lo < hi; lo++, hi--)
        std::swap(bytes[lo], bytes[hi]);
}
|
|
250
|
+
#endif
|
|
251
|
+
/* Reverses the byte order of a 'float' in place.
   When HAS_IEEE_DOUBLE is defined the value is handed to the 4-byte
   swapper; otherwise all 'sizeof(float)' bytes are reversed generically. */
void endian_swap(float &bytes) noexcept
{
    char *const raw = reinterpret_cast<char*>(&bytes);
    #ifdef HAS_IEEE_DOUBLE
    swap32b(raw);
    #else
    std::reverse(raw, raw + sizeof(float));
    #endif
}
|
|
259
|
+
/* Reverses the byte order of a 'double' in place.
   When HAS_IEEE_DOUBLE is defined the value is handed to the 8-byte
   swapper; otherwise all 'sizeof(double)' bytes are reversed generically. */
void endian_swap(double &bytes) noexcept
{
    char *const raw = reinterpret_cast<char*>(&bytes);
    #ifdef HAS_IEEE_DOUBLE
    swap64b(raw);
    #else
    std::reverse(raw, raw + sizeof(double));
    #endif
}
|
|
267
|
+
/* A single byte has no byte order: this overload leaves 'bytes'
   untouched and exists alongside the wider integer overloads. */
void endian_swap(uint8_t &bytes) noexcept
{
    /* intentionally empty */
}
|
|
271
|
+
void endian_swap(uint16_t &bytes) noexcept
|
|
272
|
+
{
|
|
273
|
+
swap16b((char*)&bytes);
|
|
274
|
+
}
|
|
275
|
+
void endian_swap(uint32_t &bytes) noexcept
|
|
276
|
+
{
|
|
277
|
+
swap32b((char*)&bytes);
|
|
278
|
+
}
|
|
279
|
+
void endian_swap(uint64_t &bytes) noexcept
|
|
280
|
+
{
|
|
281
|
+
swap64b((char*)&bytes);
|
|
282
|
+
}
|
|
283
|
+
/* A single byte has no byte order: this overload leaves 'bytes'
   untouched and exists alongside the wider integer overloads. */
void endian_swap(int8_t &bytes) noexcept
{
    /* intentionally empty */
}
|
|
287
|
+
void endian_swap(int16_t &bytes) noexcept
|
|
288
|
+
{
|
|
289
|
+
swap16b((char*)&bytes);
|
|
290
|
+
}
|
|
291
|
+
void endian_swap(int32_t &bytes) noexcept
|
|
292
|
+
{
|
|
293
|
+
swap32b((char*)&bytes);
|
|
294
|
+
}
|
|
295
|
+
void endian_swap(int64_t &bytes) noexcept
|
|
296
|
+
{
|
|
297
|
+
swap64b((char*)&bytes);
|
|
298
|
+
}
|
|
299
|
+
/* Note: on macOS, some compilers will take 'size_t' as different from 'uint64_t',
|
|
300
|
+
hence it needs a separate one. However, in other compilers and platforms this
|
|
301
|
+
leads to a duplicated function definition, and thus needs this separation
|
|
302
|
+
in names (otherwise, compilers such as GCC will not compile it). */
|
|
303
|
+
void endian_swap_size_t(char *bytes) noexcept
|
|
304
|
+
{
|
|
305
|
+
#if (SIZE_MAX == UINT32_MAX)
|
|
306
|
+
swap32b(bytes);
|
|
307
|
+
#elif (SIZE_MAX == UINT64_MAX)
|
|
308
|
+
swap64b(bytes);
|
|
309
|
+
#else
|
|
310
|
+
std::reverse(bytes, bytes + sizeof(size_t));
|
|
311
|
+
#endif
|
|
312
|
+
}
|
|
313
|
+
void endian_swap_int(char *bytes) noexcept
|
|
52
314
|
{
|
|
53
|
-
|
|
54
|
-
|
|
315
|
+
#if (INT_MAX == INT16_MAX)
|
|
316
|
+
swap16b(bytes);
|
|
317
|
+
#elif (INT_MAX == INT32_MAX)
|
|
318
|
+
swap32b(bytes);
|
|
319
|
+
#elif (SIZE_MAX == INT64_MAX)
|
|
320
|
+
swap64b(bytes);
|
|
321
|
+
#else
|
|
322
|
+
std::reverse(bytes, bytes + sizeof(int));
|
|
323
|
+
#endif
|
|
55
324
|
}
|
|
56
325
|
/* Generic fallback for types without a dedicated overload: reverses all
   'sizeof(T)' bytes of the object in place. */
template <class T>
void endian_swap(T &bytes) noexcept
{
    char *const first = reinterpret_cast<char*>(&bytes);
    char *const last = first + sizeof(T);
    std::reverse(first, last);
}
|
|
330
|
+
|
|
331
|
+
/* Reverses the byte order of each of the 'n_els' elements starting at
   'ptr', in place.
   On compilers that do not define __GNUC__, 'size_t' and 'int' elements
   are routed to their dedicated swappers; every other case goes through
   the 'endian_swap' overload set. */
template <class dtype>
void swap_endianness(dtype *ptr, size_t n_els) noexcept
{
    dtype *const last = ptr + n_els;

    #ifndef __GNUC__
    if (std::is_same<dtype, size_t>::value)
    {
        for (dtype *el = ptr; el != last; ++el)
            endian_swap_size_t(reinterpret_cast<char*>(el));
        return;
    }

    else if (std::is_same<dtype, int>::value)
    {
        for (dtype *el = ptr; el != last; ++el)
            endian_swap_int(reinterpret_cast<char*>(el));
        return;
    }
    #endif

    for (dtype *el = ptr; el != last; ++el)
        endian_swap(*el);
}
|
|
66
|
-
|
|
67
|
-
|
|
353
|
+
|
|
354
|
+
/* Read-only in-memory overload: the position to come back to is simply
   the current pointer value, returned unchanged. */
const char* set_return_position(const char *in) noexcept
{
    const char *const saved = in;
    return saved;
}
|
|
72
|
-
|
|
73
|
-
|
|
358
|
+
|
|
359
|
+
/* Writable in-memory overload: the position to come back to is simply
   the current pointer value, returned unchanged. */
char* set_return_position(char *in) noexcept
{
    char *const saved = in;
    return saved;
}
|
|
89
363
|
|
|
364
|
+
/* FILE overload: returns the current offset of 'in' as reported by
   'ftell_' (whichever tell function that macro was mapped to above). */
fpos_t_ set_return_position(FILE *in)
{
    const fpos_t_ current_offset = ftell_(in);
    return current_offset;
}
|
|
90
368
|
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
* with 'build_imputer=true'.
|
|
100
|
-
* - output_obj (out)
|
|
101
|
-
* An already-allocated object into which a serialized object of the same class will
|
|
102
|
-
* be de-serialized. The contents of this object will be overwritten. Should be initialized
|
|
103
|
-
* through the default constructor (e.g. 'new ExtIsoForest' or 'ExtIsoForest()').
|
|
104
|
-
* - output (out)
|
|
105
|
-
* An output stream (any type will do) in which to save/persist/serialize the
|
|
106
|
-
* model or imputer object using the cereal library. In the functions that do not
|
|
107
|
-
* take this parameter, it will be returned as a string containing the raw bytes.
|
|
108
|
-
* - serialized (in)
|
|
109
|
-
* The input stream which contains the serialized/saved/persisted model or imputer object,
|
|
110
|
-
* which will be de-serialized into 'output'.
|
|
111
|
-
* - output_file_path
|
|
112
|
-
* File name into which to write the serialized model or imputer object as raw bytes.
|
|
113
|
-
* Note that, on Windows, passing non-ASCII characters will fail, and in such case,
|
|
114
|
-
* you might instead want to use instead the versions that take 'wchar_t', which are
|
|
115
|
-
* only available in the MSVC compiler (it uses 'std::ofstream' internally, which as
|
|
116
|
-
* of C++20, is not required by the standard to accept 'wchar_t' in its constructor).
|
|
117
|
-
* Be aware that it will only write raw bytes, thus metadata such as CPU endianness
|
|
118
|
-
* will be lost. If you need to transfer files berween e.g. an x86 computer and a SPARC
|
|
119
|
-
* server, you'll have to use other methods.
|
|
120
|
-
* This functionality is intended for being easily wrapper into scripting languages
|
|
121
|
-
* without having to copy the contents to to some intermediate language.
|
|
122
|
-
* - input_file_path
|
|
123
|
-
* File name from which to read a serialized model or imputer object as raw bytes.
|
|
124
|
-
* See the description for 'output_file_path' for more details.
|
|
125
|
-
* - move_str
|
|
126
|
-
* Whether to move ('std::move') the contents of the string passed as input in order
|
|
127
|
-
* to speed things up and avoid making a redundant copy of the raw bytes. If passing
|
|
128
|
-
* 'true', the input string will be rendered empty afterwards.
|
|
129
|
-
*/
|
|
130
|
-
void serialize_isoforest(IsoForest &model, std::ostream &output)
|
|
369
|
+
#define pos_type_istream decltype(std::declval<std::istream>().tellg())
|
|
370
|
+
|
|
371
|
+
/* Input-stream overload: returns the current read position of 'in'
   ('tellg'). */
pos_type_istream set_return_position(std::istream &in)
{
    const pos_type_istream read_pos = in.tellg();
    return read_pos;
}
|
|
375
|
+
|
|
376
|
+
/* Output-stream overload: returns the current write position of the
   stream ('tellp'). The parameter keeps the name 'in' for symmetry with
   the other overloads even though it is an output stream. */
pos_type_istream set_return_position(std::ostream &in)
{
    const pos_type_istream write_pos = in.tellp();
    return write_pos;
}
|
|
134
|
-
|
|
380
|
+
|
|
381
|
+
/* Read-only in-memory overload: rewinds the cursor 'in' by overwriting
   it with the previously saved pointer. */
void return_to_position(const char *&in, const char *saved_position) noexcept
{
    in = saved_position;
}
|
|
385
|
+
|
|
386
|
+
/* Writable in-memory overload: rewinds the cursor 'in' by overwriting
   it with the previously saved pointer. */
void return_to_position(char *&in, char *saved_position) noexcept
{
    in = saved_position;
}
|
|
139
|
-
|
|
390
|
+
|
|
391
|
+
void return_to_position(FILE *&in, fpos_t_ saved_position)
|
|
140
392
|
{
|
|
141
|
-
|
|
393
|
+
fseek_(in, saved_position, SEEK_SET);
|
|
142
394
|
}
|
|
143
|
-
|
|
395
|
+
|
|
396
|
+
/* Input-stream overload: moves the read position of 'in' back to
   'saved_position' ('seekg'). */
void return_to_position(std::istream &in, pos_type_istream saved_position)
{
    in.seekg(saved_position);
}
|
|
147
|
-
|
|
400
|
+
|
|
401
|
+
/* Output-stream overload: moves the write position of the stream back
   to 'saved_position' ('seekp'). The parameter keeps the name 'in' for
   symmetry with the other overloads. */
void return_to_position(std::ostream &in, pos_type_istream saved_position)
{
    in.seekp(saved_position);
}
|
|
152
|
-
|
|
405
|
+
|
|
406
|
+
|
|
407
|
+
/* Reports whether this build was compiled with WCHAR_T_FUNS defined:
   'true' when the macro is defined, 'false' otherwise. */
bool has_wchar_t_file_serializers() noexcept
{
    #ifdef WCHAR_T_FUNS
    constexpr bool available = true;
    #else
    constexpr bool available = false;
    #endif
    return available;
}
|
|
156
415
|
|
|
416
|
+
/* Throws 'std::runtime_error' with the message
   "Error <errno> <strerror(errno)>\n".
   'errno' is captured once up front: building the message allocates
   strings, and library calls are allowed to modify 'errno', so reading
   it twice inside one expression could pair the numeric code of one
   error with the description of another. */
void throw_errno()
{
    const int saved_errno = errno;

    std::string message = "Error ";
    message += std::to_string(saved_errno);
    message += " ";
    message += strerror(saved_errno);
    message += "\n";

    throw std::runtime_error(message);
}
|
|
157
420
|
|
|
421
|
+
void throw_ferror(FILE *file)
|
|
422
|
+
{
|
|
423
|
+
if (!errno) fflush(file);
|
|
424
|
+
throw_errno();
|
|
425
|
+
}
|
|
158
426
|
|
|
159
|
-
void
|
|
427
|
+
/* Throws 'std::runtime_error' signalling that the input ended before
   the expected amount of data was available. */
void throw_feoferr()
{
    static const char *const message = "Error: file ended unexpectedly.\n";
    throw std::runtime_error(message);
}
|
|
163
|
-
|
|
431
|
+
|
|
432
|
+
/* Converts 'n_els' values stored in 'buffer' as 'saved_type' into
   'dtype', writing them to 'ptr_write_'.
   - ptr_write_: destination, treated as an array of 'dtype'.
   - buffer: raw bytes, treated as an array of 'saved_type'.
   - n_els: number of elements to convert.
   When 'dtype' is no wider than 'saved_type' and its maximum is below
   the maximum of 'saved_type', every value is first checked against the
   maximum of 'dtype'; a larger value throws 'std::runtime_error' before
   anything is written. Only the upper bound is checked. */
template <class dtype, class saved_type>
void convert_dtype(void *ptr_write_, std::vector<char> &buffer, size_t n_els)
{
    dtype *const dst = static_cast<dtype*>(ptr_write_);
    saved_type *const src = reinterpret_cast<saved_type*>(buffer.data());

    const bool may_overflow =
        (sizeof(dtype) <= sizeof(saved_type)) &&
        (static_cast<saved_type>(std::numeric_limits<dtype>::max()) < std::numeric_limits<saved_type>::max());

    if (may_overflow)
    {
        const saved_type upper_limit = static_cast<saved_type>(std::numeric_limits<dtype>::max());
        for (size_t ix = 0; ix < n_els; ix++)
        {
            if (unlikely(src[ix] > upper_limit))
                throw std::runtime_error("Error: serialized model has values too large for the current machine's types.\n");
        }
    }

    for (size_t ix = 0; ix < n_els; ix++)
        dst[ix] = static_cast<dtype>(src[ix]);
}
|
|
168
|
-
|
|
450
|
+
|
|
451
|
+
/* Copies 'n_els' elements of 'dtype' from 'ptr' into the memory at
   'out' and advances 'out' past the bytes written. Does nothing when
   'n_els' is zero. */
template <class dtype>
void write_bytes(const void *ptr, const size_t n_els, char *&out) noexcept
{
    if (n_els == 0) return;

    const size_t n_bytes = n_els * sizeof(dtype);
    memcpy(out, ptr, n_bytes);
    out += n_bytes;
}
|
|
172
|
-
|
|
458
|
+
|
|
459
|
+
template <class dtype>
|
|
460
|
+
void write_bytes(const void *ptr, const size_t n_els, std::ostream &out)
|
|
173
461
|
{
|
|
174
|
-
|
|
462
|
+
if (n_els == 0) return;
|
|
463
|
+
out.write((char*)ptr, n_els * sizeof(dtype));
|
|
464
|
+
if (unlikely(out.bad())) throw_errno();
|
|
175
465
|
}
|
|
176
|
-
|
|
466
|
+
|
|
467
|
+
template <class dtype>
|
|
468
|
+
void write_bytes(const void *ptr, const size_t n_els, FILE *&out)
|
|
177
469
|
{
|
|
178
|
-
|
|
179
|
-
|
|
470
|
+
if (n_els == 0) return;
|
|
471
|
+
size_t n_written = fwrite(ptr, sizeof(dtype), n_els, out);
|
|
472
|
+
if (n_written != n_els || ferror(out)) throw_ferror(out);
|
|
180
473
|
}
|
|
181
|
-
|
|
474
|
+
|
|
475
|
+
/* Copies 'n_els' elements of 'dtype' from the memory at 'in' into 'ptr'
   and advances 'in' past the bytes consumed. Does nothing when 'n_els'
   is zero. */
template <class dtype>
void read_bytes(void *ptr, const size_t n_els, const char *&in) noexcept
{
    if (n_els == 0) return;

    const size_t n_bytes = n_els * sizeof(dtype);
    memcpy(ptr, in, n_bytes);
    in += n_bytes;
}
|
|
185
482
|
|
|
483
|
+
template <class dtype, class saved_type>
|
|
484
|
+
void read_bytes(void *ptr, const size_t n_els, const char *&in, std::vector<char> &buffer, const bool diff_endian)
|
|
485
|
+
{
|
|
486
|
+
if (std::is_same<dtype, saved_type>::value)
|
|
487
|
+
{
|
|
488
|
+
read_bytes<dtype>(ptr, n_els, in);
|
|
489
|
+
if (unlikely(diff_endian)) swap_endianness((dtype*)ptr, n_els);
|
|
490
|
+
return;
|
|
491
|
+
}
|
|
492
|
+
if (n_els == 0) return;
|
|
493
|
+
if (unlikely(buffer.size() < n_els * sizeof(saved_type)))
|
|
494
|
+
buffer.resize((size_t)2 * n_els * sizeof(saved_type));
|
|
495
|
+
memcpy(buffer.data(), in, n_els * sizeof(saved_type));
|
|
496
|
+
in += n_els * sizeof(saved_type);
|
|
186
497
|
|
|
498
|
+
if (unlikely(diff_endian)) swap_endianness((saved_type*)buffer.data(), n_els);
|
|
499
|
+
convert_dtype<dtype, saved_type>(ptr, buffer, n_els);
|
|
500
|
+
}
|
|
187
501
|
|
|
502
|
+
/* Copies 'n_els' elements of 'dtype' from the memory at 'in' into 'ptr'
   and advances 'in' past the bytes consumed. Does nothing when 'n_els'
   is zero. */
template <class dtype>
void read_bytes(void *ptr, const size_t n_els, char *&in) noexcept
{
    if (n_els == 0) return;

    const size_t n_bytes = n_els * sizeof(dtype);
    memcpy(ptr, in, n_bytes);
    in += n_bytes;
}
|
|
188
509
|
|
|
189
|
-
|
|
510
|
+
template <class dtype, class saved_type>
|
|
511
|
+
void read_bytes(void *ptr, const size_t n_els, char *&in, std::vector<char> &buffer, const bool diff_endian)
|
|
190
512
|
{
|
|
191
|
-
|
|
513
|
+
if (std::is_same<dtype, saved_type>::value)
|
|
514
|
+
{
|
|
515
|
+
read_bytes<dtype>(ptr, n_els, in);
|
|
516
|
+
if (unlikely(diff_endian)) swap_endianness((dtype*)ptr, n_els);
|
|
517
|
+
return;
|
|
518
|
+
}
|
|
519
|
+
if (n_els == 0) return;
|
|
520
|
+
if (unlikely(buffer.size() < n_els * sizeof(saved_type)))
|
|
521
|
+
buffer.resize((size_t)2 * n_els * sizeof(saved_type));
|
|
522
|
+
memcpy(buffer.data(), in, n_els * sizeof(saved_type));
|
|
523
|
+
in += n_els * sizeof(saved_type);
|
|
524
|
+
|
|
525
|
+
if (unlikely(diff_endian)) swap_endianness((saved_type*)buffer.data(), n_els);
|
|
526
|
+
convert_dtype<dtype, saved_type>(ptr, buffer, n_els);
|
|
192
527
|
}
|
|
193
|
-
|
|
528
|
+
|
|
529
|
+
template <class dtype>
|
|
530
|
+
void read_bytes(void *ptr, const size_t n_els, std::istream &in)
|
|
194
531
|
{
|
|
195
|
-
|
|
196
|
-
|
|
532
|
+
if (n_els == 0) return;
|
|
533
|
+
in.read((char*)ptr, n_els * sizeof(dtype));
|
|
534
|
+
if (unlikely(in.bad())) throw_errno();
|
|
197
535
|
}
|
|
198
|
-
|
|
536
|
+
|
|
537
|
+
template <class dtype, class saved_type>
|
|
538
|
+
void read_bytes(void *ptr, const size_t n_els, std::istream &in, std::vector<char> &buffer, const bool diff_endian)
|
|
199
539
|
{
|
|
200
|
-
|
|
540
|
+
if (std::is_same<dtype, saved_type>::value)
|
|
541
|
+
{
|
|
542
|
+
read_bytes<dtype>(ptr, n_els, in);
|
|
543
|
+
if (unlikely(diff_endian)) swap_endianness((dtype*)ptr, n_els);
|
|
544
|
+
return;
|
|
545
|
+
}
|
|
546
|
+
if (n_els == 0) return;
|
|
547
|
+
if (unlikely(buffer.size() < n_els * sizeof(saved_type)))
|
|
548
|
+
buffer.resize((size_t)2 * n_els * sizeof(saved_type));
|
|
549
|
+
in.read((char*)buffer.data(), n_els * sizeof(saved_type));
|
|
550
|
+
if (unlikely(in.bad())) throw_errno();
|
|
551
|
+
|
|
552
|
+
if (unlikely(diff_endian)) swap_endianness((saved_type*)buffer.data(), n_els);
|
|
553
|
+
convert_dtype<dtype, saved_type>(ptr, buffer, n_els);
|
|
201
554
|
}
|
|
202
|
-
|
|
555
|
+
|
|
556
|
+
template <class dtype>
|
|
557
|
+
void read_bytes(void *ptr, const size_t n_els, FILE *&in)
|
|
203
558
|
{
|
|
204
|
-
|
|
559
|
+
if (n_els == 0) return;
|
|
560
|
+
if (unlikely(feof(in))) throw_feoferr();
|
|
561
|
+
size_t n_read = fread(ptr, sizeof(dtype), n_els, in);
|
|
562
|
+
if (unlikely(n_read != n_els || ferror(in))) throw_ferror(in);
|
|
205
563
|
}
|
|
206
|
-
|
|
564
|
+
|
|
565
|
+
template <class dtype, class saved_type>
|
|
566
|
+
void read_bytes(void *ptr, const size_t n_els, FILE *&in, std::vector<char> &buffer, const bool diff_endian)
|
|
207
567
|
{
|
|
208
|
-
std::
|
|
209
|
-
|
|
568
|
+
if (std::is_same<dtype, saved_type>::value)
|
|
569
|
+
{
|
|
570
|
+
read_bytes<dtype>(ptr, n_els, in);
|
|
571
|
+
if (unlikely(diff_endian)) swap_endianness((dtype*)ptr, n_els);
|
|
572
|
+
return;
|
|
573
|
+
}
|
|
574
|
+
if (n_els == 0) return;
|
|
575
|
+
if (unlikely(feof(in))) throw_feoferr();
|
|
576
|
+
if (unlikely(buffer.size() < n_els * sizeof(saved_type)))
|
|
577
|
+
buffer.resize((size_t)2 * n_els * sizeof(saved_type));
|
|
578
|
+
size_t n_read = fread(buffer.data(), sizeof(saved_type), n_els, in);
|
|
579
|
+
if (unlikely(n_read != n_els || ferror(in))) throw_ferror(in);
|
|
580
|
+
|
|
581
|
+
if (unlikely(diff_endian)) swap_endianness((saved_type*)buffer.data(), n_els);
|
|
582
|
+
convert_dtype<dtype, saved_type>(ptr, buffer, n_els);
|
|
210
583
|
}
|
|
211
|
-
|
|
584
|
+
|
|
585
|
+
/* Fills 'vec' with exactly 'n_els' elements of type 'dtype' taken from the byte
   cursor 'in', trims the vector's spare capacity, and moves the cursor past the
   bytes that were consumed. With 'n_els' equal to zero the vector ends up empty
   and the cursor is left where it was.

   The bytes are transferred with memcpy rather than by dereferencing 'in' as a
   'dtype*': the cursor points into a packed byte stream and carries no alignment
   guarantee for 'dtype', so a typed read through it would be undefined
   behaviour. 'dtype' must be trivially copyable. */
template <class dtype>
void read_bytes(std::vector<dtype> &vec, const size_t n_els, const char *&in)
{
    vec.resize(n_els);
    vec.shrink_to_fit();

    if (n_els) {
        const size_t n_bytes = n_els * sizeof(dtype);
        memcpy(vec.data(), in, n_bytes);
        in += n_bytes;
    }
}
|
|
215
595
|
|
|
596
|
+
template <class dtype, class saved_type>
|
|
597
|
+
void read_bytes(std::vector<dtype> &vec, const size_t n_els, const char *&in, std::vector<char> &buffer, const bool diff_endian)
|
|
598
|
+
{
|
|
599
|
+
if (std::is_same<dtype, saved_type>::value)
|
|
600
|
+
{
|
|
601
|
+
read_bytes<dtype>(vec, n_els, in);
|
|
602
|
+
if (unlikely(diff_endian)) swap_endianness(vec.data(), n_els);
|
|
603
|
+
return;
|
|
604
|
+
}
|
|
605
|
+
if (n_els) {
|
|
606
|
+
if (unlikely(buffer.size() < n_els * sizeof(saved_type)))
|
|
607
|
+
buffer.resize((size_t)2 * n_els * sizeof(saved_type));
|
|
608
|
+
read_bytes<saved_type>(buffer.data(), n_els, in);
|
|
609
|
+
vec.resize(n_els);
|
|
610
|
+
vec.shrink_to_fit();
|
|
611
|
+
|
|
612
|
+
if (unlikely(diff_endian)) swap_endianness((saved_type*)buffer.data(), n_els);
|
|
613
|
+
convert_dtype<dtype, saved_type>(vec.data(), buffer, n_els);
|
|
614
|
+
}
|
|
615
|
+
|
|
616
|
+
else {
|
|
617
|
+
vec.clear();
|
|
618
|
+
vec.shrink_to_fit();
|
|
619
|
+
}
|
|
216
620
|
|
|
217
|
-
|
|
218
|
-
|
|
621
|
+
in += n_els * sizeof(saved_type);
|
|
622
|
+
}
|
|
623
|
+
|
|
624
|
+
template <class dtype>
|
|
625
|
+
void read_bytes(std::vector<dtype> &vec, const size_t n_els, std::istream &in)
|
|
219
626
|
{
|
|
220
|
-
|
|
221
|
-
|
|
627
|
+
vec.resize(n_els);
|
|
628
|
+
vec.shrink_to_fit();
|
|
629
|
+
|
|
630
|
+
if (n_els) {
|
|
631
|
+
in.read((char*)vec.data(), n_els * sizeof(dtype));
|
|
632
|
+
if (unlikely(in.bad())) throw_errno();
|
|
633
|
+
}
|
|
222
634
|
}
|
|
223
|
-
|
|
635
|
+
|
|
636
|
+
template <class dtype, class saved_type>
|
|
637
|
+
void read_bytes(std::vector<dtype> &vec, const size_t n_els, std::istream &in, std::vector<char> &buffer, const bool diff_endian)
|
|
224
638
|
{
|
|
225
|
-
std::
|
|
226
|
-
|
|
639
|
+
if (std::is_same<dtype, saved_type>::value)
|
|
640
|
+
{
|
|
641
|
+
read_bytes<dtype>(vec, n_els, in);
|
|
642
|
+
if (unlikely(diff_endian)) swap_endianness(vec.data(), n_els);
|
|
643
|
+
return;
|
|
644
|
+
}
|
|
645
|
+
vec.resize(n_els);
|
|
646
|
+
vec.shrink_to_fit();
|
|
647
|
+
|
|
648
|
+
if (n_els) {
|
|
649
|
+
if (unlikely(buffer.size() < n_els * sizeof(saved_type)))
|
|
650
|
+
buffer.resize((size_t)2 * n_els * sizeof(saved_type));
|
|
651
|
+
in.read(buffer.data(), n_els * sizeof(saved_type));
|
|
652
|
+
if (unlikely(in.bad())) throw_errno();
|
|
653
|
+
|
|
654
|
+
if (unlikely(diff_endian)) swap_endianness((saved_type*)buffer.data(), n_els);
|
|
655
|
+
convert_dtype<dtype, saved_type>(vec.data(), buffer, n_els);
|
|
656
|
+
}
|
|
227
657
|
}
|
|
228
|
-
|
|
658
|
+
|
|
659
|
+
template <class dtype>
|
|
660
|
+
void read_bytes(std::vector<dtype> &vec, const size_t n_els, FILE *&in)
|
|
229
661
|
{
|
|
230
|
-
|
|
231
|
-
|
|
662
|
+
vec.resize(n_els);
|
|
663
|
+
vec.shrink_to_fit();
|
|
664
|
+
|
|
665
|
+
if (n_els) {
|
|
666
|
+
if (unlikely(feof(in))) throw_feoferr();
|
|
667
|
+
size_t n_read = fread(vec.data(), sizeof(dtype), n_els, in);
|
|
668
|
+
if (unlikely(n_read != n_els || ferror(in))) throw_ferror(in);
|
|
669
|
+
}
|
|
232
670
|
}
|
|
233
|
-
|
|
671
|
+
|
|
672
|
+
template <class dtype, class saved_type>
|
|
673
|
+
void read_bytes(std::vector<dtype> &vec, const size_t n_els, FILE *&in, std::vector<char> &buffer, const bool diff_endian)
|
|
234
674
|
{
|
|
235
|
-
std::
|
|
236
|
-
|
|
675
|
+
if (std::is_same<dtype, saved_type>::value)
|
|
676
|
+
{
|
|
677
|
+
read_bytes<dtype>(vec, n_els, in);
|
|
678
|
+
if (unlikely(diff_endian)) swap_endianness(vec.data(), n_els);
|
|
679
|
+
return;
|
|
680
|
+
}
|
|
681
|
+
vec.resize(n_els);
|
|
682
|
+
vec.shrink_to_fit();
|
|
683
|
+
|
|
684
|
+
if (n_els) {
|
|
685
|
+
if (unlikely(feof(in))) throw_feoferr();
|
|
686
|
+
if (unlikely(buffer.size() < n_els * sizeof(saved_type)))
|
|
687
|
+
buffer.resize((size_t)2 * n_els * sizeof(saved_type));
|
|
688
|
+
|
|
689
|
+
size_t n_read = fread(buffer.data(), sizeof(saved_type), n_els, in);
|
|
690
|
+
if (unlikely(n_read != n_els || ferror(in))) throw_ferror(in);
|
|
691
|
+
|
|
692
|
+
if (unlikely(diff_endian)) swap_endianness((saved_type*)buffer.data(), n_els);
|
|
693
|
+
convert_dtype<dtype, saved_type>(vec.data(), buffer, n_els);
|
|
694
|
+
}
|
|
237
695
|
}
|
|
238
|
-
|
|
696
|
+
|
|
697
|
+
size_t get_size_node(const IsoTree &node) noexcept
|
|
239
698
|
{
|
|
240
|
-
|
|
241
|
-
|
|
699
|
+
size_t n_bytes = 0;
|
|
700
|
+
n_bytes += sizeof(uint8_t);
|
|
701
|
+
n_bytes += sizeof(int);
|
|
702
|
+
n_bytes += sizeof(double) * 6;
|
|
703
|
+
n_bytes += sizeof(size_t) * 4;
|
|
704
|
+
n_bytes += sizeof(signed char) * node.cat_split.size();
|
|
705
|
+
return n_bytes;
|
|
242
706
|
}
|
|
243
|
-
|
|
707
|
+
|
|
708
|
+
template <class otype>
|
|
709
|
+
void serialize_node(const IsoTree &node, otype &out)
|
|
244
710
|
{
|
|
245
|
-
|
|
246
|
-
|
|
711
|
+
if (interrupt_switch) return;
|
|
712
|
+
|
|
713
|
+
uint8_t data_en = (uint8_t)node.col_type;
|
|
714
|
+
write_bytes<uint8_t>((void*)&data_en, (size_t)1, out);
|
|
715
|
+
|
|
716
|
+
write_bytes<int>((void*)&node.chosen_cat, (size_t)1, out);
|
|
717
|
+
|
|
718
|
+
double data_doubles[] = {
|
|
719
|
+
node.num_split,
|
|
720
|
+
node.pct_tree_left,
|
|
721
|
+
node.score,
|
|
722
|
+
node.range_low,
|
|
723
|
+
node.range_high,
|
|
724
|
+
node.remainder
|
|
725
|
+
};
|
|
726
|
+
write_bytes<double>((void*)data_doubles, (size_t)6, out);
|
|
727
|
+
|
|
728
|
+
size_t data_sizets[] = {
|
|
729
|
+
node.col_num,
|
|
730
|
+
node.tree_left,
|
|
731
|
+
node.tree_right,
|
|
732
|
+
node.cat_split.size()
|
|
733
|
+
};
|
|
734
|
+
write_bytes<size_t>((void*)data_sizets, (size_t)4, out);
|
|
735
|
+
|
|
736
|
+
if (node.cat_split.size())
|
|
737
|
+
write_bytes<signed char>((void*)node.cat_split.data(), node.cat_split.size(), out);
|
|
247
738
|
}
|
|
248
|
-
|
|
739
|
+
|
|
740
|
+
template <class itype>
|
|
741
|
+
void deserialize_node(IsoTree &node, itype &in)
|
|
249
742
|
{
|
|
250
|
-
return
|
|
743
|
+
if (interrupt_switch) return;
|
|
744
|
+
|
|
745
|
+
uint8_t data_en;
|
|
746
|
+
read_bytes<uint8_t>((void*)&data_en, (size_t)1, in);
|
|
747
|
+
node.col_type = (ColType)data_en;
|
|
748
|
+
|
|
749
|
+
read_bytes<int>((void*)&node.chosen_cat, (size_t)1, in);
|
|
750
|
+
|
|
751
|
+
double data_doubles[6];
|
|
752
|
+
read_bytes<double>((void*)data_doubles, (size_t)6, in);
|
|
753
|
+
node.num_split = data_doubles[0];
|
|
754
|
+
node.pct_tree_left = data_doubles[1];
|
|
755
|
+
node.score = data_doubles[2];
|
|
756
|
+
node.range_low = data_doubles[3];
|
|
757
|
+
node.range_high = data_doubles[4];
|
|
758
|
+
node.remainder = data_doubles[5];
|
|
759
|
+
|
|
760
|
+
size_t data_sizets[4];
|
|
761
|
+
read_bytes<size_t>((void*)data_sizets, (size_t)4, in);
|
|
762
|
+
node.col_num = data_sizets[0];
|
|
763
|
+
node.tree_left = data_sizets[1];
|
|
764
|
+
node.tree_right = data_sizets[2];
|
|
765
|
+
read_bytes<signed char>(node.cat_split, data_sizets[3], in);
|
|
251
766
|
}
|
|
252
767
|
|
|
253
|
-
|
|
254
|
-
bool
|
|
768
|
+
template <class itype, class saved_int_t, class saved_size_t>
|
|
769
|
+
void deserialize_node(IsoTree &node, itype &in, std::vector<char> &buffer, const bool diff_endian)
|
|
255
770
|
{
|
|
256
|
-
return
|
|
771
|
+
if (interrupt_switch) return;
|
|
772
|
+
|
|
773
|
+
uint8_t data_en;
|
|
774
|
+
read_bytes<uint8_t>((void*)&data_en, (size_t)1, in);
|
|
775
|
+
node.col_type = (ColType)data_en;
|
|
776
|
+
|
|
777
|
+
read_bytes<int, saved_int_t>((void*)&node.chosen_cat, (size_t)1, in, buffer, diff_endian);
|
|
778
|
+
|
|
779
|
+
double data_doubles[6];
|
|
780
|
+
read_bytes<double, double>((void*)data_doubles, (size_t)6, in, buffer, diff_endian);
|
|
781
|
+
node.num_split = data_doubles[0];
|
|
782
|
+
node.pct_tree_left = data_doubles[1];
|
|
783
|
+
node.score = data_doubles[2];
|
|
784
|
+
node.range_low = data_doubles[3];
|
|
785
|
+
node.range_high = data_doubles[4];
|
|
786
|
+
node.remainder = data_doubles[5];
|
|
787
|
+
|
|
788
|
+
size_t data_sizets[4];
|
|
789
|
+
read_bytes<size_t, saved_size_t>((void*)data_sizets, (size_t)4, in, buffer, diff_endian);
|
|
790
|
+
node.col_num = data_sizets[0];
|
|
791
|
+
node.tree_left = data_sizets[1];
|
|
792
|
+
node.tree_right = data_sizets[2];
|
|
793
|
+
read_bytes<signed char, signed char>(node.cat_split, data_sizets[3], in, buffer, diff_endian);
|
|
794
|
+
}
|
|
795
|
+
|
|
796
|
+
size_t get_size_node(const IsoHPlane &node) noexcept
|
|
797
|
+
{
|
|
798
|
+
size_t n_bytes = 0;
|
|
799
|
+
n_bytes += sizeof(double) * 5;
|
|
800
|
+
n_bytes += sizeof(size_t) * 10;
|
|
801
|
+
n_bytes += sizeof(size_t) * node.col_num.size();
|
|
802
|
+
if (node.col_type.size()) {
|
|
803
|
+
n_bytes += sizeof(uint8_t)*node.col_type.size();
|
|
804
|
+
}
|
|
805
|
+
n_bytes += sizeof(double)*node.coef.size();
|
|
806
|
+
n_bytes += sizeof(double)*node.mean.size();
|
|
807
|
+
if (node.cat_coef.size()) {
|
|
808
|
+
for (const auto &vec : node.cat_coef) {
|
|
809
|
+
n_bytes += sizeof(size_t);
|
|
810
|
+
n_bytes += sizeof(double) * vec.size();
|
|
811
|
+
}
|
|
812
|
+
}
|
|
813
|
+
n_bytes += sizeof(int)*node.chosen_cat.size();
|
|
814
|
+
n_bytes += sizeof(double)*node.fill_val.size();
|
|
815
|
+
n_bytes += sizeof(double)*node.fill_new.size();
|
|
816
|
+
return n_bytes;
|
|
817
|
+
}
|
|
818
|
+
|
|
819
|
+
template <class otype>
|
|
820
|
+
void serialize_node(const IsoHPlane &node, otype &out, std::vector<uint8_t> &buffer)
|
|
821
|
+
{
|
|
822
|
+
if (interrupt_switch) return;
|
|
823
|
+
|
|
824
|
+
double data_doubles[] = {
|
|
825
|
+
node.split_point,
|
|
826
|
+
node.score,
|
|
827
|
+
node.range_low,
|
|
828
|
+
node.range_high,
|
|
829
|
+
node.remainder
|
|
830
|
+
};
|
|
831
|
+
write_bytes<double>((void*)data_doubles, (size_t)5, out);
|
|
832
|
+
|
|
833
|
+
size_t data_sizets[] = {
|
|
834
|
+
node.hplane_left,
|
|
835
|
+
node.hplane_right,
|
|
836
|
+
node.col_num.size(),
|
|
837
|
+
node.col_type.size(),
|
|
838
|
+
node.coef.size(),
|
|
839
|
+
node.mean.size(),
|
|
840
|
+
node.cat_coef.size(),
|
|
841
|
+
node.chosen_cat.size(),
|
|
842
|
+
node.fill_val.size(),
|
|
843
|
+
node.fill_new.size()
|
|
844
|
+
};
|
|
845
|
+
write_bytes<size_t>((void*)data_sizets, (size_t)10, out);
|
|
846
|
+
|
|
847
|
+
write_bytes<size_t>((void*)node.col_num.data(), node.col_num.size(), out);
|
|
848
|
+
|
|
849
|
+
if (node.col_type.size()) {
|
|
850
|
+
if (buffer.size() < node.col_type.size())
|
|
851
|
+
buffer.resize((size_t)2 * node.col_type.size());
|
|
852
|
+
for (size_t ix = 0; ix < node.col_type.size(); ix++)
|
|
853
|
+
buffer[ix] = (uint8_t)node.col_type[ix];
|
|
854
|
+
write_bytes<uint8_t>((void*)buffer.data(), node.col_type.size(), out);
|
|
855
|
+
}
|
|
856
|
+
|
|
857
|
+
write_bytes<double>((void*)node.coef.data(), node.coef.size(), out);
|
|
858
|
+
|
|
859
|
+
write_bytes<double>((void*)node.mean.data(), node.mean.size(), out);
|
|
860
|
+
|
|
861
|
+
if (node.cat_coef.size()) {
|
|
862
|
+
size_t veclen;
|
|
863
|
+
for (const auto &vec : node.cat_coef) {
|
|
864
|
+
veclen = vec.size();
|
|
865
|
+
write_bytes<size_t>((void*)&veclen, (size_t)1, out);
|
|
866
|
+
write_bytes<double>((void*)vec.data(), vec.size(), out);
|
|
867
|
+
}
|
|
868
|
+
}
|
|
869
|
+
|
|
870
|
+
write_bytes<int>((void*)node.chosen_cat.data(), node.chosen_cat.size(), out);
|
|
871
|
+
|
|
872
|
+
write_bytes<double>((void*)node.fill_val.data(), node.fill_val.size(), out);
|
|
873
|
+
|
|
874
|
+
write_bytes<double>((void*)node.fill_new.data(), node.fill_new.size(), out);
|
|
875
|
+
}
|
|
876
|
+
|
|
877
|
+
template <class itype>
|
|
878
|
+
void deserialize_node(IsoHPlane &node, itype &in, std::vector<uint8_t> &buffer)
|
|
879
|
+
{
|
|
880
|
+
if (interrupt_switch) return;
|
|
881
|
+
|
|
882
|
+
double data_doubles[5];
|
|
883
|
+
read_bytes<double>((void*)data_doubles, (size_t)5, in);
|
|
884
|
+
node.split_point = data_doubles[0];
|
|
885
|
+
node.score = data_doubles[1];
|
|
886
|
+
node.range_low = data_doubles[2];
|
|
887
|
+
node.range_high = data_doubles[3];
|
|
888
|
+
node.remainder = data_doubles[4];
|
|
889
|
+
|
|
890
|
+
size_t data_sizets[10];
|
|
891
|
+
read_bytes<size_t>((void*)data_sizets, (size_t)10, in);
|
|
892
|
+
|
|
893
|
+
node.hplane_left = data_sizets[0];
|
|
894
|
+
node.hplane_right = data_sizets[1];
|
|
895
|
+
|
|
896
|
+
read_bytes<size_t>(node.col_num, data_sizets[2], in);
|
|
897
|
+
|
|
898
|
+
if (data_sizets[3]) {
|
|
899
|
+
node.col_type.resize(data_sizets[3]);
|
|
900
|
+
node.col_type.shrink_to_fit();
|
|
901
|
+
if (buffer.size() < data_sizets[3])
|
|
902
|
+
buffer.resize((size_t)2 * data_sizets[3]);
|
|
903
|
+
read_bytes<uint8_t>((void*)buffer.data(), data_sizets[3], in);
|
|
904
|
+
for (size_t ix = 0; ix < data_sizets[3]; ix++)
|
|
905
|
+
node.col_type[ix] = (ColType)buffer[ix];
|
|
906
|
+
}
|
|
907
|
+
|
|
908
|
+
read_bytes<double>(node.coef, data_sizets[4], in);
|
|
909
|
+
|
|
910
|
+
read_bytes<double>(node.mean, data_sizets[5], in);
|
|
911
|
+
|
|
912
|
+
if (data_sizets[6]) {
|
|
913
|
+
node.cat_coef.resize(data_sizets[6]);
|
|
914
|
+
node.cat_coef.shrink_to_fit();
|
|
915
|
+
size_t veclen;
|
|
916
|
+
for (auto &vec : node.cat_coef) {
|
|
917
|
+
read_bytes<size_t>((void*)&veclen, (size_t)1, in);
|
|
918
|
+
read_bytes<double>(vec, veclen, in);
|
|
919
|
+
}
|
|
920
|
+
}
|
|
921
|
+
|
|
922
|
+
read_bytes<int>(node.chosen_cat, data_sizets[7], in);
|
|
923
|
+
|
|
924
|
+
read_bytes<double>(node.fill_val, data_sizets[8], in);
|
|
925
|
+
|
|
926
|
+
read_bytes<double>(node.fill_new, data_sizets[9], in);
|
|
927
|
+
}
|
|
928
|
+
|
|
929
|
+
template <class itype, class saved_int_t, class saved_size_t>
|
|
930
|
+
void deserialize_node(IsoHPlane &node, itype &in, std::vector<uint8_t> &buffer, std::vector<char> &buffer2, const bool diff_endian)
|
|
931
|
+
{
|
|
932
|
+
if (interrupt_switch) return;
|
|
933
|
+
|
|
934
|
+
double data_doubles[5];
|
|
935
|
+
read_bytes<double, double>((void*)data_doubles, (size_t)5, in, buffer2, diff_endian);
|
|
936
|
+
node.split_point = data_doubles[0];
|
|
937
|
+
node.score = data_doubles[1];
|
|
938
|
+
node.range_low = data_doubles[2];
|
|
939
|
+
node.range_high = data_doubles[3];
|
|
940
|
+
node.remainder = data_doubles[4];
|
|
941
|
+
|
|
942
|
+
size_t data_sizets[10];
|
|
943
|
+
read_bytes<size_t, saved_size_t>((void*)data_sizets, (size_t)10, in, buffer2, diff_endian);
|
|
944
|
+
|
|
945
|
+
node.hplane_left = data_sizets[0];
|
|
946
|
+
node.hplane_right = data_sizets[1];
|
|
947
|
+
|
|
948
|
+
read_bytes<size_t, saved_size_t>(node.col_num, data_sizets[2], in, buffer2, diff_endian);
|
|
949
|
+
|
|
950
|
+
if (data_sizets[3]) {
|
|
951
|
+
node.col_type.resize(data_sizets[3]);
|
|
952
|
+
node.col_type.shrink_to_fit();
|
|
953
|
+
if (buffer.size() < data_sizets[3])
|
|
954
|
+
buffer.resize((size_t)2 * data_sizets[3]);
|
|
955
|
+
read_bytes<uint8_t>((void*)buffer.data(), data_sizets[3], in);
|
|
956
|
+
for (size_t ix = 0; ix < data_sizets[3]; ix++)
|
|
957
|
+
node.col_type[ix] = (ColType)buffer[ix];
|
|
958
|
+
}
|
|
959
|
+
|
|
960
|
+
read_bytes<double, double>(node.coef, data_sizets[4], in, buffer2, diff_endian);
|
|
961
|
+
|
|
962
|
+
read_bytes<double, double>(node.mean, data_sizets[5], in, buffer2, diff_endian);
|
|
963
|
+
|
|
964
|
+
if (data_sizets[6]) {
|
|
965
|
+
node.cat_coef.resize(data_sizets[6]);
|
|
966
|
+
node.cat_coef.shrink_to_fit();
|
|
967
|
+
size_t veclen;
|
|
968
|
+
for (auto &vec : node.cat_coef) {
|
|
969
|
+
read_bytes<size_t, saved_size_t>((void*)&veclen, (size_t)1, in, buffer2, diff_endian);
|
|
970
|
+
read_bytes<double, double>(vec, veclen, in, buffer2, diff_endian);
|
|
971
|
+
}
|
|
972
|
+
}
|
|
973
|
+
|
|
974
|
+
read_bytes<int, saved_int_t>(node.chosen_cat, data_sizets[7], in, buffer2, diff_endian);
|
|
975
|
+
|
|
976
|
+
read_bytes<double, double>(node.fill_val, data_sizets[8], in, buffer2, diff_endian);
|
|
977
|
+
|
|
978
|
+
read_bytes<double, double>(node.fill_new, data_sizets[9], in, buffer2, diff_endian);
|
|
979
|
+
}
|
|
980
|
+
|
|
981
|
+
size_t get_size_node(const ImputeNode &node) noexcept
|
|
982
|
+
{
|
|
983
|
+
size_t n_bytes = 0;
|
|
984
|
+
n_bytes += sizeof(size_t) * 5;
|
|
985
|
+
n_bytes += sizeof(double) * node.num_sum.size();
|
|
986
|
+
n_bytes += sizeof(double) * node.num_weight.size();
|
|
987
|
+
if (node.cat_sum.size()) {
|
|
988
|
+
for (const auto &v : node.cat_sum) {
|
|
989
|
+
n_bytes += sizeof(size_t);
|
|
990
|
+
n_bytes += sizeof(double) * v.size();
|
|
991
|
+
}
|
|
992
|
+
}
|
|
993
|
+
n_bytes += sizeof(double) * node.cat_weight.size();
|
|
994
|
+
return n_bytes;
|
|
257
995
|
}
|
|
258
996
|
|
|
259
|
-
|
|
997
|
+
template <class otype>
|
|
998
|
+
void serialize_node(const ImputeNode &node, otype &out)
|
|
999
|
+
{
|
|
1000
|
+
if (interrupt_switch) return;
|
|
1001
|
+
|
|
1002
|
+
size_t data_sizets[] = {
|
|
1003
|
+
node.parent,
|
|
1004
|
+
node.num_sum.size(),
|
|
1005
|
+
node.num_weight.size(),
|
|
1006
|
+
node.cat_sum.size(),
|
|
1007
|
+
node.cat_weight.size(),
|
|
1008
|
+
};
|
|
1009
|
+
write_bytes<size_t>((void*)data_sizets, (size_t)5, out);
|
|
1010
|
+
|
|
1011
|
+
write_bytes<double>((void*)node.num_sum.data(), node.num_sum.size(), out);
|
|
1012
|
+
|
|
1013
|
+
write_bytes<double>((void*)node.num_weight.data(), node.num_weight.size(), out);
|
|
1014
|
+
|
|
1015
|
+
if (node.cat_sum.size()) {
|
|
1016
|
+
size_t veclen;
|
|
1017
|
+
for (const auto &v : node.cat_sum) {
|
|
1018
|
+
veclen = v.size();
|
|
1019
|
+
write_bytes<size_t>((void*)&veclen, (size_t)1, out);
|
|
1020
|
+
write_bytes<double>((void*)v.data(), veclen, out);
|
|
1021
|
+
}
|
|
1022
|
+
}
|
|
1023
|
+
|
|
1024
|
+
write_bytes<double>((void*)node.cat_weight.data(), node.cat_weight.size(), out);
|
|
1025
|
+
}
|
|
1026
|
+
|
|
1027
|
+
template <class itype>
|
|
1028
|
+
void deserialize_node(ImputeNode &node, itype &in)
|
|
1029
|
+
{
|
|
1030
|
+
if (interrupt_switch) return;
|
|
1031
|
+
|
|
1032
|
+
size_t data_sizets[5];
|
|
1033
|
+
read_bytes<size_t>((void*)data_sizets, (size_t)5, in);
|
|
1034
|
+
node.parent = data_sizets[0];
|
|
1035
|
+
|
|
1036
|
+
read_bytes<double>(node.num_sum, data_sizets[1], in);
|
|
1037
|
+
|
|
1038
|
+
read_bytes<double>(node.num_weight, data_sizets[2], in);
|
|
1039
|
+
|
|
1040
|
+
node.cat_sum.resize(data_sizets[3]);
|
|
1041
|
+
if (data_sizets[3]) {
|
|
1042
|
+
size_t veclen;
|
|
1043
|
+
for (auto &v : node.cat_sum) {
|
|
1044
|
+
read_bytes<size_t>((void*)&veclen, (size_t)1, in);
|
|
1045
|
+
read_bytes<double>(v, veclen, in);
|
|
1046
|
+
}
|
|
1047
|
+
}
|
|
1048
|
+
node.cat_sum.shrink_to_fit();
|
|
1049
|
+
|
|
1050
|
+
read_bytes<double>(node.cat_weight, data_sizets[4], in);
|
|
1051
|
+
}
|
|
1052
|
+
|
|
1053
|
+
template <class itype, class saved_int_t, class saved_size_t>
|
|
1054
|
+
void deserialize_node(ImputeNode &node, itype &in, std::vector<char> &buffer, const bool diff_endian)
|
|
1055
|
+
{
|
|
1056
|
+
if (interrupt_switch) return;
|
|
1057
|
+
|
|
1058
|
+
size_t data_sizets[5];
|
|
1059
|
+
read_bytes<size_t, saved_size_t>((void*)data_sizets, (size_t)5, in, buffer, diff_endian);
|
|
1060
|
+
node.parent = data_sizets[0];
|
|
1061
|
+
|
|
1062
|
+
read_bytes<double, double>(node.num_sum, data_sizets[1], in, buffer, diff_endian);
|
|
1063
|
+
|
|
1064
|
+
read_bytes<double, double>(node.num_weight, data_sizets[2], in, buffer, diff_endian);
|
|
260
1065
|
|
|
1066
|
+
node.cat_sum.resize(data_sizets[3]);
|
|
1067
|
+
if (data_sizets[3]) {
|
|
1068
|
+
size_t veclen;
|
|
1069
|
+
for (auto &v : node.cat_sum) {
|
|
1070
|
+
read_bytes<size_t, saved_size_t>((void*)&veclen, (size_t)1, in, buffer, diff_endian);
|
|
1071
|
+
read_bytes<double, double>(v, veclen, in, buffer, diff_endian);
|
|
1072
|
+
}
|
|
1073
|
+
}
|
|
1074
|
+
node.cat_sum.shrink_to_fit();
|
|
1075
|
+
|
|
1076
|
+
read_bytes<double, double>(node.cat_weight, data_sizets[4], in, buffer, diff_endian);
|
|
1077
|
+
}
|
|
261
1078
|
|
|
262
|
-
|
|
1079
|
+
size_t get_size_node(const SingleTreeIndex &node) noexcept
|
|
1080
|
+
{
|
|
1081
|
+
size_t n_bytes = 0;
|
|
1082
|
+
n_bytes += sizeof(size_t);
|
|
1083
|
+
n_bytes += node.terminal_node_mappings.size() * sizeof(size_t);
|
|
1084
|
+
n_bytes += sizeof(size_t);
|
|
1085
|
+
n_bytes += node.node_distances.size() * sizeof(double);
|
|
1086
|
+
n_bytes += sizeof(size_t);
|
|
1087
|
+
n_bytes += node.node_depths.size() * sizeof(double);
|
|
1088
|
+
n_bytes += sizeof(size_t);
|
|
1089
|
+
n_bytes += node.reference_points.size() * sizeof(size_t);
|
|
1090
|
+
n_bytes += sizeof(size_t);
|
|
1091
|
+
n_bytes += node.reference_indptr.size() * sizeof(size_t);
|
|
1092
|
+
n_bytes += sizeof(size_t);
|
|
1093
|
+
n_bytes += node.reference_mapping.size() * sizeof(size_t);
|
|
1094
|
+
n_bytes += sizeof(size_t);
|
|
1095
|
+
return n_bytes;
|
|
1096
|
+
}
|
|
1097
|
+
|
|
1098
|
+
template <class otype>
|
|
1099
|
+
void serialize_node(const SingleTreeIndex &node, otype &out)
|
|
1100
|
+
{
|
|
1101
|
+
if (interrupt_switch) return;
|
|
1102
|
+
|
|
1103
|
+
size_t vec_size = node.terminal_node_mappings.size();
|
|
1104
|
+
write_bytes<size_t>((void*)&vec_size, (size_t)1, out);
|
|
1105
|
+
if (vec_size)
|
|
1106
|
+
write_bytes<size_t>((void*)node.terminal_node_mappings.data(), vec_size, out);
|
|
1107
|
+
|
|
1108
|
+
vec_size = node.node_distances.size();
|
|
1109
|
+
write_bytes<size_t>((void*)&vec_size, (size_t)1, out);
|
|
1110
|
+
if (vec_size)
|
|
1111
|
+
write_bytes<double>((void*)node.node_distances.data(), vec_size, out);
|
|
1112
|
+
|
|
1113
|
+
vec_size = node.node_depths.size();
|
|
1114
|
+
write_bytes<size_t>((void*)&vec_size, (size_t)1, out);
|
|
1115
|
+
if (vec_size)
|
|
1116
|
+
write_bytes<double>((void*)node.node_depths.data(), vec_size, out);
|
|
1117
|
+
|
|
1118
|
+
vec_size = node.reference_points.size();
|
|
1119
|
+
write_bytes<size_t>((void*)&vec_size, (size_t)1, out);
|
|
1120
|
+
if (vec_size)
|
|
1121
|
+
write_bytes<size_t>((void*)node.reference_points.data(), vec_size, out);
|
|
1122
|
+
|
|
1123
|
+
vec_size = node.reference_indptr.size();
|
|
1124
|
+
write_bytes<size_t>((void*)&vec_size, (size_t)1, out);
|
|
1125
|
+
if (vec_size)
|
|
1126
|
+
write_bytes<size_t>((void*)node.reference_indptr.data(), vec_size, out);
|
|
1127
|
+
|
|
1128
|
+
vec_size = node.reference_mapping.size();
|
|
1129
|
+
write_bytes<size_t>((void*)&vec_size, (size_t)1, out);
|
|
1130
|
+
if (vec_size)
|
|
1131
|
+
write_bytes<size_t>((void*)node.reference_mapping.data(), vec_size, out);
|
|
1132
|
+
|
|
1133
|
+
vec_size = node.n_terminal;
|
|
1134
|
+
write_bytes<size_t>((void*)&vec_size, (size_t)1, out);
|
|
1135
|
+
}
|
|
1136
|
+
|
|
1137
|
+
template <class itype>
|
|
1138
|
+
void deserialize_node(SingleTreeIndex &node, itype &in)
|
|
1139
|
+
{
|
|
1140
|
+
if (interrupt_switch) return;
|
|
1141
|
+
|
|
1142
|
+
size_t vec_size;
|
|
1143
|
+
read_bytes<size_t>((void*)&vec_size, (size_t)1, in);
|
|
1144
|
+
read_bytes<size_t>(node.terminal_node_mappings, vec_size, in);
|
|
1145
|
+
|
|
1146
|
+
read_bytes<size_t>((void*)&vec_size, (size_t)1, in);
|
|
1147
|
+
read_bytes<double>(node.node_distances, vec_size, in);
|
|
1148
|
+
|
|
1149
|
+
read_bytes<size_t>((void*)&vec_size, (size_t)1, in);
|
|
1150
|
+
read_bytes<double>(node.node_depths, vec_size, in);
|
|
1151
|
+
|
|
1152
|
+
read_bytes<size_t>((void*)&vec_size, (size_t)1, in);
|
|
1153
|
+
read_bytes<size_t>(node.reference_points, vec_size, in);
|
|
1154
|
+
|
|
1155
|
+
read_bytes<size_t>((void*)&vec_size, (size_t)1, in);
|
|
1156
|
+
read_bytes<size_t>(node.reference_indptr, vec_size, in);
|
|
1157
|
+
|
|
1158
|
+
read_bytes<size_t>((void*)&vec_size, (size_t)1, in);
|
|
1159
|
+
read_bytes<size_t>(node.reference_mapping, vec_size, in);
|
|
1160
|
+
|
|
1161
|
+
read_bytes<size_t>((void*)&node.n_terminal, (size_t)1, in);
|
|
1162
|
+
}
|
|
1163
|
+
|
|
1164
|
+
template <class itype, class saved_int_t, class saved_size_t>
|
|
1165
|
+
void deserialize_node(SingleTreeIndex &node, itype &in, std::vector<char> &buffer, const bool diff_endian)
|
|
1166
|
+
{
|
|
1167
|
+
if (interrupt_switch) return;
|
|
1168
|
+
|
|
1169
|
+
size_t vec_size;
|
|
1170
|
+
read_bytes<size_t, saved_size_t>((void*)&vec_size, (size_t)1, in, buffer, diff_endian);
|
|
1171
|
+
read_bytes<size_t, saved_size_t>(node.terminal_node_mappings, vec_size, in, buffer, diff_endian);
|
|
1172
|
+
|
|
1173
|
+
read_bytes<size_t, saved_size_t>((void*)&vec_size, (size_t)1, in, buffer, diff_endian);
|
|
1174
|
+
read_bytes<double, double>(node.node_distances, vec_size, in, buffer, diff_endian);
|
|
1175
|
+
|
|
1176
|
+
read_bytes<size_t, saved_size_t>((void*)&vec_size, (size_t)1, in, buffer, diff_endian);
|
|
1177
|
+
read_bytes<double, double>(node.node_depths, vec_size, in, buffer, diff_endian);
|
|
1178
|
+
|
|
1179
|
+
read_bytes<size_t, saved_size_t>((void*)&vec_size, (size_t)1, in, buffer, diff_endian);
|
|
1180
|
+
read_bytes<size_t, saved_size_t>(node.reference_points, vec_size, in, buffer, diff_endian);
|
|
1181
|
+
|
|
1182
|
+
read_bytes<size_t, saved_size_t>((void*)&vec_size, (size_t)1, in, buffer, diff_endian);
|
|
1183
|
+
read_bytes<size_t, saved_size_t>(node.reference_indptr, vec_size, in, buffer, diff_endian);
|
|
1184
|
+
|
|
1185
|
+
read_bytes<size_t, saved_size_t>((void*)&vec_size, (size_t)1, in, buffer, diff_endian);
|
|
1186
|
+
read_bytes<size_t, saved_size_t>(node.reference_mapping, vec_size, in, buffer, diff_endian);
|
|
1187
|
+
|
|
1188
|
+
read_bytes<size_t, saved_size_t>((void*)&node.n_terminal, (size_t)1, in, buffer, diff_endian);
|
|
1189
|
+
}
|
|
1190
|
+
|
|
1191
|
+
size_t get_size_model(const IsoForest &model) noexcept
|
|
1192
|
+
{
|
|
1193
|
+
size_t n_bytes = 0;
|
|
1194
|
+
n_bytes += sizeof(uint8_t) * 5;
|
|
1195
|
+
n_bytes += sizeof(double) * 2;
|
|
1196
|
+
n_bytes += sizeof(size_t) * 2;
|
|
1197
|
+
for (const auto &tree : model.trees) {
|
|
1198
|
+
n_bytes += sizeof(size_t);
|
|
1199
|
+
for (const auto &node : tree)
|
|
1200
|
+
n_bytes += get_size_node(node);
|
|
1201
|
+
}
|
|
1202
|
+
return n_bytes;
|
|
1203
|
+
}
|
|
1204
|
+
|
|
1205
|
+
template <class otype>
|
|
1206
|
+
void serialize_model(const IsoForest &model, otype &out)
|
|
1207
|
+
{
|
|
1208
|
+
if (interrupt_switch) return;
|
|
1209
|
+
|
|
1210
|
+
uint8_t data_en[] = {
|
|
1211
|
+
(uint8_t)model.new_cat_action,
|
|
1212
|
+
(uint8_t)model.cat_split_type,
|
|
1213
|
+
(uint8_t)model.missing_action,
|
|
1214
|
+
(uint8_t)model.has_range_penalty,
|
|
1215
|
+
(uint8_t)model.scoring_metric,
|
|
1216
|
+
};
|
|
1217
|
+
write_bytes<uint8_t>((void*)data_en, (size_t)5, out);
|
|
1218
|
+
|
|
1219
|
+
double data_doubles[] = {
|
|
1220
|
+
model.exp_avg_depth,
|
|
1221
|
+
model.exp_avg_sep
|
|
1222
|
+
};
|
|
1223
|
+
write_bytes<double>((void*)data_doubles, (size_t)2, out);
|
|
1224
|
+
|
|
1225
|
+
size_t data_sizets[] = {
|
|
1226
|
+
model.orig_sample_size,
|
|
1227
|
+
model.trees.size()
|
|
1228
|
+
};
|
|
1229
|
+
write_bytes<size_t>((void*)data_sizets, (size_t)2, out);
|
|
1230
|
+
|
|
1231
|
+
size_t veclen;
|
|
1232
|
+
for (const auto &tree : model.trees) {
|
|
1233
|
+
veclen = tree.size();
|
|
1234
|
+
write_bytes<size_t>((void*)&veclen, (size_t)1, out);
|
|
1235
|
+
for (const auto &node : tree)
|
|
1236
|
+
serialize_node(node, out);
|
|
1237
|
+
}
|
|
1238
|
+
}
|
|
1239
|
+
|
|
1240
|
+
template <class itype>
|
|
1241
|
+
void deserialize_model(IsoForest &model, itype &in)
|
|
1242
|
+
{
|
|
1243
|
+
if (interrupt_switch) return;
|
|
1244
|
+
|
|
1245
|
+
uint8_t data_en[5];
|
|
1246
|
+
read_bytes<uint8_t>((void*)data_en, (size_t)5, in);
|
|
1247
|
+
model.new_cat_action = (NewCategAction)data_en[0];
|
|
1248
|
+
model.cat_split_type = (CategSplit)data_en[1];
|
|
1249
|
+
model.missing_action = (MissingAction)data_en[2];
|
|
1250
|
+
model.has_range_penalty = (bool)data_en[3];
|
|
1251
|
+
model.scoring_metric = (ScoringMetric)data_en[4];
|
|
1252
|
+
|
|
1253
|
+
double data_doubles[2];
|
|
1254
|
+
read_bytes<double>((void*)data_doubles, (size_t)2, in);
|
|
1255
|
+
model.exp_avg_depth = data_doubles[0];
|
|
1256
|
+
model.exp_avg_sep = data_doubles[1];
|
|
1257
|
+
|
|
1258
|
+
size_t data_sizets[2];
|
|
1259
|
+
read_bytes<size_t>((void*)data_sizets, (size_t)2, in);
|
|
1260
|
+
model.orig_sample_size = data_sizets[0];
|
|
1261
|
+
model.trees.resize(data_sizets[1]);
|
|
1262
|
+
model.trees.shrink_to_fit();
|
|
1263
|
+
|
|
1264
|
+
size_t veclen;
|
|
1265
|
+
for (auto &tree : model.trees) {
|
|
1266
|
+
read_bytes<size_t>((void*)&veclen, (size_t)1, in);
|
|
1267
|
+
tree.resize(veclen);
|
|
1268
|
+
tree.shrink_to_fit();
|
|
1269
|
+
for (auto &node : tree)
|
|
1270
|
+
deserialize_node(node, in);
|
|
1271
|
+
}
|
|
1272
|
+
}
|
|
1273
|
+
|
|
1274
|
+
template <class itype, class saved_int_t, class saved_size_t>
|
|
1275
|
+
void deserialize_model(IsoForest &model, itype &in, std::vector<char> &buffer,
|
|
1276
|
+
const bool diff_endian, const bool lacks_range_penalty,
|
|
1277
|
+
const bool lacks_scoring_metric)
|
|
1278
|
+
{
|
|
1279
|
+
if (interrupt_switch) return;
|
|
1280
|
+
|
|
1281
|
+
if (lacks_range_penalty)
|
|
1282
|
+
{
|
|
1283
|
+
uint8_t data_en[3];
|
|
1284
|
+
read_bytes<uint8_t>((void*)data_en, (size_t)3, in);
|
|
1285
|
+
model.new_cat_action = (NewCategAction)data_en[0];
|
|
1286
|
+
model.cat_split_type = (CategSplit)data_en[1];
|
|
1287
|
+
model.missing_action = (MissingAction)data_en[2];
|
|
1288
|
+
}
|
|
1289
|
+
|
|
1290
|
+
else
|
|
1291
|
+
{
|
|
1292
|
+
uint8_t data_en[4];
|
|
1293
|
+
read_bytes<uint8_t>((void*)data_en, (size_t)4, in);
|
|
1294
|
+
model.new_cat_action = (NewCategAction)data_en[0];
|
|
1295
|
+
model.cat_split_type = (CategSplit)data_en[1];
|
|
1296
|
+
model.missing_action = (MissingAction)data_en[2];
|
|
1297
|
+
model.has_range_penalty = (bool)data_en[3];
|
|
1298
|
+
}
|
|
1299
|
+
|
|
1300
|
+
if (lacks_scoring_metric)
|
|
1301
|
+
{
|
|
1302
|
+
model.scoring_metric = Depth;
|
|
1303
|
+
}
|
|
1304
|
+
|
|
1305
|
+
else
|
|
1306
|
+
{
|
|
1307
|
+
uint8_t data_en;
|
|
1308
|
+
read_bytes<uint8_t>((void*)&data_en, (size_t)1, in);
|
|
1309
|
+
model.scoring_metric = (ScoringMetric)data_en;
|
|
1310
|
+
}
|
|
1311
|
+
|
|
1312
|
+
double data_doubles[2];
|
|
1313
|
+
read_bytes<double, double>((void*)data_doubles, (size_t)2, in, buffer, diff_endian);
|
|
1314
|
+
model.exp_avg_depth = data_doubles[0];
|
|
1315
|
+
model.exp_avg_sep = data_doubles[1];
|
|
1316
|
+
|
|
1317
|
+
size_t data_sizets[2];
|
|
1318
|
+
read_bytes<size_t, saved_size_t>((void*)data_sizets, (size_t)2, in, buffer, diff_endian);
|
|
1319
|
+
model.orig_sample_size = data_sizets[0];
|
|
1320
|
+
model.trees.resize(data_sizets[1]);
|
|
1321
|
+
model.trees.shrink_to_fit();
|
|
1322
|
+
|
|
1323
|
+
size_t veclen;
|
|
1324
|
+
for (auto &tree : model.trees) {
|
|
1325
|
+
read_bytes<size_t, saved_size_t>((void*)&veclen, (size_t)1, in, buffer, diff_endian);
|
|
1326
|
+
tree.resize(veclen);
|
|
1327
|
+
tree.shrink_to_fit();
|
|
1328
|
+
for (auto &node : tree)
|
|
1329
|
+
deserialize_node<itype, saved_int_t, saved_size_t>(node, in, buffer, diff_endian);
|
|
1330
|
+
}
|
|
1331
|
+
}
|
|
1332
|
+
|
|
1333
|
+
template <class otype>
|
|
1334
|
+
void serialize_additional_trees(const IsoForest &model, otype &out, size_t trees_prev)
|
|
1335
|
+
{
|
|
1336
|
+
size_t veclen;
|
|
1337
|
+
for (size_t ix = trees_prev; ix < model.trees.size(); ix++) {
|
|
1338
|
+
veclen = model.trees[ix].size();
|
|
1339
|
+
write_bytes<size_t>((void*)&veclen, (size_t)1, out);
|
|
1340
|
+
for (const auto &node : model.trees[ix])
|
|
1341
|
+
serialize_node(node, out);
|
|
1342
|
+
}
|
|
1343
|
+
}
|
|
1344
|
+
|
|
1345
|
+
size_t determine_serialized_size_additional_trees(const IsoForest &model, size_t old_ntrees) noexcept
|
|
1346
|
+
{
|
|
1347
|
+
size_t n_bytes = 0;
|
|
1348
|
+
for (size_t ix = 0; ix < model.trees.size(); ix++) {
|
|
1349
|
+
n_bytes += sizeof(size_t);
|
|
1350
|
+
for (const auto &node : model.trees[ix])
|
|
1351
|
+
n_bytes += get_size_node(node);
|
|
1352
|
+
}
|
|
1353
|
+
return n_bytes;
|
|
1354
|
+
}
|
|
1355
|
+
|
|
1356
|
+
size_t get_size_model(const ExtIsoForest &model) noexcept
|
|
1357
|
+
{
|
|
1358
|
+
size_t n_bytes = 0;
|
|
1359
|
+
n_bytes += sizeof(uint8_t) * 5;
|
|
1360
|
+
n_bytes += sizeof(double) * 2;
|
|
1361
|
+
n_bytes += sizeof(size_t) * 2;
|
|
1362
|
+
for (const auto &tree : model.hplanes) {
|
|
1363
|
+
n_bytes += sizeof(size_t);
|
|
1364
|
+
for (const auto &node : tree)
|
|
1365
|
+
n_bytes += get_size_node(node);
|
|
1366
|
+
}
|
|
1367
|
+
return n_bytes;
|
|
1368
|
+
}
|
|
1369
|
+
|
|
1370
|
+
template <class otype>
|
|
1371
|
+
void serialize_model(const ExtIsoForest &model, otype &out)
|
|
1372
|
+
{
|
|
1373
|
+
if (interrupt_switch) return;
|
|
1374
|
+
|
|
1375
|
+
uint8_t data_en[] = {
|
|
1376
|
+
(uint8_t)model.new_cat_action,
|
|
1377
|
+
(uint8_t)model.cat_split_type,
|
|
1378
|
+
(uint8_t)model.missing_action,
|
|
1379
|
+
(uint8_t)model.has_range_penalty,
|
|
1380
|
+
(uint8_t)model.scoring_metric
|
|
1381
|
+
};
|
|
1382
|
+
write_bytes<uint8_t>((void*)data_en, (size_t)5, out);
|
|
1383
|
+
|
|
1384
|
+
double data_doubles[] = {
|
|
1385
|
+
model.exp_avg_depth,
|
|
1386
|
+
model.exp_avg_sep
|
|
1387
|
+
};
|
|
1388
|
+
write_bytes<double>((void*)data_doubles, (size_t)2, out);
|
|
1389
|
+
|
|
1390
|
+
size_t data_sizets[] = {
|
|
1391
|
+
model.orig_sample_size,
|
|
1392
|
+
model.hplanes.size()
|
|
1393
|
+
};
|
|
1394
|
+
write_bytes<size_t>((void*)data_sizets, (size_t)2, out);
|
|
1395
|
+
|
|
1396
|
+
std::vector<uint8_t> buffer;
|
|
1397
|
+
size_t veclen;
|
|
1398
|
+
for (const auto &tree : model.hplanes) {
|
|
1399
|
+
veclen = tree.size();
|
|
1400
|
+
write_bytes<size_t>((void*)&veclen, (size_t)1, out);
|
|
1401
|
+
for (const auto &node : tree)
|
|
1402
|
+
serialize_node(node, out, buffer);
|
|
1403
|
+
}
|
|
1404
|
+
}
|
|
1405
|
+
|
|
1406
|
+
template <class itype>
|
|
1407
|
+
void deserialize_model(ExtIsoForest &model, itype &in)
|
|
1408
|
+
{
|
|
1409
|
+
if (interrupt_switch) return;
|
|
1410
|
+
|
|
1411
|
+
uint8_t data_en[5];
|
|
1412
|
+
read_bytes<uint8_t>((void*)data_en, (size_t)5, in);
|
|
1413
|
+
model.new_cat_action = (NewCategAction)data_en[0];
|
|
1414
|
+
model.cat_split_type = (CategSplit)data_en[1];
|
|
1415
|
+
model.missing_action = (MissingAction)data_en[2];
|
|
1416
|
+
model.has_range_penalty = (bool)data_en[3];
|
|
1417
|
+
model.scoring_metric = (ScoringMetric)data_en[4];
|
|
1418
|
+
|
|
1419
|
+
double data_doubles[2];
|
|
1420
|
+
read_bytes<double>((void*)data_doubles, (size_t)2, in);
|
|
1421
|
+
model.exp_avg_depth = data_doubles[0];
|
|
1422
|
+
model.exp_avg_sep = data_doubles[1];
|
|
1423
|
+
|
|
1424
|
+
size_t data_sizets[2];
|
|
1425
|
+
read_bytes<size_t>((void*)data_sizets, (size_t)2, in);
|
|
1426
|
+
model.orig_sample_size = data_sizets[0];
|
|
1427
|
+
model.hplanes.resize(data_sizets[1]);
|
|
1428
|
+
model.hplanes.shrink_to_fit();
|
|
1429
|
+
|
|
1430
|
+
size_t veclen;
|
|
1431
|
+
std::vector<uint8_t> buffer;
|
|
1432
|
+
for (auto &tree : model.hplanes) {
|
|
1433
|
+
read_bytes<size_t>((void*)&veclen, (size_t)1, in);
|
|
1434
|
+
tree.resize(veclen);
|
|
1435
|
+
tree.shrink_to_fit();
|
|
1436
|
+
for (auto &node : tree)
|
|
1437
|
+
deserialize_node(node, in, buffer);
|
|
1438
|
+
}
|
|
1439
|
+
}
|
|
1440
|
+
|
|
1441
|
+
template <class itype, class saved_int_t, class saved_size_t>
|
|
1442
|
+
void deserialize_model(ExtIsoForest &model, itype &in, std::vector<char> &buffer,
|
|
1443
|
+
const bool diff_endian, const bool lacks_range_penalty,
|
|
1444
|
+
const bool lacks_scoring_metric)
|
|
1445
|
+
{
|
|
1446
|
+
if (interrupt_switch) return;
|
|
1447
|
+
|
|
1448
|
+
if (lacks_range_penalty)
|
|
1449
|
+
{
|
|
1450
|
+
uint8_t data_en[3];
|
|
1451
|
+
read_bytes<uint8_t>((void*)data_en, (size_t)3, in);
|
|
1452
|
+
model.new_cat_action = (NewCategAction)data_en[0];
|
|
1453
|
+
model.cat_split_type = (CategSplit)data_en[1];
|
|
1454
|
+
model.missing_action = (MissingAction)data_en[2];
|
|
1455
|
+
}
|
|
1456
|
+
|
|
1457
|
+
else
|
|
1458
|
+
{
|
|
1459
|
+
uint8_t data_en[4];
|
|
1460
|
+
read_bytes<uint8_t>((void*)data_en, (size_t)4, in);
|
|
1461
|
+
model.new_cat_action = (NewCategAction)data_en[0];
|
|
1462
|
+
model.cat_split_type = (CategSplit)data_en[1];
|
|
1463
|
+
model.missing_action = (MissingAction)data_en[2];
|
|
1464
|
+
model.has_range_penalty = (bool)data_en[3];
|
|
1465
|
+
}
|
|
1466
|
+
|
|
1467
|
+
if (lacks_scoring_metric)
|
|
1468
|
+
{
|
|
1469
|
+
model.scoring_metric = Depth;
|
|
1470
|
+
}
|
|
1471
|
+
|
|
1472
|
+
else
|
|
1473
|
+
{
|
|
1474
|
+
uint8_t data_en;
|
|
1475
|
+
read_bytes<uint8_t>((void*)&data_en, (size_t)1, in);
|
|
1476
|
+
model.scoring_metric = (ScoringMetric)data_en;
|
|
1477
|
+
}
|
|
1478
|
+
|
|
1479
|
+
double data_doubles[2];
|
|
1480
|
+
read_bytes<double, double>((void*)data_doubles, (size_t)2, in, buffer, diff_endian);
|
|
1481
|
+
model.exp_avg_depth = data_doubles[0];
|
|
1482
|
+
model.exp_avg_sep = data_doubles[1];
|
|
1483
|
+
|
|
1484
|
+
size_t data_sizets[2];
|
|
1485
|
+
read_bytes<size_t, saved_size_t>((void*)data_sizets, (size_t)2, in, buffer, diff_endian);
|
|
1486
|
+
model.orig_sample_size = data_sizets[0];
|
|
1487
|
+
model.hplanes.resize(data_sizets[1]);
|
|
1488
|
+
model.hplanes.shrink_to_fit();
|
|
1489
|
+
|
|
1490
|
+
size_t veclen;
|
|
1491
|
+
std::vector<uint8_t> buffer_;
|
|
1492
|
+
for (auto &tree : model.hplanes) {
|
|
1493
|
+
read_bytes<size_t, saved_size_t>((void*)&veclen, (size_t)1, in, buffer, diff_endian);
|
|
1494
|
+
tree.resize(veclen);
|
|
1495
|
+
tree.shrink_to_fit();
|
|
1496
|
+
for (auto &node : tree)
|
|
1497
|
+
deserialize_node<itype, saved_int_t, saved_size_t>(node, in, buffer_, buffer, diff_endian);
|
|
1498
|
+
}
|
|
1499
|
+
}
|
|
1500
|
+
|
|
1501
|
+
template <class otype>
|
|
1502
|
+
void serialize_additional_trees(const ExtIsoForest &model, otype &out, size_t trees_prev)
|
|
1503
|
+
{
|
|
1504
|
+
if (interrupt_switch) return;
|
|
1505
|
+
|
|
1506
|
+
std::vector<uint8_t> buffer;
|
|
1507
|
+
size_t veclen;
|
|
1508
|
+
for (size_t ix = trees_prev; ix < model.hplanes.size(); ix++) {
|
|
1509
|
+
veclen = model.hplanes[ix].size();
|
|
1510
|
+
write_bytes<size_t>((void*)&veclen, (size_t)1, out);
|
|
1511
|
+
for (const auto &node : model.hplanes[ix])
|
|
1512
|
+
serialize_node(node, out, buffer);
|
|
1513
|
+
}
|
|
1514
|
+
}
|
|
1515
|
+
|
|
1516
|
+
size_t determine_serialized_size_additional_trees(const ExtIsoForest &model, size_t old_ntrees) noexcept
|
|
1517
|
+
{
|
|
1518
|
+
size_t n_bytes = 0;
|
|
1519
|
+
for (size_t ix = 0; ix < model.hplanes.size(); ix++) {
|
|
1520
|
+
n_bytes += sizeof(size_t);
|
|
1521
|
+
for (const auto &node : model.hplanes[ix])
|
|
1522
|
+
n_bytes += get_size_node(node);
|
|
1523
|
+
}
|
|
1524
|
+
return n_bytes;
|
|
1525
|
+
}
|
|
1526
|
+
|
|
1527
|
+
size_t get_size_model(const Imputer &model) noexcept
|
|
1528
|
+
{
|
|
1529
|
+
size_t n_bytes = 0;
|
|
1530
|
+
n_bytes += sizeof(size_t) * 6;
|
|
1531
|
+
n_bytes += sizeof(int) * model.ncat.size();
|
|
1532
|
+
n_bytes += sizeof(double) * model.col_means.size();
|
|
1533
|
+
n_bytes += sizeof(int) * model.col_modes.size();
|
|
1534
|
+
for (const auto &tree : model.imputer_tree) {
|
|
1535
|
+
n_bytes += sizeof(size_t);
|
|
1536
|
+
for (const auto &node : tree)
|
|
1537
|
+
n_bytes += get_size_node(node);
|
|
1538
|
+
}
|
|
1539
|
+
return n_bytes;
|
|
1540
|
+
}
|
|
1541
|
+
|
|
1542
|
+
template <class otype>
|
|
1543
|
+
void serialize_model(const Imputer &model, otype &out)
|
|
1544
|
+
{
|
|
1545
|
+
if (interrupt_switch) return;
|
|
1546
|
+
|
|
1547
|
+
size_t data_sizets[] = {
|
|
1548
|
+
model.ncols_numeric,
|
|
1549
|
+
model.ncols_categ,
|
|
1550
|
+
model.ncat.size(),
|
|
1551
|
+
model.imputer_tree.size(),
|
|
1552
|
+
model.col_means.size(),
|
|
1553
|
+
model.col_modes.size()
|
|
1554
|
+
};
|
|
1555
|
+
write_bytes<size_t>((void*)data_sizets, (size_t)6, out);
|
|
1556
|
+
|
|
1557
|
+
write_bytes<int>((void*)model.ncat.data(), model.ncat.size(), out);
|
|
1558
|
+
|
|
1559
|
+
write_bytes<double>((void*)model.col_means.data(), model.col_means.size(), out);
|
|
1560
|
+
|
|
1561
|
+
write_bytes<int>((void*)model.col_modes.data(), model.col_modes.size(), out);
|
|
1562
|
+
|
|
1563
|
+
size_t veclen;
|
|
1564
|
+
for (const auto &tree : model.imputer_tree) {
|
|
1565
|
+
veclen = tree.size();
|
|
1566
|
+
write_bytes<size_t>((void*)&veclen, (size_t)1, out);
|
|
1567
|
+
for (const auto &node : tree)
|
|
1568
|
+
serialize_node(node, out);
|
|
1569
|
+
}
|
|
1570
|
+
}
|
|
1571
|
+
|
|
1572
|
+
template <class itype>
|
|
1573
|
+
void deserialize_model(Imputer &model, itype &in)
|
|
1574
|
+
{
|
|
1575
|
+
if (interrupt_switch) return;
|
|
1576
|
+
|
|
1577
|
+
size_t data_sizets[6];
|
|
1578
|
+
read_bytes<size_t>((void*)data_sizets, (size_t)6, in);
|
|
1579
|
+
model.ncols_numeric = data_sizets[0];
|
|
1580
|
+
model.ncols_categ = data_sizets[1];
|
|
1581
|
+
model.ncat.resize(data_sizets[2]);
|
|
1582
|
+
model.imputer_tree.resize(data_sizets[3]);
|
|
1583
|
+
model.col_means.resize(data_sizets[4]);
|
|
1584
|
+
model.col_modes.resize(data_sizets[5]);
|
|
1585
|
+
|
|
1586
|
+
model.ncat.shrink_to_fit();
|
|
1587
|
+
model.imputer_tree.shrink_to_fit();
|
|
1588
|
+
model.col_means.shrink_to_fit();
|
|
1589
|
+
model.col_modes.shrink_to_fit();
|
|
1590
|
+
|
|
1591
|
+
read_bytes<int>(model.ncat, model.ncat.size(), in);
|
|
1592
|
+
|
|
1593
|
+
read_bytes<double>(model.col_means, model.col_means.size(), in);
|
|
1594
|
+
|
|
1595
|
+
read_bytes<int>(model.col_modes, model.col_modes.size(), in);
|
|
1596
|
+
|
|
1597
|
+
size_t veclen;
|
|
1598
|
+
for (auto &tree : model.imputer_tree) {
|
|
1599
|
+
read_bytes<size_t>((void*)&veclen, (size_t)1, in);
|
|
1600
|
+
tree.resize(veclen);
|
|
1601
|
+
tree.shrink_to_fit();
|
|
1602
|
+
for (auto &node : tree)
|
|
1603
|
+
deserialize_node(node, in);
|
|
1604
|
+
}
|
|
1605
|
+
}
|
|
1606
|
+
|
|
1607
|
+
template <class itype, class saved_int_t, class saved_size_t>
|
|
1608
|
+
void deserialize_model(Imputer &model, itype &in, std::vector<char> &buffer,
|
|
1609
|
+
const bool diff_endian, const bool lacks_range_penalty,
|
|
1610
|
+
const bool lacks_scoring_metric)
|
|
1611
|
+
{
|
|
1612
|
+
if (interrupt_switch) return;
|
|
1613
|
+
|
|
1614
|
+
size_t data_sizets[6];
|
|
1615
|
+
read_bytes<size_t, saved_size_t>((void*)data_sizets, (size_t)6, in, buffer, diff_endian);
|
|
1616
|
+
model.ncols_numeric = data_sizets[0];
|
|
1617
|
+
model.ncols_categ = data_sizets[1];
|
|
1618
|
+
model.ncat.resize(data_sizets[2]);
|
|
1619
|
+
model.imputer_tree.resize(data_sizets[3]);
|
|
1620
|
+
model.col_means.resize(data_sizets[4]);
|
|
1621
|
+
model.col_modes.resize(data_sizets[5]);
|
|
1622
|
+
|
|
1623
|
+
model.ncat.shrink_to_fit();
|
|
1624
|
+
model.imputer_tree.shrink_to_fit();
|
|
1625
|
+
model.col_means.shrink_to_fit();
|
|
1626
|
+
model.col_modes.shrink_to_fit();
|
|
1627
|
+
|
|
1628
|
+
read_bytes<int, saved_int_t>(model.ncat, model.ncat.size(), in, buffer, diff_endian);
|
|
1629
|
+
|
|
1630
|
+
read_bytes<double, double>(model.col_means, model.col_means.size(), in, buffer, diff_endian);
|
|
1631
|
+
|
|
1632
|
+
read_bytes<int, saved_int_t>(model.col_modes, model.col_modes.size(), in, buffer, diff_endian);
|
|
1633
|
+
|
|
1634
|
+
size_t veclen;
|
|
1635
|
+
for (auto &tree : model.imputer_tree) {
|
|
1636
|
+
read_bytes<size_t, saved_size_t>((void*)&veclen, (size_t)1, in, buffer, diff_endian);
|
|
1637
|
+
tree.resize(veclen);
|
|
1638
|
+
tree.shrink_to_fit();
|
|
1639
|
+
for (auto &node : tree)
|
|
1640
|
+
deserialize_node<itype, saved_int_t, saved_size_t>(node, in, buffer, diff_endian);
|
|
1641
|
+
}
|
|
1642
|
+
}
|
|
1643
|
+
|
|
1644
|
+
template <class otype>
|
|
1645
|
+
void serialize_additional_trees(const Imputer &model, otype &out, size_t trees_prev)
|
|
1646
|
+
{
|
|
1647
|
+
size_t veclen;
|
|
1648
|
+
for (size_t ix = trees_prev; ix < model.imputer_tree.size(); ix++) {
|
|
1649
|
+
veclen = model.imputer_tree[ix].size();
|
|
1650
|
+
write_bytes<size_t>((void*)&veclen, (size_t)1, out);
|
|
1651
|
+
for (const auto &node : model.imputer_tree[ix])
|
|
1652
|
+
serialize_node(node, out);
|
|
1653
|
+
}
|
|
1654
|
+
}
|
|
1655
|
+
|
|
1656
|
+
size_t determine_serialized_size_additional_trees(const Imputer &model, size_t old_ntrees) noexcept
|
|
1657
|
+
{
|
|
1658
|
+
size_t n_bytes = 0;
|
|
1659
|
+
for (size_t ix = 0; ix < model.imputer_tree.size(); ix++) {
|
|
1660
|
+
n_bytes += sizeof(size_t);
|
|
1661
|
+
for (const auto &node : model.imputer_tree[ix])
|
|
1662
|
+
n_bytes += get_size_node(node);
|
|
1663
|
+
}
|
|
1664
|
+
return n_bytes;
|
|
1665
|
+
}
|
|
1666
|
+
|
|
1667
|
+
size_t get_size_model(const TreesIndexer &model) noexcept
|
|
1668
|
+
{
|
|
1669
|
+
size_t n_bytes = 0;
|
|
1670
|
+
n_bytes += sizeof(size_t);
|
|
1671
|
+
for (const auto &node : model.indices)
|
|
1672
|
+
n_bytes += get_size_node(node);
|
|
1673
|
+
return n_bytes;
|
|
1674
|
+
}
|
|
1675
|
+
|
|
1676
|
+
template <class otype>
|
|
1677
|
+
void serialize_model(const TreesIndexer &model, otype &out)
|
|
1678
|
+
{
|
|
1679
|
+
if (interrupt_switch) return;
|
|
1680
|
+
|
|
1681
|
+
size_t vec_size = model.indices.size();
|
|
1682
|
+
write_bytes<size_t>((void*)&vec_size, (size_t)1, out);
|
|
1683
|
+
|
|
1684
|
+
for (const auto &tree : model.indices)
|
|
1685
|
+
serialize_node(tree, out);
|
|
1686
|
+
}
|
|
1687
|
+
|
|
1688
|
+
template <class itype>
|
|
1689
|
+
void deserialize_model(TreesIndexer &model, itype &in)
|
|
1690
|
+
{
|
|
1691
|
+
if (interrupt_switch) return;
|
|
1692
|
+
|
|
1693
|
+
size_t vec_size;
|
|
1694
|
+
read_bytes<size_t>(&vec_size, (size_t)1, in);
|
|
1695
|
+
model.indices.resize(vec_size);
|
|
1696
|
+
model.indices.shrink_to_fit();
|
|
1697
|
+
for (auto &tree : model.indices)
|
|
1698
|
+
deserialize_node(tree, in);
|
|
1699
|
+
}
|
|
1700
|
+
|
|
1701
|
+
template <class itype, class saved_int_t, class saved_size_t>
|
|
1702
|
+
void deserialize_model(TreesIndexer &model, itype &in, std::vector<char> &buffer,
|
|
1703
|
+
const bool diff_endian, const bool lacks_range_penalty,
|
|
1704
|
+
const bool lacks_scoring_metric)
|
|
1705
|
+
{
|
|
1706
|
+
if (interrupt_switch) return;
|
|
1707
|
+
|
|
1708
|
+
size_t vec_size;
|
|
1709
|
+
read_bytes<size_t, saved_size_t>(&vec_size, (size_t)1, in, buffer, diff_endian);
|
|
1710
|
+
model.indices.resize(vec_size);
|
|
1711
|
+
model.indices.shrink_to_fit();
|
|
1712
|
+
for (auto &tree : model.indices)
|
|
1713
|
+
deserialize_node<itype, saved_int_t, saved_size_t>(tree, in, buffer, diff_endian);
|
|
1714
|
+
}
|
|
1715
|
+
|
|
1716
|
+
template <class otype>
|
|
1717
|
+
void serialize_additional_trees(const TreesIndexer &model, otype &out, size_t trees_prev)
|
|
1718
|
+
{
|
|
1719
|
+
for (size_t ix = trees_prev; ix < model.indices.size(); ix++)
|
|
1720
|
+
serialize_node(model.indices[ix], out);
|
|
1721
|
+
}
|
|
1722
|
+
|
|
1723
|
+
size_t determine_serialized_size_additional_trees(const TreesIndexer &model, size_t old_ntrees) noexcept
|
|
1724
|
+
{
|
|
1725
|
+
size_t n_bytes = 0;
|
|
1726
|
+
for (size_t ix = 0; ix < model.indices.size(); ix++)
|
|
1727
|
+
n_bytes += get_size_node(model.indices[ix]);
|
|
1728
|
+
return n_bytes;
|
|
1729
|
+
}
|
|
1730
|
+
|
|
1731
|
+
/*  Returns 'true' when the byte stored at the lowest address of an 'int'
    holding the value 1 is non-zero (i.e. the least-significant byte comes
    first in memory), 'false' otherwise. */
bool get_is_little_endian() noexcept
{
    const int probe = 1;
    const unsigned char *probe_bytes = reinterpret_cast<const unsigned char*>(&probe);
    return probe_bytes[0] != 0;
}
|
|
1736
|
+
|
|
1737
|
+
size_t get_size_setup_info() noexcept
|
|
1738
|
+
{
|
|
1739
|
+
size_t n_bytes = 0;
|
|
1740
|
+
n_bytes += sizeof(unsigned char) * SIZE_WATERMARK;
|
|
1741
|
+
n_bytes += sizeof(uint8_t) * 9;
|
|
1742
|
+
return n_bytes;
|
|
1743
|
+
}
|
|
1744
|
+
|
|
1745
|
+
template <class otype>
|
|
1746
|
+
void add_setup_info(otype &out, bool full_watermark)
|
|
1747
|
+
{
|
|
1748
|
+
write_bytes<unsigned char>((void*)(full_watermark? watermark: incomplete_watermark), SIZE_WATERMARK, out);
|
|
1749
|
+
/*
|
|
1750
|
+
0 : endianness
|
|
1751
|
+
1-3: isotree version
|
|
1752
|
+
4: double type
|
|
1753
|
+
5: size_t limit
|
|
1754
|
+
6: sizeof(int)
|
|
1755
|
+
7: sizeof(size_t)
|
|
1756
|
+
8: sizeof(double)
|
|
1757
|
+
*/
|
|
1758
|
+
uint8_t setup_info[] = {
|
|
1759
|
+
(uint8_t)get_is_little_endian(),
|
|
1760
|
+
(uint8_t)ISOTREE_VERSION_MAJOR,
|
|
1761
|
+
(uint8_t)ISOTREE_VERSION_MINOR,
|
|
1762
|
+
(uint8_t)ISOTREE_VERSION_PATCH,
|
|
1763
|
+
#if defined(HAS_IEEE_DOUBLE)
|
|
1764
|
+
(uint8_t)IsNormalDouble,
|
|
1765
|
+
#else
|
|
1766
|
+
(uint8_t)IsAbnormalDouble,
|
|
1767
|
+
#endif
|
|
1768
|
+
#if SIZE_MAX == UINT32_MAX
|
|
1769
|
+
(uint8_t)Is32Bit,
|
|
1770
|
+
#elif SIZE_MAX == UINT64_MAX
|
|
1771
|
+
(uint8_t)Is64Bit,
|
|
1772
|
+
#else
|
|
1773
|
+
(uint8_t)IsOther,
|
|
1774
|
+
#endif
|
|
1775
|
+
(uint8_t)sizeof(int),
|
|
1776
|
+
(uint8_t)sizeof(size_t),
|
|
1777
|
+
(uint8_t)sizeof(double)
|
|
1778
|
+
};
|
|
1779
|
+
write_bytes<uint8_t>((void*)setup_info, (size_t)9, out);
|
|
1780
|
+
}
|
|
1781
|
+
|
|
1782
|
+
template <class otype>
|
|
1783
|
+
void add_full_watermark(otype &out)
|
|
1784
|
+
{
|
|
1785
|
+
write_bytes<unsigned char>((void*)watermark, SIZE_WATERMARK, out);
|
|
1786
|
+
}
|
|
1787
|
+
|
|
1788
|
+
template <class itype>
|
|
1789
|
+
void check_setup_info
|
|
1790
|
+
(
|
|
1791
|
+
itype &in,
|
|
1792
|
+
bool &has_watermark,
|
|
1793
|
+
bool &has_incomplete_watermark,
|
|
1794
|
+
bool &has_same_double,
|
|
1795
|
+
bool &has_same_int_size,
|
|
1796
|
+
bool &has_same_size_t_size,
|
|
1797
|
+
bool &has_same_endianness,
|
|
1798
|
+
PlatformSize &saved_int_t,
|
|
1799
|
+
PlatformSize &saved_size_t,
|
|
1800
|
+
PlatformEndianness &saved_endian,
|
|
1801
|
+
bool &is_deserializable,
|
|
1802
|
+
bool &lacks_range_penalty,
|
|
1803
|
+
bool &lacks_scoring_metric,
|
|
1804
|
+
bool &lacks_indexer
|
|
1805
|
+
)
|
|
1806
|
+
{
|
|
1807
|
+
is_deserializable = false;
|
|
1808
|
+
has_incomplete_watermark = false;
|
|
1809
|
+
lacks_range_penalty = false;
|
|
1810
|
+
lacks_scoring_metric = false;
|
|
1811
|
+
lacks_indexer = false;
|
|
1812
|
+
|
|
1813
|
+
unsigned char watermark_in[SIZE_WATERMARK];
|
|
1814
|
+
read_bytes<unsigned char>((void*)watermark_in, SIZE_WATERMARK, in);
|
|
1815
|
+
if (memcmp(watermark_in, (unsigned char*)watermark, SIZE_WATERMARK)) {
|
|
1816
|
+
has_watermark = false;
|
|
1817
|
+
if (!memcmp(watermark_in, (unsigned char*)incomplete_watermark, SIZE_WATERMARK))
|
|
1818
|
+
has_incomplete_watermark = true;
|
|
1819
|
+
return;
|
|
1820
|
+
}
|
|
1821
|
+
else {
|
|
1822
|
+
has_watermark = true;
|
|
1823
|
+
}
|
|
1824
|
+
|
|
1825
|
+
uint8_t setup_info[9];
|
|
1826
|
+
read_bytes<uint8_t>((void*)setup_info, (size_t)9, in);
|
|
1827
|
+
|
|
1828
|
+
bool is_little_endian = get_is_little_endian();
|
|
1829
|
+
if ((bool)is_little_endian != (bool)setup_info[0]) {
|
|
1830
|
+
has_same_endianness = false;
|
|
1831
|
+
saved_endian = is_little_endian? PlatformLittleEndian : PlatformBigEndian;
|
|
1832
|
+
}
|
|
1833
|
+
else {
|
|
1834
|
+
has_same_endianness = true;
|
|
1835
|
+
}
|
|
1836
|
+
|
|
1837
|
+
if (setup_info[1] == 0 && setup_info[2] == 3 && setup_info[3] == 0) {
|
|
1838
|
+
lacks_range_penalty = true;
|
|
1839
|
+
}
|
|
1840
|
+
|
|
1841
|
+
if (setup_info[1] == 0 && setup_info[2] < 4) {
|
|
1842
|
+
lacks_scoring_metric = true;
|
|
1843
|
+
}
|
|
1844
|
+
|
|
1845
|
+
if (setup_info[1] == 0 && setup_info[2] < 5) {
|
|
1846
|
+
lacks_indexer = true;
|
|
1847
|
+
}
|
|
1848
|
+
|
|
1849
|
+
if (setup_info[4] == (uint8_t)IsAbnormalDouble)
|
|
1850
|
+
fprintf(stderr, "Warning: input model uses non-standard numeric type, might read correctly.\n");
|
|
1851
|
+
|
|
1852
|
+
switch(setup_info[6])
|
|
1853
|
+
{
|
|
1854
|
+
case 16: {saved_int_t = Is16Bit; break;}
|
|
1855
|
+
case 32: {saved_int_t = Is32Bit; break;}
|
|
1856
|
+
case 64: {saved_int_t = Is64Bit; break;}
|
|
1857
|
+
default: {saved_int_t = IsOther; break;}
|
|
1858
|
+
}
|
|
1859
|
+
if ((uint8_t)sizeof(int) != setup_info[6]) {
|
|
1860
|
+
has_same_int_size = false;
|
|
1861
|
+
if (sizeof(uint8_t) != 1) return;
|
|
1862
|
+
if (saved_int_t == IsOther) return;
|
|
1863
|
+
}
|
|
1864
|
+
else {
|
|
1865
|
+
has_same_int_size = true;
|
|
1866
|
+
}
|
|
1867
|
+
|
|
1868
|
+
|
|
1869
|
+
if ((uint8_t)sizeof(size_t) != setup_info[7]) {
|
|
1870
|
+
has_same_size_t_size = false;
|
|
1871
|
+
if (sizeof(uint8_t) != 1) return;
|
|
1872
|
+
}
|
|
1873
|
+
else {
|
|
1874
|
+
has_same_size_t_size = true;
|
|
1875
|
+
}
|
|
1876
|
+
|
|
1877
|
+
|
|
1878
|
+
if ((uint8_t)sizeof(double) != setup_info[8]) {
|
|
1879
|
+
has_same_double = false;
|
|
1880
|
+
return;
|
|
1881
|
+
}
|
|
1882
|
+
else {
|
|
1883
|
+
has_same_double = true;
|
|
1884
|
+
}
|
|
1885
|
+
|
|
1886
|
+
saved_size_t = (PlatformSize)setup_info[5];
|
|
1887
|
+
#if SIZE_MAX == UINT32_MAX
|
|
1888
|
+
if (setup_info[5] != (uint8_t)Is32Bit)
|
|
1889
|
+
#elif SIZE_MAX == UINT64_MAX
|
|
1890
|
+
if (setup_info[5] != (uint8_t)Is64Bit)
|
|
1891
|
+
#else
|
|
1892
|
+
if (setup_info[5] != (uint8_t)IsOther)
|
|
1893
|
+
#endif
|
|
1894
|
+
{
|
|
1895
|
+
has_same_size_t_size = false;
|
|
1896
|
+
if (saved_size_t == IsOther)
|
|
1897
|
+
return;
|
|
1898
|
+
}
|
|
1899
|
+
|
|
1900
|
+
else {
|
|
1901
|
+
has_same_size_t_size = true;
|
|
1902
|
+
}
|
|
1903
|
+
|
|
1904
|
+
is_deserializable = true;
|
|
1905
|
+
}
|
|
1906
|
+
|
|
1907
|
+
template <class itype>
|
|
1908
|
+
void check_setup_info(itype &in)
|
|
1909
|
+
{
|
|
1910
|
+
bool has_watermark = false;
|
|
1911
|
+
bool has_incomplete_watermark = false;
|
|
1912
|
+
bool has_same_double = false;
|
|
1913
|
+
bool has_same_int_size = false;
|
|
1914
|
+
bool has_same_size_t_size = false;
|
|
1915
|
+
bool has_same_endianness = false;
|
|
1916
|
+
PlatformSize saved_int_t;
|
|
1917
|
+
PlatformSize saved_size_t;
|
|
1918
|
+
PlatformEndianness saved_endian;
|
|
1919
|
+
bool is_deserializable = false;
|
|
1920
|
+
bool lacks_range_penalty = false;
|
|
1921
|
+
bool lacks_scoring_metric = false;
|
|
1922
|
+
bool lacks_indexer = false;
|
|
1923
|
+
|
|
1924
|
+
check_setup_info(
|
|
1925
|
+
in,
|
|
1926
|
+
has_watermark,
|
|
1927
|
+
has_incomplete_watermark,
|
|
1928
|
+
has_same_double,
|
|
1929
|
+
has_same_int_size,
|
|
1930
|
+
has_same_size_t_size,
|
|
1931
|
+
has_same_endianness,
|
|
1932
|
+
saved_int_t,
|
|
1933
|
+
saved_size_t,
|
|
1934
|
+
saved_endian,
|
|
1935
|
+
is_deserializable,
|
|
1936
|
+
lacks_range_penalty,
|
|
1937
|
+
lacks_scoring_metric,
|
|
1938
|
+
lacks_indexer
|
|
1939
|
+
);
|
|
1940
|
+
|
|
1941
|
+
if (!has_watermark) {
|
|
1942
|
+
if (has_incomplete_watermark)
|
|
1943
|
+
throw std::runtime_error("Error: serialized model is incomplete.\n");
|
|
1944
|
+
else
|
|
1945
|
+
throw std::runtime_error("Error: input is not an isotree model.\n");
|
|
1946
|
+
}
|
|
1947
|
+
if (!has_same_double)
|
|
1948
|
+
throw std::runtime_error("Error: input model was saved in a machine with different 'double' type.\n");
|
|
1949
|
+
if (!has_same_int_size)
|
|
1950
|
+
throw std::runtime_error("Error: input model was saved in a machine with different integer type.\n");
|
|
1951
|
+
if (!has_same_size_t_size)
|
|
1952
|
+
throw std::runtime_error("Error: input model was saved in a machine with different 'size_t' type.\n");
|
|
1953
|
+
if (!has_same_endianness)
|
|
1954
|
+
throw std::runtime_error("Error: input model was saved in a machine with different endianness.\n");
|
|
1955
|
+
if (lacks_range_penalty || lacks_scoring_metric || lacks_indexer)
|
|
1956
|
+
throw std::runtime_error("Error: input model was produced with an incompatible earlier version, needs to be re-serialized.\n");
|
|
1957
|
+
}
|
|
1958
|
+
|
|
1959
|
+
template <class itype>
|
|
1960
|
+
void check_setup_info
|
|
1961
|
+
(
|
|
1962
|
+
itype &in,
|
|
1963
|
+
bool &has_same_int_size,
|
|
1964
|
+
bool &has_same_size_t_size,
|
|
1965
|
+
bool &has_same_endianness,
|
|
1966
|
+
PlatformSize &saved_int_t,
|
|
1967
|
+
PlatformSize &saved_size_t,
|
|
1968
|
+
PlatformEndianness &saved_endian,
|
|
1969
|
+
bool &lacks_range_penalty,
|
|
1970
|
+
bool &lacks_scoring_metric,
|
|
1971
|
+
bool &lacks_indexer
|
|
1972
|
+
)
|
|
1973
|
+
{
|
|
1974
|
+
bool has_watermark = false;
|
|
1975
|
+
bool has_incomplete_watermark = false;
|
|
1976
|
+
bool has_same_double = false;
|
|
1977
|
+
bool is_deserializable = false;
|
|
1978
|
+
|
|
1979
|
+
check_setup_info(
|
|
1980
|
+
in,
|
|
1981
|
+
has_watermark,
|
|
1982
|
+
has_incomplete_watermark,
|
|
1983
|
+
has_same_double,
|
|
1984
|
+
has_same_int_size,
|
|
1985
|
+
has_same_size_t_size,
|
|
1986
|
+
has_same_endianness,
|
|
1987
|
+
saved_int_t,
|
|
1988
|
+
saved_size_t,
|
|
1989
|
+
saved_endian,
|
|
1990
|
+
is_deserializable,
|
|
1991
|
+
lacks_range_penalty,
|
|
1992
|
+
lacks_scoring_metric,
|
|
1993
|
+
lacks_indexer
|
|
1994
|
+
);
|
|
1995
|
+
|
|
1996
|
+
if (!has_watermark) {
|
|
1997
|
+
if (has_incomplete_watermark)
|
|
1998
|
+
throw std::runtime_error("Error: serialized model is incomplete.\n");
|
|
1999
|
+
else
|
|
2000
|
+
throw std::runtime_error("Error: input is not an isotree model.\n");
|
|
2001
|
+
}
|
|
2002
|
+
if (!has_same_double)
|
|
2003
|
+
throw std::runtime_error("Error: input model was saved in a machine with different 'double' type.\n");
|
|
2004
|
+
if (!is_deserializable)
|
|
2005
|
+
throw std::runtime_error("Error: input format is incompatible.\n");
|
|
2006
|
+
}
|
|
2007
|
+
|
|
2008
|
+
/*  Number of bytes taken by the trailer that closes a serialized object:
    one 'uint8_t' followed by one 'size_t'. */
size_t get_size_ending_metadata() noexcept
{
    const size_t flag_bytes = sizeof(uint8_t);
    const size_t offset_bytes = sizeof(size_t);
    return flag_bytes + offset_bytes;
}
|
|
2015
|
+
|
|
2016
|
+
template <class Model>
|
|
2017
|
+
size_t determine_serialized_size(const Model &model) noexcept
|
|
2018
|
+
{
|
|
2019
|
+
size_t n_bytes = 0;
|
|
2020
|
+
n_bytes += get_size_setup_info();
|
|
2021
|
+
n_bytes += sizeof(uint8_t);
|
|
2022
|
+
n_bytes += sizeof(size_t);
|
|
2023
|
+
n_bytes += get_size_model(model);
|
|
2024
|
+
n_bytes += get_size_ending_metadata();
|
|
2025
|
+
return n_bytes;
|
|
2026
|
+
}
|
|
2027
|
+
|
|
2028
|
+
uint8_t get_model_code(const IsoForest &model) noexcept
|
|
2029
|
+
{
|
|
2030
|
+
return IsoForestModel;
|
|
2031
|
+
}
|
|
2032
|
+
|
|
2033
|
+
uint8_t get_model_code(const ExtIsoForest &model) noexcept
|
|
2034
|
+
{
|
|
2035
|
+
return ExtIsoForestModel;
|
|
2036
|
+
}
|
|
2037
|
+
|
|
2038
|
+
uint8_t get_model_code(const Imputer &model) noexcept
|
|
2039
|
+
{
|
|
2040
|
+
return ImputerModel;
|
|
2041
|
+
}
|
|
2042
|
+
|
|
2043
|
+
uint8_t get_model_code(const TreesIndexer &model) noexcept
|
|
2044
|
+
{
|
|
2045
|
+
return IndexerModel;
|
|
2046
|
+
}
|
|
2047
|
+
|
|
2048
|
+
template <class Model, class otype>
|
|
2049
|
+
void serialization_pipeline(const Model &model, otype &out)
|
|
2050
|
+
{
|
|
2051
|
+
SignalSwitcher ss = SignalSwitcher();
|
|
2052
|
+
|
|
2053
|
+
auto pos_watermark = set_return_position(out);
|
|
2054
|
+
|
|
2055
|
+
add_setup_info(out, false);
|
|
2056
|
+
uint8_t model_type = get_model_code(model);
|
|
2057
|
+
write_bytes<uint8_t>((void*)&model_type, (size_t)1, out);
|
|
2058
|
+
size_t size_model = get_size_model(model);
|
|
2059
|
+
write_bytes<size_t>((void*)&size_model, (size_t)1, out);
|
|
2060
|
+
serialize_model(model, out);
|
|
2061
|
+
check_interrupt_switch(ss);
|
|
2062
|
+
|
|
2063
|
+
/* This last bit will be left open in order to signal if anything follows,
|
|
2064
|
+
in case it's decided to change the format in the future or to add
|
|
2065
|
+
something additional, along with a 'size_t' slot in case it would need
|
|
2066
|
+
to jump ahead or something like that. */
|
|
2067
|
+
uint8_t ending_type = (uint8_t)EndsHere;
|
|
2068
|
+
write_bytes<uint8_t>((void*)&ending_type, (size_t)1, out);
|
|
2069
|
+
size_t jump_ahead = 0;
|
|
2070
|
+
write_bytes<size_t>((void*)&jump_ahead, (size_t)1, out);
|
|
2071
|
+
|
|
2072
|
+
auto end_pos = set_return_position(out);
|
|
2073
|
+
return_to_position(out, pos_watermark);
|
|
2074
|
+
add_full_watermark(out);
|
|
2075
|
+
return_to_position(out, end_pos);
|
|
2076
|
+
}
|
|
2077
|
+
|
|
2078
|
+
template <class Model, class itype>
|
|
2079
|
+
void deserialization_pipeline(Model &model, itype &in)
|
|
2080
|
+
{
|
|
2081
|
+
SignalSwitcher ss = SignalSwitcher();
|
|
2082
|
+
|
|
2083
|
+
bool has_same_int_size;
|
|
2084
|
+
bool has_same_size_t_size;
|
|
2085
|
+
bool has_same_endianness;
|
|
2086
|
+
PlatformSize saved_int_t;
|
|
2087
|
+
PlatformSize saved_size_t;
|
|
2088
|
+
PlatformEndianness saved_endian;
|
|
2089
|
+
bool lacks_range_penalty;
|
|
2090
|
+
bool lacks_scoring_metric;
|
|
2091
|
+
bool lacks_indexer; /* <- ignored */
|
|
2092
|
+
|
|
2093
|
+
check_setup_info(
|
|
2094
|
+
in,
|
|
2095
|
+
has_same_int_size,
|
|
2096
|
+
has_same_size_t_size,
|
|
2097
|
+
has_same_endianness,
|
|
2098
|
+
saved_int_t,
|
|
2099
|
+
saved_size_t,
|
|
2100
|
+
saved_endian,
|
|
2101
|
+
lacks_range_penalty,
|
|
2102
|
+
lacks_scoring_metric,
|
|
2103
|
+
lacks_indexer
|
|
2104
|
+
);
|
|
2105
|
+
|
|
2106
|
+
uint8_t model_type = get_model_code(model);
|
|
2107
|
+
uint8_t model_in;
|
|
2108
|
+
read_bytes<uint8_t>((void*)&model_in, (size_t)1, in);
|
|
2109
|
+
if (model_type != model_in)
|
|
2110
|
+
throw std::runtime_error("Object to de-serialize does not match with the supplied type.\n");
|
|
2111
|
+
|
|
2112
|
+
size_t size_model;
|
|
2113
|
+
if (has_same_int_size && has_same_size_t_size && has_same_endianness && !lacks_range_penalty && !lacks_scoring_metric)
|
|
2114
|
+
{
|
|
2115
|
+
read_bytes<size_t>((void*)&size_model, (size_t)1, in);
|
|
2116
|
+
deserialize_model(model, in);
|
|
2117
|
+
}
|
|
2118
|
+
|
|
2119
|
+
else
|
|
2120
|
+
{
|
|
2121
|
+
std::vector<char> buffer;
|
|
2122
|
+
const bool diff_endian = !has_same_endianness;
|
|
2123
|
+
|
|
2124
|
+
if (saved_int_t == Is16Bit && saved_size_t == Is32Bit)
|
|
2125
|
+
{
|
|
2126
|
+
read_bytes<size_t, uint32_t>((void*)&size_model, (size_t)1, in, buffer, diff_endian);
|
|
2127
|
+
deserialize_model<itype, int16_t, uint32_t>(model, in, buffer, diff_endian, lacks_range_penalty, lacks_scoring_metric);
|
|
2128
|
+
}
|
|
2129
|
+
|
|
2130
|
+
else if (saved_int_t == Is32Bit && saved_size_t == Is32Bit)
|
|
2131
|
+
{
|
|
2132
|
+
read_bytes<size_t, uint32_t>((void*)&size_model, (size_t)1, in, buffer, diff_endian);
|
|
2133
|
+
deserialize_model<itype, int32_t, uint32_t>(model, in, buffer, diff_endian, lacks_range_penalty, lacks_scoring_metric);
|
|
2134
|
+
}
|
|
2135
|
+
|
|
2136
|
+
else if (saved_int_t == Is64Bit && saved_size_t == Is32Bit)
|
|
2137
|
+
{
|
|
2138
|
+
read_bytes<size_t, uint32_t>((void*)&size_model, (size_t)1, in, buffer, diff_endian);
|
|
2139
|
+
deserialize_model<itype, int64_t, uint32_t>(model, in, buffer, diff_endian, lacks_range_penalty, lacks_scoring_metric);
|
|
2140
|
+
}
|
|
2141
|
+
|
|
2142
|
+
else if (saved_int_t == Is16Bit && saved_size_t == Is64Bit)
|
|
2143
|
+
{
|
|
2144
|
+
read_bytes<size_t, uint64_t>((void*)&size_model, (size_t)1, in, buffer, diff_endian);
|
|
2145
|
+
deserialize_model<itype, int16_t, uint64_t>(model, in, buffer, diff_endian, lacks_range_penalty, lacks_scoring_metric);
|
|
2146
|
+
}
|
|
2147
|
+
|
|
2148
|
+
else if (saved_int_t == Is32Bit && saved_size_t == Is64Bit)
|
|
2149
|
+
{
|
|
2150
|
+
read_bytes<size_t, uint64_t>((void*)&size_model, (size_t)1, in, buffer, diff_endian);
|
|
2151
|
+
deserialize_model<itype, int32_t, uint64_t>(model, in, buffer, diff_endian, lacks_range_penalty, lacks_scoring_metric);
|
|
2152
|
+
}
|
|
2153
|
+
|
|
2154
|
+
else if (saved_int_t == Is64Bit && saved_size_t == Is64Bit)
|
|
2155
|
+
{
|
|
2156
|
+
read_bytes<size_t, uint64_t>((void*)&size_model, (size_t)1, in, buffer, diff_endian);
|
|
2157
|
+
deserialize_model<itype, int64_t, uint64_t>(model, in, buffer, diff_endian, lacks_range_penalty, lacks_scoring_metric);
|
|
2158
|
+
}
|
|
2159
|
+
|
|
2160
|
+
else
|
|
2161
|
+
{
|
|
2162
|
+
unexpected_error();
|
|
2163
|
+
}
|
|
2164
|
+
}
|
|
2165
|
+
|
|
2166
|
+
check_interrupt_switch(ss);
|
|
2167
|
+
|
|
2168
|
+
if (lacks_range_penalty)
|
|
2169
|
+
{
|
|
2170
|
+
add_range_penalty(model);
|
|
2171
|
+
check_interrupt_switch(ss);
|
|
2172
|
+
}
|
|
2173
|
+
|
|
2174
|
+
/* Not currently used, but left in case the format changes */
|
|
2175
|
+
uint8_t ending_type;
|
|
2176
|
+
read_bytes<uint8_t>((void*)&ending_type, (size_t)1, in);
|
|
2177
|
+
size_t jump_ahead;
|
|
2178
|
+
read_bytes<size_t>((void*)&jump_ahead, (size_t)1, in);
|
|
2179
|
+
}
|
|
2180
|
+
|
|
2181
|
+
void re_serialization_pipeline(const IsoForest &model, char *&out)
|
|
2182
|
+
{
|
|
2183
|
+
SignalSwitcher ss = SignalSwitcher();
|
|
2184
|
+
|
|
2185
|
+
check_setup_info(out);
|
|
2186
|
+
|
|
2187
|
+
uint8_t model_in;
|
|
2188
|
+
memcpy(&model_in, out, sizeof(uint8_t));
|
|
2189
|
+
out += sizeof(uint8_t);
|
|
2190
|
+
if (model_in != get_model_code(model))
|
|
2191
|
+
throw std::runtime_error("Object to incrementally-serialize does not match with the supplied type.\n");
|
|
2192
|
+
|
|
2193
|
+
char *pos_size = out;
|
|
2194
|
+
size_t old_size;
|
|
2195
|
+
memcpy(&old_size, out, sizeof(size_t));
|
|
2196
|
+
out += sizeof(size_t);
|
|
2197
|
+
|
|
2198
|
+
char *old_end = out + old_size;
|
|
2199
|
+
uint8_t old_ending_type;
|
|
2200
|
+
memcpy(&old_ending_type, old_end, sizeof(uint8_t));
|
|
2201
|
+
size_t old_jump_ahead;
|
|
2202
|
+
memcpy(&old_jump_ahead, old_end + sizeof(uint8_t), sizeof(size_t));
|
|
2203
|
+
|
|
2204
|
+
size_t new_size = get_size_model(model);
|
|
2205
|
+
size_t new_ntrees = model.trees.size();
|
|
2206
|
+
|
|
2207
|
+
try
|
|
2208
|
+
{
|
|
2209
|
+
out += sizeof(uint8_t) * 3;
|
|
2210
|
+
if (model.has_range_penalty)
|
|
2211
|
+
{
|
|
2212
|
+
uint8_t has_range_penalty;
|
|
2213
|
+
memcpy(&has_range_penalty, out, sizeof(uint8_t));
|
|
2214
|
+
if (!has_range_penalty)
|
|
2215
|
+
memcpy(out, &has_range_penalty, sizeof(uint8_t));
|
|
2216
|
+
}
|
|
2217
|
+
out += sizeof(uint8_t);
|
|
2218
|
+
out += sizeof(double) * 2;
|
|
2219
|
+
out += sizeof(size_t);
|
|
2220
|
+
|
|
2221
|
+
char *pos_ntrees = out;
|
|
2222
|
+
size_t old_ntrees;
|
|
2223
|
+
memcpy(&old_ntrees, out, sizeof(size_t));
|
|
2224
|
+
|
|
2225
|
+
serialize_additional_trees(model, old_end, old_ntrees);
|
|
2226
|
+
|
|
2227
|
+
out = old_end;
|
|
2228
|
+
uint8_t ending_type = (uint8_t)EndsHere;
|
|
2229
|
+
memcpy(out, &ending_type, sizeof(uint8_t));
|
|
2230
|
+
out += sizeof(uint8_t);
|
|
2231
|
+
size_t jump_ahead = 0;
|
|
2232
|
+
memcpy(out, &jump_ahead, sizeof(size_t));
|
|
2233
|
+
out += sizeof(size_t);
|
|
2234
|
+
|
|
2235
|
+
/* Leave this for the end in case something fails, so as not to
|
|
2236
|
+
render the serialized bytes unusable. */
|
|
2237
|
+
memcpy(pos_size, &new_size, sizeof(size_t));
|
|
2238
|
+
memcpy(pos_ntrees, &new_ntrees, sizeof(size_t));
|
|
2239
|
+
}
|
|
2240
|
+
|
|
2241
|
+
catch(...)
|
|
2242
|
+
{
|
|
2243
|
+
memcpy(out, &old_ending_type, sizeof(uint8_t));
|
|
2244
|
+
memcpy(out + sizeof(uint8_t), &old_jump_ahead, sizeof(size_t));
|
|
2245
|
+
throw;
|
|
2246
|
+
}
|
|
2247
|
+
|
|
2248
|
+
check_interrupt_switch(ss);
|
|
2249
|
+
}
|
|
2250
|
+
|
|
2251
|
+
void re_serialization_pipeline(const ExtIsoForest &model, char *&out)
|
|
2252
|
+
{
|
|
2253
|
+
SignalSwitcher ss = SignalSwitcher();
|
|
2254
|
+
|
|
2255
|
+
check_setup_info(out);
|
|
2256
|
+
|
|
2257
|
+
uint8_t model_in;
|
|
2258
|
+
memcpy(&model_in, out, sizeof(uint8_t));
|
|
2259
|
+
out += sizeof(uint8_t);
|
|
2260
|
+
if (model_in != get_model_code(model))
|
|
2261
|
+
throw std::runtime_error("Object to incrementally-serialize does not match with the supplied type.\n");
|
|
2262
|
+
|
|
2263
|
+
char *pos_size = out;
|
|
2264
|
+
size_t old_size;
|
|
2265
|
+
memcpy(&old_size, out, sizeof(size_t));
|
|
2266
|
+
out += sizeof(size_t);
|
|
2267
|
+
|
|
2268
|
+
char *old_end = out + old_size;
|
|
2269
|
+
uint8_t old_ending_type;
|
|
2270
|
+
memcpy(&old_ending_type, old_end, sizeof(uint8_t));
|
|
2271
|
+
size_t old_jump_ahead;
|
|
2272
|
+
memcpy(&old_jump_ahead, old_end + sizeof(uint8_t), sizeof(size_t));
|
|
2273
|
+
|
|
2274
|
+
size_t new_size = get_size_model(model);
|
|
2275
|
+
size_t new_ntrees = model.hplanes.size();
|
|
2276
|
+
|
|
2277
|
+
try
|
|
2278
|
+
{
|
|
2279
|
+
out += sizeof(uint8_t) * 3;
|
|
2280
|
+
if (model.has_range_penalty)
|
|
2281
|
+
{
|
|
2282
|
+
uint8_t has_range_penalty;
|
|
2283
|
+
memcpy(&has_range_penalty, out, sizeof(uint8_t));
|
|
2284
|
+
if (!has_range_penalty)
|
|
2285
|
+
memcpy(out, &has_range_penalty, sizeof(uint8_t));
|
|
2286
|
+
}
|
|
2287
|
+
out += sizeof(uint8_t);
|
|
2288
|
+
out += sizeof(double) * 2;
|
|
2289
|
+
out += sizeof(size_t);
|
|
2290
|
+
char *pos_ntrees = out;
|
|
2291
|
+
size_t old_ntrees;
|
|
2292
|
+
memcpy(&old_ntrees, out, sizeof(size_t));
|
|
2293
|
+
out += sizeof(size_t);
|
|
2294
|
+
|
|
2295
|
+
serialize_additional_trees(model, old_end, old_ntrees);
|
|
2296
|
+
|
|
2297
|
+
out = old_end;
|
|
2298
|
+
uint8_t ending_type = (uint8_t)EndsHere;
|
|
2299
|
+
memcpy(out, &ending_type, sizeof(uint8_t));
|
|
2300
|
+
out += sizeof(uint8_t);
|
|
2301
|
+
size_t jump_ahead = 0;
|
|
2302
|
+
memcpy(out, &jump_ahead, sizeof(size_t));
|
|
2303
|
+
out += sizeof(size_t);
|
|
2304
|
+
|
|
2305
|
+
/* Leave this for the end in case something fails, so as not to
|
|
2306
|
+
render the serialized bytes unusable. */
|
|
2307
|
+
memcpy(pos_size, &new_size, sizeof(size_t));
|
|
2308
|
+
memcpy(pos_ntrees, &new_ntrees, sizeof(size_t));
|
|
2309
|
+
}
|
|
2310
|
+
|
|
2311
|
+
catch(...)
|
|
2312
|
+
{
|
|
2313
|
+
memcpy(out, &old_ending_type, sizeof(uint8_t));
|
|
2314
|
+
memcpy(out + sizeof(uint8_t), &old_jump_ahead, sizeof(size_t));
|
|
2315
|
+
throw;
|
|
2316
|
+
}
|
|
2317
|
+
|
|
2318
|
+
check_interrupt_switch(ss);
|
|
2319
|
+
}
|
|
2320
|
+
|
|
2321
|
+
void re_serialization_pipeline(const Imputer &model, char *&out)
|
|
2322
|
+
{
|
|
2323
|
+
SignalSwitcher ss = SignalSwitcher();
|
|
2324
|
+
|
|
2325
|
+
check_setup_info(out);
|
|
2326
|
+
|
|
2327
|
+
uint8_t model_in;
|
|
2328
|
+
memcpy(&model_in, out, sizeof(uint8_t));
|
|
2329
|
+
out += sizeof(uint8_t);
|
|
2330
|
+
if (model_in != get_model_code(model))
|
|
2331
|
+
throw std::runtime_error("Object to incrementally-serialize does not match with the supplied type.\n");
|
|
2332
|
+
|
|
2333
|
+
char *pos_size = out;
|
|
2334
|
+
size_t old_size;
|
|
2335
|
+
memcpy(&old_size, out, sizeof(size_t));
|
|
2336
|
+
out += sizeof(size_t);
|
|
2337
|
+
|
|
2338
|
+
char *old_end = out + old_size;
|
|
2339
|
+
uint8_t old_ending_type;
|
|
2340
|
+
memcpy(&old_ending_type, old_end, sizeof(uint8_t));
|
|
2341
|
+
size_t old_jump_ahead;
|
|
2342
|
+
memcpy(&old_jump_ahead, old_end + sizeof(uint8_t), sizeof(size_t));
|
|
2343
|
+
|
|
2344
|
+
size_t new_size = get_size_model(model);
|
|
2345
|
+
size_t new_ntrees = model.imputer_tree.size();
|
|
2346
|
+
|
|
2347
|
+
try
|
|
2348
|
+
{
|
|
2349
|
+
out += sizeof(size_t) * 3;
|
|
2350
|
+
|
|
2351
|
+
char *pos_ntrees = out;
|
|
2352
|
+
size_t old_ntrees;
|
|
2353
|
+
memcpy(&old_ntrees, out, sizeof(size_t));
|
|
2354
|
+
|
|
2355
|
+
serialize_additional_trees(model, old_end, old_ntrees);
|
|
2356
|
+
|
|
2357
|
+
out = old_end;
|
|
2358
|
+
uint8_t ending_type = (uint8_t)EndsHere;
|
|
2359
|
+
memcpy(out, &ending_type, sizeof(uint8_t));
|
|
2360
|
+
out += sizeof(uint8_t);
|
|
2361
|
+
size_t jump_ahead = 0;
|
|
2362
|
+
memcpy(out, &jump_ahead, sizeof(size_t));
|
|
2363
|
+
out += sizeof(size_t);
|
|
2364
|
+
|
|
2365
|
+
/* Leave this for the end in case something fails, so as not to
|
|
2366
|
+
render the serialized bytes unusable. */
|
|
2367
|
+
memcpy(pos_size, &new_size, sizeof(size_t));
|
|
2368
|
+
memcpy(pos_ntrees, &new_ntrees, sizeof(size_t));
|
|
2369
|
+
}
|
|
2370
|
+
|
|
2371
|
+
catch(...)
|
|
2372
|
+
{
|
|
2373
|
+
memcpy(out, &old_ending_type, sizeof(uint8_t));
|
|
2374
|
+
memcpy(out + sizeof(uint8_t), &old_jump_ahead, sizeof(size_t));
|
|
2375
|
+
throw;
|
|
2376
|
+
}
|
|
2377
|
+
|
|
2378
|
+
check_interrupt_switch(ss);
|
|
2379
|
+
}
|
|
2380
|
+
|
|
2381
|
+
void re_serialization_pipeline(const TreesIndexer &model, char *&out)
|
|
2382
|
+
{
|
|
2383
|
+
SignalSwitcher ss = SignalSwitcher();
|
|
2384
|
+
|
|
2385
|
+
check_setup_info(out);
|
|
2386
|
+
|
|
2387
|
+
uint8_t model_in;
|
|
2388
|
+
memcpy(&model_in, out, sizeof(uint8_t));
|
|
2389
|
+
out += sizeof(uint8_t);
|
|
2390
|
+
if (model_in != get_model_code(model))
|
|
2391
|
+
throw std::runtime_error("Object to incrementally-serialize does not match with the supplied type.\n");
|
|
2392
|
+
|
|
2393
|
+
char *pos_size = out;
|
|
2394
|
+
size_t old_size;
|
|
2395
|
+
memcpy(&old_size, out, sizeof(size_t));
|
|
2396
|
+
out += sizeof(size_t);
|
|
2397
|
+
|
|
2398
|
+
char *old_end = out + old_size;
|
|
2399
|
+
uint8_t old_ending_type;
|
|
2400
|
+
memcpy(&old_ending_type, old_end, sizeof(uint8_t));
|
|
2401
|
+
size_t old_jump_ahead;
|
|
2402
|
+
memcpy(&old_jump_ahead, old_end + sizeof(uint8_t), sizeof(size_t));
|
|
2403
|
+
|
|
2404
|
+
size_t new_size = get_size_model(model);
|
|
2405
|
+
size_t new_ntrees = model.indices.size();
|
|
2406
|
+
|
|
2407
|
+
try
|
|
2408
|
+
{
|
|
2409
|
+
char *pos_ntrees = out;
|
|
2410
|
+
size_t old_ntrees;
|
|
2411
|
+
memcpy(&old_ntrees, out, sizeof(size_t));
|
|
2412
|
+
|
|
2413
|
+
serialize_additional_trees(model, old_end, old_ntrees);
|
|
2414
|
+
|
|
2415
|
+
out = old_end;
|
|
2416
|
+
uint8_t ending_type = (uint8_t)EndsHere;
|
|
2417
|
+
memcpy(out, &ending_type, sizeof(uint8_t));
|
|
2418
|
+
out += sizeof(uint8_t);
|
|
2419
|
+
size_t jump_ahead = 0;
|
|
2420
|
+
memcpy(out, &jump_ahead, sizeof(size_t));
|
|
2421
|
+
out += sizeof(size_t);
|
|
2422
|
+
|
|
2423
|
+
/* Leave this for the end in case something fails, so as not to
|
|
2424
|
+
render the serialized bytes unusable. */
|
|
2425
|
+
memcpy(pos_size, &new_size, sizeof(size_t));
|
|
2426
|
+
memcpy(pos_ntrees, &new_ntrees, sizeof(size_t));
|
|
2427
|
+
}
|
|
2428
|
+
|
|
2429
|
+
catch(...)
|
|
2430
|
+
{
|
|
2431
|
+
memcpy(out, &old_ending_type, sizeof(uint8_t));
|
|
2432
|
+
memcpy(out + sizeof(uint8_t), &old_jump_ahead, sizeof(size_t));
|
|
2433
|
+
throw;
|
|
2434
|
+
}
|
|
2435
|
+
|
|
2436
|
+
check_interrupt_switch(ss);
|
|
2437
|
+
}
|
|
2438
|
+
|
|
2439
|
+
void incremental_serialize_IsoForest(const IsoForest &model, char *old_bytes_reallocated)
|
|
2440
|
+
{
|
|
2441
|
+
char *out = old_bytes_reallocated;
|
|
2442
|
+
re_serialization_pipeline(model, out);
|
|
2443
|
+
}
|
|
2444
|
+
|
|
2445
|
+
void incremental_serialize_ExtIsoForest(const ExtIsoForest &model, char *old_bytes_reallocated)
|
|
2446
|
+
{
|
|
2447
|
+
char *out = old_bytes_reallocated;
|
|
2448
|
+
re_serialization_pipeline(model, out);
|
|
2449
|
+
}
|
|
2450
|
+
|
|
2451
|
+
void incremental_serialize_Imputer(const Imputer &model, char *old_bytes_reallocated)
|
|
2452
|
+
{
|
|
2453
|
+
char *out = old_bytes_reallocated;
|
|
2454
|
+
re_serialization_pipeline(model, out);
|
|
2455
|
+
}
|
|
2456
|
+
|
|
2457
|
+
void incremental_serialize_Indexer(const TreesIndexer &model, char *old_bytes_reallocated)
|
|
2458
|
+
{
|
|
2459
|
+
char *out = old_bytes_reallocated;
|
|
2460
|
+
re_serialization_pipeline(model, out);
|
|
2461
|
+
}
|
|
2462
|
+
|
|
2463
|
+
template <class Model>
|
|
2464
|
+
void incremental_serialize_string(const Model &model, std::string &old_bytes)
|
|
2465
|
+
{
|
|
2466
|
+
size_t new_size = determine_serialized_size(model);
|
|
2467
|
+
if (old_bytes.size() > new_size)
|
|
2468
|
+
throw std::runtime_error("'old_bytes' is not a subset of 'model'.\n");
|
|
2469
|
+
if (!new_size)
|
|
2470
|
+
unexpected_error();
|
|
2471
|
+
old_bytes.resize(new_size);
|
|
2472
|
+
char *out = &old_bytes[0];
|
|
2473
|
+
re_serialization_pipeline(model, out);
|
|
2474
|
+
}
|
|
2475
|
+
|
|
2476
|
+
void incremental_serialize_IsoForest(const IsoForest &model, std::string &old_bytes)
|
|
2477
|
+
{
|
|
2478
|
+
incremental_serialize_string(model, old_bytes);
|
|
2479
|
+
}
|
|
2480
|
+
|
|
2481
|
+
void incremental_serialize_ExtIsoForest(const ExtIsoForest &model, std::string &old_bytes)
|
|
2482
|
+
{
|
|
2483
|
+
incremental_serialize_string(model, old_bytes);
|
|
2484
|
+
}
|
|
2485
|
+
|
|
2486
|
+
void incremental_serialize_Imputer(const Imputer &model, std::string &old_bytes)
|
|
2487
|
+
{
|
|
2488
|
+
incremental_serialize_string(model, old_bytes);
|
|
2489
|
+
}
|
|
2490
|
+
|
|
2491
|
+
void incremental_serialize_Indexer(const TreesIndexer &model, std::string &old_bytes)
|
|
2492
|
+
{
|
|
2493
|
+
incremental_serialize_string(model, old_bytes);
|
|
2494
|
+
}
|
|
2495
|
+
|
|
2496
|
+
/* Serializes 'model' into a newly created 'std::string' and returns it.

   The string is sized with 'determine_serialized_size', i.e. the size of
   the whole serialized object. 'get_size_model' is only the size of the
   model contents: the pipeline additionally writes the setup info, the
   model type byte, the size field and the trailer, so sizing the buffer
   with it would make the pipeline write past the end of the string. */
template <class Model>
std::string serialization_pipeline(const Model &model)
{
    std::string serialized;
    serialized.resize(determine_serialized_size(model));
    char *ptr = &serialized[0];
    serialization_pipeline(model, ptr);
    return serialized;
}
|
|
2505
|
+
|
|
2506
|
+
template <class Model>
|
|
2507
|
+
void serialization_pipeline_ToFile(const Model &model, const char *fname)
|
|
2508
|
+
{
|
|
2509
|
+
FileHandle f(fname, "wb");
|
|
2510
|
+
serialization_pipeline(model, f.handle);
|
|
2511
|
+
}
|
|
2512
|
+
|
|
2513
|
+
#ifdef WCHAR_T_FUNS
|
|
2514
|
+
template <class Model>
|
|
2515
|
+
void serialization_pipeline_ToFile(const Model &model, const wchar_t *fname)
|
|
2516
|
+
{
|
|
2517
|
+
WFileHandle f(fname, L"wb");
|
|
2518
|
+
serialization_pipeline(model, f.handle);
|
|
2519
|
+
}
|
|
2520
|
+
#endif
|
|
2521
|
+
|
|
2522
|
+
size_t determine_serialized_size(const IsoForest &model) noexcept
|
|
2523
|
+
{
|
|
2524
|
+
return determine_serialized_size<IsoForest>(model);
|
|
2525
|
+
}
|
|
2526
|
+
|
|
2527
|
+
size_t determine_serialized_size(const ExtIsoForest &model) noexcept
|
|
2528
|
+
{
|
|
2529
|
+
return determine_serialized_size<ExtIsoForest>(model);
|
|
2530
|
+
}
|
|
2531
|
+
|
|
2532
|
+
size_t determine_serialized_size(const Imputer &model) noexcept
|
|
2533
|
+
{
|
|
2534
|
+
return determine_serialized_size<Imputer>(model);
|
|
2535
|
+
}
|
|
2536
|
+
|
|
2537
|
+
size_t determine_serialized_size(const TreesIndexer &model) noexcept
|
|
2538
|
+
{
|
|
2539
|
+
return determine_serialized_size<TreesIndexer>(model);
|
|
2540
|
+
}
|
|
2541
|
+
|
|
2542
|
+
void serialize_IsoForest(const IsoForest &model, char *out)
|
|
2543
|
+
{
|
|
2544
|
+
serialization_pipeline(model, out);
|
|
2545
|
+
}
|
|
2546
|
+
|
|
2547
|
+
void serialize_IsoForest(const IsoForest &model, FILE *out)
|
|
2548
|
+
{
|
|
2549
|
+
serialization_pipeline(model, out);
|
|
2550
|
+
}
|
|
2551
|
+
|
|
2552
|
+
void serialize_IsoForest(const IsoForest &model, std::ostream &out)
|
|
2553
|
+
{
|
|
2554
|
+
serialization_pipeline(model, out);
|
|
2555
|
+
}
|
|
2556
|
+
|
|
2557
|
+
std::string serialize_IsoForest(const IsoForest &model)
|
|
2558
|
+
{
|
|
2559
|
+
return serialization_pipeline(model);
|
|
2560
|
+
}
|
|
2561
|
+
|
|
2562
|
+
void serialize_IsoForest_ToFile(const IsoForest &model, const char *fname)
|
|
2563
|
+
{
|
|
2564
|
+
serialization_pipeline_ToFile(model, fname);
|
|
2565
|
+
}
|
|
2566
|
+
|
|
2567
|
+
#ifdef WCHAR_T_FUNS
|
|
2568
|
+
void serialize_IsoForest_ToFile(const IsoForest &model, const wchar_t *fname)
|
|
2569
|
+
{
|
|
2570
|
+
serialization_pipeline_ToFile(model, fname);
|
|
2571
|
+
}
|
|
2572
|
+
#endif
|
|
2573
|
+
|
|
2574
|
+
void deserialize_IsoForest(IsoForest &model, const char *in)
|
|
2575
|
+
{
|
|
2576
|
+
deserialization_pipeline(model, in);
|
|
2577
|
+
}
|
|
2578
|
+
|
|
2579
|
+
void deserialize_IsoForest(IsoForest &model, FILE *in)
|
|
2580
|
+
{
|
|
2581
|
+
deserialization_pipeline(model, in);
|
|
2582
|
+
}
|
|
2583
|
+
|
|
2584
|
+
void deserialize_IsoForest(IsoForest &model, std::istream &in)
|
|
2585
|
+
{
|
|
2586
|
+
deserialization_pipeline(model, in);
|
|
2587
|
+
}
|
|
2588
|
+
|
|
2589
|
+
void deserialize_IsoForest(IsoForest &model, const std::string &in)
|
|
2590
|
+
{
|
|
2591
|
+
if (!in.size())
|
|
2592
|
+
throw std::runtime_error("Invalid input model to deserialize.");
|
|
2593
|
+
const char *in_ = &in[0];
|
|
2594
|
+
deserialization_pipeline(model, in_);
|
|
2595
|
+
}
|
|
2596
|
+
|
|
2597
|
+
/* De-serializes an 'IsoForest' from the file named 'fname', which is
   opened in binary read mode ("rb") through a 'FileHandle'. */
void deserialize_IsoForest_FromFile(IsoForest &model, const char *fname)
{
    FileHandle file(fname, "rb");
    deserialize_IsoForest(model, file.handle);
}
|
|
2602
|
+
|
|
2603
|
+
#ifdef WCHAR_T_FUNS
|
|
2604
|
+
/* Wide-character file name variant: de-serializes an 'IsoForest' from the
   file named 'fname', opened in binary read mode (L"rb") through a
   'WFileHandle'. */
void deserialize_IsoForest_FromFile(IsoForest &model, const wchar_t *fname)
{
    WFileHandle file(fname, L"rb");
    deserialize_IsoForest(model, file.handle);
}
|
|
2609
|
+
#endif
|
|
2610
|
+
|
|
2611
|
+
void serialize_ExtIsoForest(const ExtIsoForest &model, char *out)
|
|
2612
|
+
{
|
|
2613
|
+
serialization_pipeline(model, out);
|
|
2614
|
+
}
|
|
2615
|
+
|
|
2616
|
+
void serialize_ExtIsoForest(const ExtIsoForest &model, FILE *out)
|
|
2617
|
+
{
|
|
2618
|
+
serialization_pipeline(model, out);
|
|
2619
|
+
}
|
|
2620
|
+
|
|
2621
|
+
void serialize_ExtIsoForest(const ExtIsoForest &model, std::ostream &out)
|
|
2622
|
+
{
|
|
2623
|
+
serialization_pipeline(model, out);
|
|
2624
|
+
}
|
|
2625
|
+
|
|
2626
|
+
std::string serialize_ExtIsoForest(const ExtIsoForest &model)
|
|
2627
|
+
{
|
|
2628
|
+
return serialization_pipeline(model);
|
|
2629
|
+
}
|
|
2630
|
+
|
|
2631
|
+
void serialize_ExtIsoForest_ToFile(const ExtIsoForest &model, const char *fname)
|
|
2632
|
+
{
|
|
2633
|
+
serialization_pipeline_ToFile(model, fname);
|
|
2634
|
+
}
|
|
2635
|
+
|
|
2636
|
+
#ifdef WCHAR_T_FUNS
|
|
2637
|
+
void serialize_ExtIsoForest_ToFile(const ExtIsoForest &model, const wchar_t *fname)
|
|
2638
|
+
{
|
|
2639
|
+
serialization_pipeline_ToFile(model, fname);
|
|
2640
|
+
}
|
|
2641
|
+
#endif
|
|
2642
|
+
|
|
2643
|
+
void deserialize_ExtIsoForest(ExtIsoForest &model, const char *in)
|
|
2644
|
+
{
|
|
2645
|
+
deserialization_pipeline(model, in);
|
|
2646
|
+
}
|
|
2647
|
+
|
|
2648
|
+
void deserialize_ExtIsoForest(ExtIsoForest &model, FILE *in)
|
|
2649
|
+
{
|
|
2650
|
+
deserialization_pipeline(model, in);
|
|
2651
|
+
}
|
|
2652
|
+
|
|
2653
|
+
void deserialize_ExtIsoForest(ExtIsoForest &model, std::istream &in)
|
|
2654
|
+
{
|
|
2655
|
+
deserialization_pipeline(model, in);
|
|
2656
|
+
}
|
|
2657
|
+
|
|
2658
|
+
void deserialize_ExtIsoForest(ExtIsoForest &model, const std::string &in)
|
|
2659
|
+
{
|
|
2660
|
+
if (!in.size())
|
|
2661
|
+
throw std::runtime_error("Invalid input model to deserialize.");
|
|
2662
|
+
const char *in_ = &in[0];
|
|
2663
|
+
deserialization_pipeline(model, in_);
|
|
2664
|
+
}
|
|
2665
|
+
|
|
2666
|
+
/* De-serializes an 'ExtIsoForest' from the file named 'fname', which is
   opened in binary read mode ("rb") through a 'FileHandle'. */
void deserialize_ExtIsoForest_FromFile(ExtIsoForest &model, const char *fname)
{
    FileHandle file(fname, "rb");
    deserialize_ExtIsoForest(model, file.handle);
}
|
|
2671
|
+
|
|
2672
|
+
#ifdef WCHAR_T_FUNS
|
|
2673
|
+
/* Wide-character file name variant: de-serializes an 'ExtIsoForest' from
   the file named 'fname', opened in binary read mode (L"rb") through a
   'WFileHandle'. */
void deserialize_ExtIsoForest_FromFile(ExtIsoForest &model, const wchar_t *fname)
{
    WFileHandle file(fname, L"rb");
    deserialize_ExtIsoForest(model, file.handle);
}
|
|
2678
|
+
#endif
|
|
2679
|
+
|
|
2680
|
+
void serialize_Imputer(const Imputer &model, char *out)
|
|
2681
|
+
{
|
|
2682
|
+
serialization_pipeline(model, out);
|
|
2683
|
+
}
|
|
2684
|
+
|
|
2685
|
+
void serialize_Imputer(const Imputer &model, FILE *out)
|
|
2686
|
+
{
|
|
2687
|
+
serialization_pipeline(model, out);
|
|
2688
|
+
}
|
|
2689
|
+
|
|
2690
|
+
void serialize_Imputer(const Imputer &model, std::ostream &out)
|
|
2691
|
+
{
|
|
2692
|
+
serialization_pipeline(model, out);
|
|
2693
|
+
}
|
|
2694
|
+
|
|
2695
|
+
std::string serialize_Imputer(const Imputer &model)
|
|
2696
|
+
{
|
|
2697
|
+
return serialization_pipeline(model);
|
|
2698
|
+
}
|
|
2699
|
+
|
|
2700
|
+
void serialize_Imputer_ToFile(const Imputer &model, const char *fname)
|
|
2701
|
+
{
|
|
2702
|
+
serialization_pipeline_ToFile(model, fname);
|
|
2703
|
+
}
|
|
2704
|
+
|
|
2705
|
+
#ifdef WCHAR_T_FUNS
|
|
2706
|
+
void serialize_Imputer_ToFile(const Imputer &model, const wchar_t *fname)
|
|
2707
|
+
{
|
|
2708
|
+
serialization_pipeline_ToFile(model, fname);
|
|
2709
|
+
}
|
|
2710
|
+
#endif
|
|
2711
|
+
|
|
2712
|
+
void deserialize_Imputer(Imputer &model, const char *in)
|
|
2713
|
+
{
|
|
2714
|
+
deserialization_pipeline(model, in);
|
|
2715
|
+
}
|
|
2716
|
+
|
|
2717
|
+
void deserialize_Imputer(Imputer &model, FILE *in)
|
|
2718
|
+
{
|
|
2719
|
+
deserialization_pipeline(model, in);
|
|
2720
|
+
}
|
|
2721
|
+
|
|
2722
|
+
void deserialize_Imputer(Imputer &model, std::istream &in)
|
|
2723
|
+
{
|
|
2724
|
+
deserialization_pipeline(model, in);
|
|
2725
|
+
}
|
|
2726
|
+
|
|
2727
|
+
void deserialize_Imputer(Imputer &model, const std::string &in)
|
|
2728
|
+
{
|
|
2729
|
+
if (!in.size())
|
|
2730
|
+
throw std::runtime_error("Invalid input model to deserialize.");
|
|
2731
|
+
const char *in_ = &in[0];
|
|
2732
|
+
deserialization_pipeline(model, in_);
|
|
2733
|
+
}
|
|
2734
|
+
|
|
2735
|
+
/* De-serializes an 'Imputer' from the file named 'fname', which is opened
   in binary read mode ("rb") through a 'FileHandle'. */
void deserialize_Imputer_FromFile(Imputer &model, const char *fname)
{
    FileHandle file(fname, "rb");
    deserialize_Imputer(model, file.handle);
}
|
|
2740
|
+
|
|
2741
|
+
#ifdef WCHAR_T_FUNS
|
|
2742
|
+
/* Wide-character file name variant: de-serializes an 'Imputer' from the
   file named 'fname', opened in binary read mode (L"rb") through a
   'WFileHandle'. */
void deserialize_Imputer_FromFile(Imputer &model, const wchar_t *fname)
{
    WFileHandle file(fname, L"rb");
    deserialize_Imputer(model, file.handle);
}
|
|
2747
|
+
#endif
|
|
2748
|
+
|
|
2749
|
+
void serialize_Indexer(const TreesIndexer &model, char *out)
|
|
2750
|
+
{
|
|
2751
|
+
serialization_pipeline(model, out);
|
|
2752
|
+
}
|
|
2753
|
+
|
|
2754
|
+
void serialize_Indexer(const TreesIndexer &model, FILE *out)
|
|
2755
|
+
{
|
|
2756
|
+
serialization_pipeline(model, out);
|
|
2757
|
+
}
|
|
2758
|
+
|
|
2759
|
+
void serialize_Indexer(const TreesIndexer &model, std::ostream &out)
|
|
2760
|
+
{
|
|
2761
|
+
serialization_pipeline(model, out);
|
|
2762
|
+
}
|
|
2763
|
+
|
|
2764
|
+
std::string serialize_Indexer(const TreesIndexer &model)
|
|
2765
|
+
{
|
|
2766
|
+
return serialization_pipeline(model);
|
|
2767
|
+
}
|
|
2768
|
+
|
|
2769
|
+
void serialize_Indexer_ToFile(const TreesIndexer &model, const char *fname)
|
|
2770
|
+
{
|
|
2771
|
+
serialization_pipeline_ToFile(model, fname);
|
|
2772
|
+
}
|
|
2773
|
+
|
|
2774
|
+
#ifdef WCHAR_T_FUNS
|
|
2775
|
+
void serialize_Indexer_ToFile(const TreesIndexer &model, const wchar_t *fname)
|
|
2776
|
+
{
|
|
2777
|
+
serialization_pipeline_ToFile(model, fname);
|
|
2778
|
+
}
|
|
2779
|
+
#endif
|
|
2780
|
+
|
|
2781
|
+
void deserialize_Indexer(TreesIndexer &model, const char *in)
|
|
2782
|
+
{
|
|
2783
|
+
deserialization_pipeline(model, in);
|
|
2784
|
+
}
|
|
2785
|
+
|
|
2786
|
+
void deserialize_Indexer(TreesIndexer &model, FILE *in)
|
|
2787
|
+
{
|
|
2788
|
+
deserialization_pipeline(model, in);
|
|
2789
|
+
}
|
|
2790
|
+
|
|
2791
|
+
void deserialize_Indexer(TreesIndexer &model, std::istream &in)
|
|
2792
|
+
{
|
|
2793
|
+
deserialization_pipeline(model, in);
|
|
2794
|
+
}
|
|
2795
|
+
|
|
2796
|
+
void deserialize_Indexer(TreesIndexer &model, const std::string &in)
|
|
2797
|
+
{
|
|
2798
|
+
if (!in.size())
|
|
2799
|
+
throw std::runtime_error("Invalid input model to deserialize.");
|
|
2800
|
+
const char *in_ = &in[0];
|
|
2801
|
+
deserialization_pipeline(model, in_);
|
|
2802
|
+
}
|
|
2803
|
+
|
|
2804
|
+
/* De-serializes a 'TreesIndexer' from the file named 'fname', which is
   opened in binary read mode ("rb") through a 'FileHandle'. */
void deserialize_Indexer_FromFile(TreesIndexer &model, const char *fname)
{
    FileHandle file(fname, "rb");
    deserialize_Indexer(model, file.handle);
}
|
|
2809
|
+
|
|
2810
|
+
#ifdef WCHAR_T_FUNS
|
|
2811
|
+
/* Wide-character file name variant: de-serializes a 'TreesIndexer' from
   the file named 'fname', opened in binary read mode (L"rb") through a
   'WFileHandle'. */
void deserialize_Indexer_FromFile(TreesIndexer &model, const wchar_t *fname)
{
    WFileHandle file(fname, L"rb");
    deserialize_Indexer(model, file.handle);
}
|
|
2816
|
+
#endif
|
|
2817
|
+
|
|
2818
|
+
/* Shorthands to use in templates (will be used in R) */
|
|
2819
|
+
void serialize_isotree(const IsoForest &model, char *out)
|
|
2820
|
+
{
|
|
2821
|
+
serialize_IsoForest(model, out);
|
|
2822
|
+
}
|
|
2823
|
+
|
|
2824
|
+
void serialize_isotree(const ExtIsoForest &model, char *out)
|
|
2825
|
+
{
|
|
2826
|
+
serialize_ExtIsoForest(model, out);
|
|
2827
|
+
}
|
|
2828
|
+
|
|
2829
|
+
void serialize_isotree(const Imputer &model, char *out)
|
|
2830
|
+
{
|
|
2831
|
+
serialize_Imputer(model, out);
|
|
2832
|
+
}
|
|
2833
|
+
|
|
2834
|
+
void serialize_isotree(const TreesIndexer &model, char *out)
|
|
2835
|
+
{
|
|
2836
|
+
serialize_Indexer(model, out);
|
|
2837
|
+
}
|
|
2838
|
+
|
|
2839
|
+
void deserialize_isotree(IsoForest &model, const char *in)
|
|
2840
|
+
{
|
|
2841
|
+
deserialize_IsoForest(model, in);
|
|
2842
|
+
}
|
|
2843
|
+
|
|
2844
|
+
void deserialize_isotree(ExtIsoForest &model, const char *in)
|
|
2845
|
+
{
|
|
2846
|
+
deserialize_ExtIsoForest(model, in);
|
|
2847
|
+
}
|
|
2848
|
+
|
|
2849
|
+
void deserialize_isotree(Imputer &model, const char *in)
|
|
2850
|
+
{
|
|
2851
|
+
deserialize_Imputer(model, in);
|
|
2852
|
+
}
|
|
2853
|
+
|
|
2854
|
+
void deserialize_isotree(TreesIndexer &model, const char *in)
|
|
2855
|
+
{
|
|
2856
|
+
deserialize_Indexer(model, in);
|
|
2857
|
+
}
|
|
2858
|
+
|
|
2859
|
+
void incremental_serialize_isotree(const IsoForest &model, char *old_bytes_reallocated)
|
|
2860
|
+
{
|
|
2861
|
+
incremental_serialize_IsoForest(model, old_bytes_reallocated);
|
|
2862
|
+
}
|
|
2863
|
+
|
|
2864
|
+
void incremental_serialize_isotree(const ExtIsoForest &model, char *old_bytes_reallocated)
|
|
2865
|
+
{
|
|
2866
|
+
incremental_serialize_ExtIsoForest(model, old_bytes_reallocated);
|
|
2867
|
+
}
|
|
2868
|
+
|
|
2869
|
+
void incremental_serialize_isotree(const Imputer &model, char *old_bytes_reallocated)
|
|
2870
|
+
{
|
|
2871
|
+
incremental_serialize_Imputer(model, old_bytes_reallocated);
|
|
2872
|
+
}
|
|
2873
|
+
|
|
2874
|
+
void incremental_serialize_isotree(const TreesIndexer &model, char *old_bytes_reallocated)
|
|
2875
|
+
{
|
|
2876
|
+
incremental_serialize_Indexer(model, old_bytes_reallocated);
|
|
2877
|
+
}
|
|
2878
|
+
|
|
2879
|
+
template <class itype>
|
|
2880
|
+
void read_bytes_size_t(void *ptr, const size_t n_els, itype &in, const PlatformSize saved_size_t, const bool has_same_endianness)
|
|
2881
|
+
{
|
|
2882
|
+
std::vector<char> buffer;
|
|
2883
|
+
switch(saved_size_t)
|
|
2884
|
+
{
|
|
2885
|
+
case Is32Bit:
|
|
2886
|
+
{
|
|
2887
|
+
read_bytes<size_t, uint32_t>(ptr, n_els, in, buffer, !has_same_endianness);
|
|
2888
|
+
break;
|
|
2889
|
+
}
|
|
2890
|
+
|
|
2891
|
+
case Is64Bit:
|
|
2892
|
+
{
|
|
2893
|
+
read_bytes<size_t, uint64_t>(ptr, n_els, in, buffer, !has_same_endianness);
|
|
2894
|
+
break;
|
|
2895
|
+
}
|
|
2896
|
+
|
|
2897
|
+
default:
|
|
2898
|
+
{
|
|
2899
|
+
unexpected_error();
|
|
2900
|
+
}
|
|
2901
|
+
}
|
|
2902
|
+
}
|
|
2903
|
+
|
|
2904
|
+
template <class itype>
|
|
2905
|
+
void inspect_serialized_object
|
|
2906
|
+
(
|
|
2907
|
+
itype &serialized_bytes,
|
|
2908
|
+
bool &is_isotree_model,
|
|
2909
|
+
bool &is_compatible,
|
|
2910
|
+
bool &has_combined_objects,
|
|
2911
|
+
bool &has_IsoForest,
|
|
2912
|
+
bool &has_ExtIsoForest,
|
|
2913
|
+
bool &has_Imputer,
|
|
2914
|
+
bool &has_Indexer,
|
|
2915
|
+
bool &has_metadata,
|
|
2916
|
+
size_t &size_metadata,
|
|
2917
|
+
bool &has_same_int_size,
|
|
2918
|
+
bool &has_same_size_t_size,
|
|
2919
|
+
bool &has_same_endianness,
|
|
2920
|
+
bool &lacks_range_penalty,
|
|
2921
|
+
bool &lacks_scoring_metric
|
|
2922
|
+
)
|
|
2923
|
+
{
|
|
2924
|
+
auto saved_position = set_return_position(serialized_bytes);
|
|
2925
|
+
|
|
2926
|
+
is_isotree_model = false;
|
|
2927
|
+
is_compatible = false;
|
|
2928
|
+
has_combined_objects = false;
|
|
2929
|
+
has_IsoForest = false;
|
|
2930
|
+
has_ExtIsoForest = false;
|
|
2931
|
+
has_Imputer = false;
|
|
2932
|
+
has_Indexer = false;
|
|
2933
|
+
has_metadata = false;
|
|
2934
|
+
size_metadata = 0;
|
|
2935
|
+
|
|
2936
|
+
bool lacks_indexer = false;
|
|
2937
|
+
|
|
2938
|
+
bool has_same_double = false;
|
|
2939
|
+
bool has_incomplete_watermark = false;
|
|
2940
|
+
PlatformSize saved_int_t;
|
|
2941
|
+
PlatformSize saved_size_t;
|
|
2942
|
+
PlatformEndianness saved_endian;
|
|
2943
|
+
check_setup_info(
|
|
2944
|
+
serialized_bytes,
|
|
2945
|
+
is_isotree_model,
|
|
2946
|
+
has_incomplete_watermark,
|
|
2947
|
+
has_same_double,
|
|
2948
|
+
has_same_int_size,
|
|
2949
|
+
has_same_size_t_size,
|
|
2950
|
+
has_same_endianness,
|
|
2951
|
+
saved_int_t,
|
|
2952
|
+
saved_size_t,
|
|
2953
|
+
saved_endian,
|
|
2954
|
+
is_compatible,
|
|
2955
|
+
lacks_range_penalty,
|
|
2956
|
+
lacks_scoring_metric,
|
|
2957
|
+
lacks_indexer
|
|
2958
|
+
);
|
|
2959
|
+
|
|
2960
|
+
if (!is_isotree_model || !is_compatible)
|
|
2961
|
+
return;
|
|
2962
|
+
|
|
2963
|
+
uint8_t model_type;
|
|
2964
|
+
read_bytes<uint8_t>((void*)&model_type, (size_t)1, serialized_bytes);
|
|
2965
|
+
|
|
2966
|
+
switch(model_type)
|
|
2967
|
+
{
|
|
2968
|
+
case IsoForestModel:
|
|
2969
|
+
{
|
|
2970
|
+
has_IsoForest = true;
|
|
2971
|
+
break;
|
|
2972
|
+
}
|
|
2973
|
+
|
|
2974
|
+
case ExtIsoForestModel:
|
|
2975
|
+
{
|
|
2976
|
+
has_ExtIsoForest = true;
|
|
2977
|
+
break;
|
|
2978
|
+
}
|
|
2979
|
+
|
|
2980
|
+
case ImputerModel:
|
|
2981
|
+
{
|
|
2982
|
+
has_Imputer = true;
|
|
2983
|
+
break;
|
|
2984
|
+
}
|
|
2985
|
+
|
|
2986
|
+
case IndexerModel:
|
|
2987
|
+
{
|
|
2988
|
+
has_Indexer = true;
|
|
2989
|
+
}
|
|
2990
|
+
|
|
2991
|
+
case AllObjectsCombined:
|
|
2992
|
+
{
|
|
2993
|
+
has_combined_objects = true;
|
|
2994
|
+
break;
|
|
2995
|
+
}
|
|
2996
|
+
|
|
2997
|
+
default:
|
|
2998
|
+
{
|
|
2999
|
+
|
|
3000
|
+
}
|
|
3001
|
+
}
|
|
3002
|
+
|
|
3003
|
+
if (has_combined_objects)
|
|
3004
|
+
{
|
|
3005
|
+
size_t size_model[4] = {0};
|
|
3006
|
+
|
|
3007
|
+
read_bytes<uint8_t>((void*)&model_type, (size_t)1, serialized_bytes);
|
|
3008
|
+
switch(model_type)
|
|
3009
|
+
{
|
|
3010
|
+
case HasSingleVarModelNext:
|
|
3011
|
+
{
|
|
3012
|
+
has_IsoForest = true;
|
|
3013
|
+
break;
|
|
3014
|
+
}
|
|
3015
|
+
case HasExtModelNext:
|
|
3016
|
+
{
|
|
3017
|
+
has_ExtIsoForest = true;
|
|
3018
|
+
break;
|
|
3019
|
+
}
|
|
3020
|
+
case HasSingleVarModelPlusImputerNext:
|
|
3021
|
+
{
|
|
3022
|
+
has_IsoForest = true;
|
|
3023
|
+
has_Imputer = true;
|
|
3024
|
+
break;
|
|
3025
|
+
}
|
|
3026
|
+
case HasSingleVarModelPlusIndexerNext:
|
|
3027
|
+
{
|
|
3028
|
+
has_IsoForest = true;
|
|
3029
|
+
has_Indexer = true;
|
|
3030
|
+
break;
|
|
3031
|
+
}
|
|
3032
|
+
case HasSingleVarModelPlusImputerPlusIndexerNext:
|
|
3033
|
+
{
|
|
3034
|
+
has_IsoForest = true;
|
|
3035
|
+
has_Imputer = true;
|
|
3036
|
+
has_Indexer = true;
|
|
3037
|
+
break;
|
|
3038
|
+
}
|
|
3039
|
+
case HasExtModelPlusImputerNext:
|
|
3040
|
+
{
|
|
3041
|
+
has_ExtIsoForest = true;
|
|
3042
|
+
has_Imputer = true;
|
|
3043
|
+
break;
|
|
3044
|
+
}
|
|
3045
|
+
case HasExtModelPlusIndexerNext:
|
|
3046
|
+
{
|
|
3047
|
+
has_ExtIsoForest = true;
|
|
3048
|
+
has_Indexer = true;
|
|
3049
|
+
break;
|
|
3050
|
+
}
|
|
3051
|
+
case HasExtModelPlusImputerPlusIndexerNext:
|
|
3052
|
+
{
|
|
3053
|
+
has_ExtIsoForest = true;
|
|
3054
|
+
has_Imputer = true;
|
|
3055
|
+
has_Indexer = true;
|
|
3056
|
+
break;
|
|
3057
|
+
}
|
|
3058
|
+
case HasSingleVarModelPlusMetadataNext:
|
|
3059
|
+
{
|
|
3060
|
+
has_IsoForest = true;
|
|
3061
|
+
has_metadata = true;
|
|
3062
|
+
read_bytes_size_t(size_model, (size_t)(3+!lacks_indexer), serialized_bytes, saved_size_t, has_same_endianness);
|
|
3063
|
+
size_metadata = size_model[2+!lacks_indexer];
|
|
3064
|
+
break;
|
|
3065
|
+
}
|
|
3066
|
+
case HasSingleVarModelPlusIndexerPlusMetadataNext:
|
|
3067
|
+
{
|
|
3068
|
+
has_IsoForest = true;
|
|
3069
|
+
has_Indexer = true;
|
|
3070
|
+
has_metadata = true;
|
|
3071
|
+
read_bytes_size_t(size_model, (size_t)4, serialized_bytes, saved_size_t, has_same_endianness);
|
|
3072
|
+
size_metadata = size_model[3];
|
|
3073
|
+
break;
|
|
3074
|
+
}
|
|
3075
|
+
case HasExtModelPlusMetadataNext:
|
|
3076
|
+
{
|
|
3077
|
+
has_ExtIsoForest = true;
|
|
3078
|
+
has_metadata = true;
|
|
3079
|
+
read_bytes_size_t(size_model, (size_t)(3+!lacks_indexer), serialized_bytes, saved_size_t, has_same_endianness);
|
|
3080
|
+
size_metadata = size_model[2+!lacks_indexer];
|
|
3081
|
+
break;
|
|
3082
|
+
}
|
|
3083
|
+
case HasExtModelPlusIndexerPlusMetadataNext:
|
|
3084
|
+
{
|
|
3085
|
+
has_ExtIsoForest = true;
|
|
3086
|
+
has_Indexer = true;
|
|
3087
|
+
has_metadata = true;
|
|
3088
|
+
read_bytes_size_t(size_model, (size_t)4, serialized_bytes, saved_size_t, has_same_endianness);
|
|
3089
|
+
size_metadata = size_model[3];
|
|
3090
|
+
break;
|
|
3091
|
+
break;
|
|
3092
|
+
}
|
|
3093
|
+
case HasSingleVarModelPlusImputerPlusMetadataNext:
|
|
3094
|
+
{
|
|
3095
|
+
has_IsoForest = true;
|
|
3096
|
+
has_Imputer = true;
|
|
3097
|
+
has_metadata = true;
|
|
3098
|
+
read_bytes_size_t(size_model, (size_t)(3+!lacks_indexer), serialized_bytes, saved_size_t, has_same_endianness);
|
|
3099
|
+
size_metadata = size_model[2+!lacks_indexer];
|
|
3100
|
+
break;
|
|
3101
|
+
}
|
|
3102
|
+
case HasSingleVarModelPlusImputerPlusIndexerPlusMetadataNext:
|
|
3103
|
+
{
|
|
3104
|
+
has_IsoForest = true;
|
|
3105
|
+
has_Imputer = true;
|
|
3106
|
+
has_Indexer = true;
|
|
3107
|
+
has_metadata = true;
|
|
3108
|
+
read_bytes_size_t(size_model, (size_t)4, serialized_bytes, saved_size_t, has_same_endianness);
|
|
3109
|
+
size_metadata = size_model[3];
|
|
3110
|
+
break;
|
|
3111
|
+
}
|
|
3112
|
+
case HasExtModelPlusImputerPlusMetadataNext:
|
|
3113
|
+
{
|
|
3114
|
+
has_ExtIsoForest = true;
|
|
3115
|
+
has_Imputer = true;
|
|
3116
|
+
has_metadata = true;
|
|
3117
|
+
read_bytes_size_t(size_model, (size_t)(3+!lacks_indexer), serialized_bytes, saved_size_t, has_same_endianness);
|
|
3118
|
+
size_metadata = size_model[2+!lacks_indexer];
|
|
3119
|
+
break;
|
|
3120
|
+
}
|
|
3121
|
+
case HasExtModelPlusImputerPlusIndexerPlusMetadataNext:
|
|
3122
|
+
{
|
|
3123
|
+
has_ExtIsoForest = true;
|
|
3124
|
+
has_Imputer = true;
|
|
3125
|
+
has_Indexer = true;
|
|
3126
|
+
has_metadata = true;
|
|
3127
|
+
read_bytes_size_t(size_model, (size_t)4, serialized_bytes, saved_size_t, has_same_endianness);
|
|
3128
|
+
size_metadata = size_model[3];
|
|
3129
|
+
break;
|
|
3130
|
+
}
|
|
3131
|
+
|
|
3132
|
+
default:
|
|
3133
|
+
{
|
|
3134
|
+
|
|
3135
|
+
}
|
|
3136
|
+
}
|
|
3137
|
+
}
|
|
3138
|
+
|
|
3139
|
+
return_to_position(serialized_bytes, saved_position);
|
|
3140
|
+
}
|
|
3141
|
+
|
|
3142
|
+
/* Shorter form of 'inspect_serialized_object' for callers that do not need
   the platform/feature flags: forwards to the 15-argument overload and
   discards the five extra outputs. */
template <class itype>
void inspect_serialized_object
(
    itype &serialized_bytes,
    bool &is_isotree_model,
    bool &is_compatible,
    bool &has_combined_objects,
    bool &has_IsoForest,
    bool &has_ExtIsoForest,
    bool &has_Imputer,
    bool &has_Indexer,
    bool &has_metadata,
    size_t &size_metadata
)
{
    /* Throw-away receivers for the outputs the caller is not interested in. */
    bool unused_same_int_size;
    bool unused_same_size_t_size;
    bool unused_same_endianness;
    bool unused_lacks_range_penalty;
    bool unused_lacks_scoring_metric;

    inspect_serialized_object(
        serialized_bytes,
        is_isotree_model, is_compatible, has_combined_objects,
        has_IsoForest, has_ExtIsoForest, has_Imputer, has_Indexer,
        has_metadata, size_metadata,
        unused_same_int_size,
        unused_same_size_t_size,
        unused_same_endianness,
        unused_lacks_range_penalty,
        unused_lacks_scoring_metric
    );
}
|
|
3176
|
+
|
|
3177
|
+
void inspect_serialized_object
|
|
3178
|
+
(
|
|
3179
|
+
const char *serialized_bytes,
|
|
3180
|
+
bool &is_isotree_model,
|
|
3181
|
+
bool &is_compatible,
|
|
3182
|
+
bool &has_combined_objects,
|
|
3183
|
+
bool &has_IsoForest,
|
|
3184
|
+
bool &has_ExtIsoForest,
|
|
3185
|
+
bool &has_Imputer,
|
|
3186
|
+
bool &has_Indexer,
|
|
3187
|
+
bool &has_metadata,
|
|
3188
|
+
size_t &size_metadata
|
|
3189
|
+
)
|
|
3190
|
+
{
|
|
3191
|
+
const char *in = serialized_bytes;
|
|
3192
|
+
inspect_serialized_object<const char*>(
|
|
3193
|
+
in,
|
|
3194
|
+
is_isotree_model,
|
|
3195
|
+
is_compatible,
|
|
3196
|
+
has_combined_objects,
|
|
3197
|
+
has_IsoForest,
|
|
3198
|
+
has_ExtIsoForest,
|
|
3199
|
+
has_Imputer,
|
|
3200
|
+
has_Indexer,
|
|
3201
|
+
has_metadata,
|
|
3202
|
+
size_metadata
|
|
3203
|
+
);
|
|
3204
|
+
}
|
|
3205
|
+
|
|
3206
|
+
void inspect_serialized_object
|
|
3207
|
+
(
|
|
3208
|
+
const std::string &serialized_bytes,
|
|
3209
|
+
bool &is_isotree_model,
|
|
3210
|
+
bool &is_compatible,
|
|
3211
|
+
bool &has_combined_objects,
|
|
3212
|
+
bool &has_IsoForest,
|
|
3213
|
+
bool &has_ExtIsoForest,
|
|
3214
|
+
bool &has_Imputer,
|
|
3215
|
+
bool &has_Indexer,
|
|
3216
|
+
bool &has_metadata,
|
|
3217
|
+
size_t &size_metadata
|
|
3218
|
+
)
|
|
3219
|
+
{
|
|
3220
|
+
if (!serialized_bytes.size()) {
|
|
3221
|
+
is_isotree_model = false;
|
|
3222
|
+
is_compatible = false;
|
|
3223
|
+
has_IsoForest = false;
|
|
3224
|
+
has_ExtIsoForest = false;
|
|
3225
|
+
has_Imputer = false;
|
|
3226
|
+
has_Indexer = false;
|
|
3227
|
+
has_metadata = false;
|
|
3228
|
+
return;
|
|
3229
|
+
}
|
|
3230
|
+
const char *in = &serialized_bytes[0];
|
|
3231
|
+
inspect_serialized_object<const char*>(
|
|
3232
|
+
in,
|
|
3233
|
+
is_isotree_model,
|
|
3234
|
+
is_compatible,
|
|
3235
|
+
has_combined_objects,
|
|
3236
|
+
has_IsoForest,
|
|
3237
|
+
has_ExtIsoForest,
|
|
3238
|
+
has_Imputer,
|
|
3239
|
+
has_Indexer,
|
|
3240
|
+
has_metadata,
|
|
3241
|
+
size_metadata
|
|
3242
|
+
);
|
|
3243
|
+
}
|
|
3244
|
+
|
|
3245
|
+
void inspect_serialized_object
|
|
3246
|
+
(
|
|
3247
|
+
FILE *serialized_bytes,
|
|
3248
|
+
bool &is_isotree_model,
|
|
3249
|
+
bool &is_compatible,
|
|
3250
|
+
bool &has_combined_objects,
|
|
3251
|
+
bool &has_IsoForest,
|
|
3252
|
+
bool &has_ExtIsoForest,
|
|
3253
|
+
bool &has_Imputer,
|
|
3254
|
+
bool &has_Indexer,
|
|
3255
|
+
bool &has_metadata,
|
|
3256
|
+
size_t &size_metadata
|
|
3257
|
+
)
|
|
3258
|
+
{
|
|
3259
|
+
FILE *in = serialized_bytes;
|
|
3260
|
+
inspect_serialized_object<FILE*>(
|
|
3261
|
+
in,
|
|
3262
|
+
is_isotree_model,
|
|
3263
|
+
is_compatible,
|
|
3264
|
+
has_combined_objects,
|
|
3265
|
+
has_IsoForest,
|
|
3266
|
+
has_ExtIsoForest,
|
|
3267
|
+
has_Imputer,
|
|
3268
|
+
has_Indexer,
|
|
3269
|
+
has_metadata,
|
|
3270
|
+
size_metadata
|
|
3271
|
+
);
|
|
3272
|
+
}
|
|
3273
|
+
|
|
3274
|
+
void inspect_serialized_object
|
|
3275
|
+
(
|
|
3276
|
+
std::istream &serialized_bytes,
|
|
3277
|
+
bool &is_isotree_model,
|
|
3278
|
+
bool &is_compatible,
|
|
3279
|
+
bool &has_combined_objects,
|
|
3280
|
+
bool &has_IsoForest,
|
|
3281
|
+
bool &has_ExtIsoForest,
|
|
3282
|
+
bool &has_Imputer,
|
|
3283
|
+
bool &has_Indexer,
|
|
3284
|
+
bool &has_metadata,
|
|
3285
|
+
size_t &size_metadata
|
|
3286
|
+
)
|
|
3287
|
+
{
|
|
3288
|
+
inspect_serialized_object<std::istream>(
|
|
3289
|
+
serialized_bytes,
|
|
3290
|
+
is_isotree_model,
|
|
3291
|
+
is_compatible,
|
|
3292
|
+
has_combined_objects,
|
|
3293
|
+
has_IsoForest,
|
|
3294
|
+
has_ExtIsoForest,
|
|
3295
|
+
has_Imputer,
|
|
3296
|
+
has_Indexer,
|
|
3297
|
+
has_metadata,
|
|
3298
|
+
size_metadata
|
|
3299
|
+
);
|
|
3300
|
+
}
|
|
3301
|
+
|
|
3302
|
+
/* Generic fallback: there is nothing to compare for this model type, so the
   stored columns are always considered a match. Both arguments are ignored. */
template <class Model>
bool prev_cols_match(const Model &model, const char *serialized_bytes)
{
    const bool always_matches = true;
    return always_matches;
}
|
|
3307
|
+
|
|
3308
|
+
bool prev_cols_match(const Imputer &model, const char *serialized_bytes)
|
|
3309
|
+
{
|
|
3310
|
+
size_t prev[6];
|
|
3311
|
+
read_bytes<size_t>((void*)prev, (size_t)6, serialized_bytes);
|
|
3312
|
+
if (prev[0] != model.ncols_numeric ||
|
|
3313
|
+
prev[1] != model.ncols_categ ||
|
|
3314
|
+
prev[2] != model.ncat.size() ||
|
|
3315
|
+
prev[4] != model.col_means.size() ||
|
|
3316
|
+
prev[5] != model.col_modes.size())
|
|
3317
|
+
{
|
|
3318
|
+
return false;
|
|
3319
|
+
}
|
|
3320
|
+
|
|
3321
|
+
return true;
|
|
3322
|
+
}
|
|
3323
|
+
|
|
3324
|
+
template <class Model>
|
|
3325
|
+
bool check_can_undergo_incremental_serialization(const Model &model, const char *serialized_bytes)
|
|
3326
|
+
{
|
|
3327
|
+
const char *start = serialized_bytes;
|
|
3328
|
+
size_t curr_ntrees = get_ntrees(model);
|
|
3329
|
+
|
|
3330
|
+
bool is_isotree_model;
|
|
3331
|
+
bool is_compatible;
|
|
3332
|
+
bool has_combined_objects;
|
|
3333
|
+
bool has_IsoForest;
|
|
3334
|
+
bool has_ExtIsoForest;
|
|
3335
|
+
bool has_Imputer;
|
|
3336
|
+
bool has_Indexer;
|
|
3337
|
+
bool has_metadata;
|
|
3338
|
+
size_t size_metadata;
|
|
3339
|
+
bool has_same_int_size;
|
|
3340
|
+
bool has_same_size_t_size;
|
|
3341
|
+
bool has_same_endianness;
|
|
3342
|
+
bool lacks_range_penalty;
|
|
3343
|
+
bool lacks_scoring_metric;
|
|
3344
|
+
|
|
3345
|
+
inspect_serialized_object(
|
|
3346
|
+
serialized_bytes,
|
|
3347
|
+
is_isotree_model,
|
|
3348
|
+
is_compatible,
|
|
3349
|
+
has_combined_objects,
|
|
3350
|
+
has_IsoForest,
|
|
3351
|
+
has_ExtIsoForest,
|
|
3352
|
+
has_Imputer,
|
|
3353
|
+
has_Indexer,
|
|
3354
|
+
has_metadata,
|
|
3355
|
+
size_metadata,
|
|
3356
|
+
has_same_int_size,
|
|
3357
|
+
has_same_size_t_size,
|
|
3358
|
+
has_same_endianness,
|
|
3359
|
+
lacks_range_penalty,
|
|
3360
|
+
lacks_scoring_metric
|
|
3361
|
+
);
|
|
3362
|
+
|
|
3363
|
+
if (!is_isotree_model || !is_compatible || has_combined_objects ||
|
|
3364
|
+
!has_same_int_size || !has_same_size_t_size || !has_same_endianness ||
|
|
3365
|
+
lacks_range_penalty || lacks_scoring_metric)
|
|
3366
|
+
return false;
|
|
3367
|
+
|
|
3368
|
+
if (std::is_same<Model, IsoForest>::value) {
|
|
3369
|
+
if (!has_IsoForest || has_ExtIsoForest || has_Imputer || has_Indexer)
|
|
3370
|
+
return false;
|
|
3371
|
+
}
|
|
3372
|
+
|
|
3373
|
+
else if (std::is_same<Model, ExtIsoForest>::value) {
|
|
3374
|
+
if (has_IsoForest || !has_ExtIsoForest || has_Imputer || has_Indexer)
|
|
3375
|
+
return false;
|
|
3376
|
+
}
|
|
3377
|
+
|
|
3378
|
+
else if (std::is_same<Model, Imputer>::value) {
|
|
3379
|
+
if (has_IsoForest || has_ExtIsoForest || !has_Imputer || has_Indexer)
|
|
3380
|
+
return false;
|
|
3381
|
+
}
|
|
3382
|
+
|
|
3383
|
+
else if (std::is_same<Model, TreesIndexer>::value) {
|
|
3384
|
+
if (has_IsoForest || has_ExtIsoForest || has_Imputer || !has_Indexer)
|
|
3385
|
+
return false;
|
|
3386
|
+
}
|
|
3387
|
+
|
|
3388
|
+
else {
|
|
3389
|
+
assert(0);
|
|
3390
|
+
}
|
|
3391
|
+
|
|
3392
|
+
start += get_size_setup_info();
|
|
3393
|
+
start += sizeof(uint8_t);
|
|
3394
|
+
start += sizeof(size_t);
|
|
3395
|
+
|
|
3396
|
+
if (std::is_same<Model, IsoForest>::value) {
|
|
3397
|
+
start += sizeof(uint8_t) * 4;
|
|
3398
|
+
start += sizeof(double) * 2;
|
|
3399
|
+
start += sizeof(size_t);
|
|
3400
|
+
}
|
|
3401
|
+
|
|
3402
|
+
else if (std::is_same<Model, ExtIsoForest>::value) {
|
|
3403
|
+
start += sizeof(uint8_t) * 4;
|
|
3404
|
+
start += sizeof(double) * 2;
|
|
3405
|
+
start += sizeof(size_t);
|
|
3406
|
+
}
|
|
3407
|
+
|
|
3408
|
+
else if (std::is_same<Model, Imputer>::value) {
|
|
3409
|
+
if (!prev_cols_match(model, start))
|
|
3410
|
+
return false;
|
|
3411
|
+
start += sizeof(size_t) * 3;
|
|
3412
|
+
}
|
|
3413
|
+
|
|
3414
|
+
else if (std::is_same<Model, TreesIndexer>::value) {
|
|
3415
|
+
/* Nothing is required here */
|
|
3416
|
+
}
|
|
3417
|
+
|
|
3418
|
+
else {
|
|
3419
|
+
assert(0);
|
|
3420
|
+
}
|
|
3421
|
+
|
|
3422
|
+
size_t old_ntrees;
|
|
3423
|
+
memcpy(&old_ntrees, start, sizeof(size_t));
|
|
3424
|
+
if (old_ntrees > curr_ntrees)
|
|
3425
|
+
return false;
|
|
3426
|
+
|
|
3427
|
+
return true;
|
|
3428
|
+
}
|
|
3429
|
+
|
|
3430
|
+
bool check_can_undergo_incremental_serialization(const IsoForest &model, const char *serialized_bytes)
|
|
3431
|
+
{
|
|
3432
|
+
return check_can_undergo_incremental_serialization<IsoForest>(model, serialized_bytes);
|
|
3433
|
+
}
|
|
3434
|
+
|
|
3435
|
+
bool check_can_undergo_incremental_serialization(const ExtIsoForest &model, const char *serialized_bytes)
|
|
3436
|
+
{
|
|
3437
|
+
return check_can_undergo_incremental_serialization<ExtIsoForest>(model, serialized_bytes);
|
|
3438
|
+
}
|
|
3439
|
+
|
|
3440
|
+
bool check_can_undergo_incremental_serialization(const Imputer &model, const char *serialized_bytes)
|
|
3441
|
+
{
|
|
3442
|
+
return check_can_undergo_incremental_serialization<Imputer>(model, serialized_bytes);
|
|
3443
|
+
}
|
|
3444
|
+
|
|
3445
|
+
bool check_can_undergo_incremental_serialization(const TreesIndexer &model, const char *serialized_bytes)
|
|
3446
|
+
{
|
|
3447
|
+
return check_can_undergo_incremental_serialization<TreesIndexer>(model, serialized_bytes);
|
|
3448
|
+
}
|
|
3449
|
+
|
|
3450
|
+
size_t determine_serialized_size_combined
|
|
3451
|
+
(
|
|
3452
|
+
const IsoForest *model,
|
|
3453
|
+
const ExtIsoForest *model_ext,
|
|
3454
|
+
const Imputer *imputer,
|
|
3455
|
+
const TreesIndexer *indexer,
|
|
3456
|
+
const size_t size_optional_metadata
|
|
3457
|
+
) noexcept
|
|
3458
|
+
{
|
|
3459
|
+
size_t n_bytes = get_size_setup_info();
|
|
3460
|
+
n_bytes += 3 * sizeof(uint8_t);
|
|
3461
|
+
n_bytes += 5 * sizeof(size_t);
|
|
3462
|
+
|
|
3463
|
+
if (model != NULL)
|
|
3464
|
+
n_bytes += get_size_model(*model);
|
|
3465
|
+
else
|
|
3466
|
+
n_bytes += get_size_model(*model_ext);
|
|
3467
|
+
if (imputer != NULL)
|
|
3468
|
+
n_bytes += get_size_model(*imputer);
|
|
3469
|
+
if (indexer != NULL)
|
|
3470
|
+
n_bytes += get_size_model(*indexer);
|
|
3471
|
+
|
|
3472
|
+
n_bytes += get_size_ending_metadata();
|
|
3473
|
+
return n_bytes;
|
|
3474
|
+
}
|
|
3475
|
+
|
|
3476
|
+
template <class otype>
|
|
3477
|
+
void serialize_combined
|
|
3478
|
+
(
|
|
3479
|
+
const IsoForest *model,
|
|
3480
|
+
const ExtIsoForest *model_ext,
|
|
3481
|
+
const Imputer *imputer,
|
|
3482
|
+
const TreesIndexer *indexer,
|
|
3483
|
+
const char *optional_metadata,
|
|
3484
|
+
const size_t size_optional_metadata,
|
|
3485
|
+
otype &out
|
|
3486
|
+
)
|
|
3487
|
+
{
|
|
3488
|
+
SignalSwitcher ss = SignalSwitcher();
|
|
3489
|
+
|
|
3490
|
+
auto pos_watermark = set_return_position(out);
|
|
3491
|
+
|
|
3492
|
+
add_setup_info(out, false);
|
|
3493
|
+
uint8_t model_type = AllObjectsCombined;
|
|
3494
|
+
write_bytes<uint8_t>((void*)&model_type, (size_t)1, out);
|
|
3495
|
+
|
|
3496
|
+
if (model != NULL)
|
|
3497
|
+
{
|
|
3498
|
+
|
|
3499
|
+
if (!size_optional_metadata)
|
|
3500
|
+
{
|
|
3501
|
+
if (imputer == NULL) {
|
|
3502
|
+
if (indexer == NULL)
|
|
3503
|
+
model_type = HasSingleVarModelNext;
|
|
3504
|
+
else
|
|
3505
|
+
model_type = HasSingleVarModelPlusIndexerNext;
|
|
3506
|
+
}
|
|
3507
|
+
else {
|
|
3508
|
+
if (indexer == NULL)
|
|
3509
|
+
model_type = HasSingleVarModelPlusImputerNext;
|
|
3510
|
+
else
|
|
3511
|
+
model_type = HasSingleVarModelPlusImputerPlusIndexerNext;
|
|
3512
|
+
}
|
|
3513
|
+
}
|
|
3514
|
+
|
|
3515
|
+
else
|
|
3516
|
+
{
|
|
3517
|
+
if (imputer == NULL) {
|
|
3518
|
+
if (indexer == NULL)
|
|
3519
|
+
model_type = HasSingleVarModelPlusMetadataNext;
|
|
3520
|
+
else
|
|
3521
|
+
model_type = HasSingleVarModelPlusIndexerPlusMetadataNext;
|
|
3522
|
+
}
|
|
3523
|
+
else {
|
|
3524
|
+
if (indexer == NULL)
|
|
3525
|
+
model_type = HasSingleVarModelPlusImputerPlusMetadataNext;
|
|
3526
|
+
else
|
|
3527
|
+
model_type = HasSingleVarModelPlusImputerPlusIndexerPlusMetadataNext;
|
|
3528
|
+
}
|
|
3529
|
+
}
|
|
3530
|
+
|
|
3531
|
+
}
|
|
3532
|
+
|
|
3533
|
+
else if (model_ext != NULL)
|
|
3534
|
+
{
|
|
3535
|
+
|
|
3536
|
+
if (!size_optional_metadata)
|
|
3537
|
+
{
|
|
3538
|
+
if (imputer == NULL) {
|
|
3539
|
+
if (indexer == NULL)
|
|
3540
|
+
model_type = HasExtModelNext;
|
|
3541
|
+
else
|
|
3542
|
+
model_type = HasExtModelPlusIndexerNext;
|
|
3543
|
+
}
|
|
3544
|
+
else {
|
|
3545
|
+
if (indexer == NULL)
|
|
3546
|
+
model_type = HasExtModelPlusImputerNext;
|
|
3547
|
+
else
|
|
3548
|
+
model_type = HasExtModelPlusImputerPlusIndexerNext;
|
|
3549
|
+
}
|
|
3550
|
+
}
|
|
3551
|
+
|
|
3552
|
+
else
|
|
3553
|
+
{
|
|
3554
|
+
if (imputer == NULL) {
|
|
3555
|
+
if (indexer == NULL)
|
|
3556
|
+
model_type = HasExtModelPlusMetadataNext;
|
|
3557
|
+
else
|
|
3558
|
+
model_type = HasExtModelPlusIndexerPlusMetadataNext;
|
|
3559
|
+
}
|
|
3560
|
+
else {
|
|
3561
|
+
if (indexer == NULL)
|
|
3562
|
+
model_type = HasExtModelPlusImputerPlusMetadataNext;
|
|
3563
|
+
else
|
|
3564
|
+
model_type = HasExtModelPlusImputerPlusIndexerPlusMetadataNext;
|
|
3565
|
+
}
|
|
3566
|
+
}
|
|
3567
|
+
}
|
|
3568
|
+
|
|
3569
|
+
else {
|
|
3570
|
+
throw std::runtime_error("Must pass one of 'model' or 'model_ext'.\n");
|
|
3571
|
+
}
|
|
3572
|
+
|
|
3573
|
+
write_bytes<uint8_t>((void*)&model_type, (size_t)1, out);
|
|
3574
|
+
|
|
3575
|
+
size_t size_model;
|
|
3576
|
+
if (model != NULL)
|
|
3577
|
+
size_model = get_size_model(*model);
|
|
3578
|
+
else
|
|
3579
|
+
size_model = get_size_model(*model_ext);
|
|
3580
|
+
write_bytes<size_t>((void*)&size_model, (size_t)1, out);
|
|
3581
|
+
|
|
3582
|
+
if (imputer != NULL)
|
|
3583
|
+
size_model = get_size_model(*imputer);
|
|
3584
|
+
else
|
|
3585
|
+
size_model = 0;
|
|
3586
|
+
write_bytes<size_t>((void*)&size_model, (size_t)1, out);
|
|
3587
|
+
|
|
3588
|
+
if (indexer != NULL)
|
|
3589
|
+
size_model = get_size_model(*indexer);
|
|
3590
|
+
else
|
|
3591
|
+
size_model = 0;
|
|
3592
|
+
write_bytes<size_t>((void*)&size_model, (size_t)1, out);
|
|
3593
|
+
|
|
3594
|
+
write_bytes<size_t>((void*)&size_optional_metadata, (size_t)1, out);
|
|
3595
|
+
|
|
3596
|
+
|
|
3597
|
+
check_interrupt_switch(ss);
|
|
3598
|
+
|
|
3599
|
+
if (model != NULL)
|
|
3600
|
+
serialize_model(*model, out);
|
|
3601
|
+
else
|
|
3602
|
+
serialize_model(*model_ext, out);
|
|
3603
|
+
|
|
3604
|
+
if (imputer != NULL)
|
|
3605
|
+
serialize_model(*imputer, out);
|
|
3606
|
+
|
|
3607
|
+
if (indexer != NULL)
|
|
3608
|
+
serialize_model(*indexer, out);
|
|
3609
|
+
|
|
3610
|
+
if (size_optional_metadata)
|
|
3611
|
+
write_bytes<char>((void*)optional_metadata, size_optional_metadata, out);
|
|
3612
|
+
|
|
3613
|
+
check_interrupt_switch(ss);
|
|
3614
|
+
|
|
3615
|
+
uint8_t ending_type = (uint8_t)EndsHere;
|
|
3616
|
+
write_bytes<uint8_t>((void*)&ending_type, (size_t)1, out);
|
|
3617
|
+
size_t jump_ahead = 0;
|
|
3618
|
+
write_bytes<size_t>((void*)&jump_ahead, (size_t)1, out);
|
|
3619
|
+
|
|
3620
|
+
auto end_pos = set_return_position(out);
|
|
3621
|
+
return_to_position(out, pos_watermark);
|
|
3622
|
+
add_full_watermark(out);
|
|
3623
|
+
return_to_position(out, end_pos);
|
|
3624
|
+
}
|
|
3625
|
+
|
|
3626
|
+
void serialize_combined
|
|
3627
|
+
(
|
|
3628
|
+
const IsoForest *model,
|
|
3629
|
+
const ExtIsoForest *model_ext,
|
|
3630
|
+
const Imputer *imputer,
|
|
3631
|
+
const TreesIndexer *indexer,
|
|
3632
|
+
const char *optional_metadata,
|
|
3633
|
+
const size_t size_optional_metadata,
|
|
3634
|
+
char *out
|
|
3635
|
+
)
|
|
3636
|
+
{
|
|
3637
|
+
serialize_combined<char*>(
|
|
3638
|
+
model,
|
|
3639
|
+
model_ext,
|
|
3640
|
+
imputer,
|
|
3641
|
+
indexer,
|
|
3642
|
+
optional_metadata,
|
|
3643
|
+
size_optional_metadata,
|
|
3644
|
+
out
|
|
3645
|
+
);
|
|
3646
|
+
}
|
|
3647
|
+
|
|
3648
|
+
void serialize_combined
|
|
3649
|
+
(
|
|
3650
|
+
const IsoForest *model,
|
|
3651
|
+
const ExtIsoForest *model_ext,
|
|
3652
|
+
const Imputer *imputer,
|
|
3653
|
+
const TreesIndexer *indexer,
|
|
3654
|
+
const char *optional_metadata,
|
|
3655
|
+
const size_t size_optional_metadata,
|
|
3656
|
+
FILE *out
|
|
3657
|
+
)
|
|
3658
|
+
{
|
|
3659
|
+
serialize_combined<FILE*>(
|
|
3660
|
+
model,
|
|
3661
|
+
model_ext,
|
|
3662
|
+
imputer,
|
|
3663
|
+
indexer,
|
|
3664
|
+
optional_metadata,
|
|
3665
|
+
size_optional_metadata,
|
|
3666
|
+
out
|
|
3667
|
+
);
|
|
3668
|
+
}
|
|
3669
|
+
|
|
3670
|
+
void serialize_combined
|
|
3671
|
+
(
|
|
3672
|
+
const IsoForest *model,
|
|
3673
|
+
const ExtIsoForest *model_ext,
|
|
3674
|
+
const Imputer *imputer,
|
|
3675
|
+
const TreesIndexer *indexer,
|
|
3676
|
+
const char *optional_metadata,
|
|
3677
|
+
const size_t size_optional_metadata,
|
|
3678
|
+
std::ostream &out
|
|
3679
|
+
)
|
|
3680
|
+
{
|
|
3681
|
+
serialize_combined<std::ostream>(
|
|
3682
|
+
model,
|
|
3683
|
+
model_ext,
|
|
3684
|
+
imputer,
|
|
3685
|
+
indexer,
|
|
3686
|
+
optional_metadata,
|
|
3687
|
+
size_optional_metadata,
|
|
3688
|
+
out
|
|
3689
|
+
);
|
|
3690
|
+
}
|
|
3691
|
+
|
|
3692
|
+
std::string serialize_combined
|
|
3693
|
+
(
|
|
3694
|
+
const IsoForest *model,
|
|
3695
|
+
const ExtIsoForest *model_ext,
|
|
3696
|
+
const Imputer *imputer,
|
|
3697
|
+
const TreesIndexer *indexer,
|
|
3698
|
+
const char *optional_metadata,
|
|
3699
|
+
const size_t size_optional_metadata
|
|
3700
|
+
)
|
|
3701
|
+
{
|
|
3702
|
+
std::string serialized;
|
|
3703
|
+
serialized.resize(determine_serialized_size_combined(model, model_ext, imputer, indexer, size_optional_metadata));
|
|
3704
|
+
char *ptr = &serialized[0];
|
|
3705
|
+
serialize_combined(model, model_ext, imputer, indexer, optional_metadata, size_optional_metadata, ptr);
|
|
3706
|
+
return serialized;
|
|
3707
|
+
}
|
|
3708
|
+
|
|
3709
|
+
size_t determine_serialized_size_combined
|
|
3710
|
+
(
|
|
3711
|
+
const char *serialized_model,
|
|
3712
|
+
const char *serialized_model_ext,
|
|
3713
|
+
const char *serialized_imputer,
|
|
3714
|
+
const char *serialized_indexer,
|
|
3715
|
+
const size_t size_optional_metadata
|
|
3716
|
+
) noexcept
|
|
3717
|
+
{
|
|
3718
|
+
size_t n_bytes = get_size_setup_info();
|
|
3719
|
+
n_bytes += 3 * sizeof(uint8_t);
|
|
3720
|
+
n_bytes += 5 * sizeof(size_t);
|
|
3721
|
+
|
|
3722
|
+
size_t model_size;
|
|
3723
|
+
|
|
3724
|
+
if (serialized_model != NULL)
|
|
3725
|
+
memcpy(&model_size, serialized_model + get_size_setup_info() + sizeof(uint8_t), sizeof(size_t));
|
|
3726
|
+
else
|
|
3727
|
+
memcpy(&model_size, serialized_model_ext + get_size_setup_info() + sizeof(uint8_t), sizeof(size_t));
|
|
3728
|
+
n_bytes += model_size;
|
|
3729
|
+
if (serialized_imputer != NULL) {
|
|
3730
|
+
memcpy(&model_size, serialized_imputer + get_size_setup_info() + sizeof(uint8_t), sizeof(size_t));
|
|
3731
|
+
n_bytes += model_size;
|
|
3732
|
+
}
|
|
3733
|
+
if (serialized_indexer != NULL) {
|
|
3734
|
+
memcpy(&model_size, serialized_indexer + get_size_setup_info() + sizeof(uint8_t), sizeof(size_t));
|
|
3735
|
+
n_bytes += model_size;
|
|
3736
|
+
}
|
|
3737
|
+
|
|
3738
|
+
n_bytes += size_optional_metadata;
|
|
3739
|
+
|
|
3740
|
+
n_bytes += get_size_ending_metadata();
|
|
3741
|
+
return n_bytes;
|
|
3742
|
+
}
|
|
3743
|
+
|
|
3744
|
+
template <class otype>
|
|
3745
|
+
void serialize_combined
|
|
3746
|
+
(
|
|
3747
|
+
const char *serialized_model,
|
|
3748
|
+
const char *serialized_model_ext,
|
|
3749
|
+
const char *serialized_imputer,
|
|
3750
|
+
const char *serialized_indexer,
|
|
3751
|
+
const char *optional_metadata,
|
|
3752
|
+
const size_t size_optional_metadata,
|
|
3753
|
+
otype &out
|
|
3754
|
+
)
|
|
3755
|
+
{
|
|
3756
|
+
SignalSwitcher ss = SignalSwitcher();
|
|
3757
|
+
|
|
3758
|
+
std::unique_ptr<char[]> curr_setup(new char[get_size_setup_info()]);
|
|
3759
|
+
char *ptr_curr_setup = curr_setup.get();
|
|
3760
|
+
add_setup_info(ptr_curr_setup, true);
|
|
3761
|
+
auto pos_watermark = set_return_position(out);
|
|
3762
|
+
add_setup_info(out, false);
|
|
3763
|
+
|
|
3764
|
+
uint8_t model_type = AllObjectsCombined;
|
|
3765
|
+
write_bytes<uint8_t>((void*)&model_type, (size_t)1, out);
|
|
3766
|
+
|
|
3767
|
+
if (serialized_model != NULL)
|
|
3768
|
+
{
|
|
3769
|
+
if (!size_optional_metadata)
|
|
3770
|
+
{
|
|
3771
|
+
if (serialized_imputer == NULL) {
|
|
3772
|
+
if (serialized_indexer == NULL)
|
|
3773
|
+
model_type = HasSingleVarModelNext;
|
|
3774
|
+
else
|
|
3775
|
+
model_type = HasSingleVarModelPlusIndexerNext;
|
|
3776
|
+
}
|
|
3777
|
+
else {
|
|
3778
|
+
if (serialized_indexer == NULL)
|
|
3779
|
+
model_type = HasSingleVarModelPlusImputerNext;
|
|
3780
|
+
else
|
|
3781
|
+
model_type = HasSingleVarModelPlusImputerPlusIndexerNext;
|
|
3782
|
+
}
|
|
3783
|
+
}
|
|
3784
|
+
|
|
3785
|
+
else
|
|
3786
|
+
{
|
|
3787
|
+
if (serialized_imputer == NULL) {
|
|
3788
|
+
if (serialized_indexer == NULL)
|
|
3789
|
+
model_type = HasSingleVarModelPlusMetadataNext;
|
|
3790
|
+
else
|
|
3791
|
+
model_type = HasSingleVarModelPlusIndexerPlusMetadataNext;
|
|
3792
|
+
}
|
|
3793
|
+
else {
|
|
3794
|
+
if (serialized_indexer == NULL)
|
|
3795
|
+
model_type = HasSingleVarModelPlusImputerPlusMetadataNext;
|
|
3796
|
+
else
|
|
3797
|
+
model_type = HasSingleVarModelPlusImputerPlusIndexerPlusMetadataNext;
|
|
3798
|
+
}
|
|
3799
|
+
}
|
|
3800
|
+
}
|
|
3801
|
+
|
|
3802
|
+
else
|
|
3803
|
+
{
|
|
3804
|
+
if (!size_optional_metadata)
|
|
3805
|
+
{
|
|
3806
|
+
if (serialized_imputer == NULL) {
|
|
3807
|
+
if (serialized_indexer == NULL)
|
|
3808
|
+
model_type = HasExtModelNext;
|
|
3809
|
+
else
|
|
3810
|
+
model_type = HasExtModelPlusIndexerNext;
|
|
3811
|
+
}
|
|
3812
|
+
else {
|
|
3813
|
+
if (serialized_indexer == NULL)
|
|
3814
|
+
model_type = HasExtModelPlusImputerNext;
|
|
3815
|
+
else
|
|
3816
|
+
model_type = HasExtModelPlusImputerPlusIndexerNext;
|
|
3817
|
+
}
|
|
3818
|
+
}
|
|
3819
|
+
|
|
3820
|
+
else
|
|
3821
|
+
{
|
|
3822
|
+
if (serialized_imputer == NULL) {
|
|
3823
|
+
if (serialized_indexer == NULL)
|
|
3824
|
+
model_type = HasExtModelPlusMetadataNext;
|
|
3825
|
+
else
|
|
3826
|
+
model_type = HasExtModelPlusIndexerPlusMetadataNext;
|
|
3827
|
+
}
|
|
3828
|
+
else {
|
|
3829
|
+
if (serialized_indexer == NULL)
|
|
3830
|
+
model_type = HasExtModelPlusImputerPlusMetadataNext;
|
|
3831
|
+
else
|
|
3832
|
+
model_type = HasExtModelPlusImputerPlusIndexerPlusMetadataNext;
|
|
3833
|
+
}
|
|
3834
|
+
}
|
|
3835
|
+
}
|
|
3836
|
+
|
|
3837
|
+
write_bytes<uint8_t>((void*)&model_type, (size_t)1, out);
|
|
3838
|
+
|
|
3839
|
+
size_t model_size;
|
|
3840
|
+
size_t size_model1, size_model2, size_model3, size_model4;
|
|
3841
|
+
|
|
3842
|
+
std::unique_ptr<char[]> new_model;
|
|
3843
|
+
if (serialized_model != NULL)
|
|
3844
|
+
{
|
|
3845
|
+
if (memcmp(curr_setup.get(), serialized_model, get_size_setup_info()))
|
|
3846
|
+
{
|
|
3847
|
+
fprintf(stderr, "Warning: 'model' was serialized in a different setup, will need to convert.\n");
|
|
3848
|
+
IsoForest model;
|
|
3849
|
+
deserialization_pipeline(model, serialized_model);
|
|
3850
|
+
new_model = std::unique_ptr<char[]>(new char[get_size_model(model)]);
|
|
3851
|
+
char *ptr_new_model_ser = new_model.get();
|
|
3852
|
+
serialization_pipeline(model, ptr_new_model_ser);
|
|
3853
|
+
serialized_model = new_model.get();
|
|
3854
|
+
}
|
|
3855
|
+
serialized_model += get_size_setup_info() + sizeof(uint8_t);
|
|
3856
|
+
memcpy(&model_size, serialized_model, sizeof(size_t));
|
|
3857
|
+
serialized_model += sizeof(size_t);
|
|
3858
|
+
size_model1 = model_size;
|
|
3859
|
+
}
|
|
3860
|
+
|
|
3861
|
+
else
|
|
3862
|
+
{
|
|
3863
|
+
if (memcmp(curr_setup.get(), serialized_model_ext, get_size_setup_info()))
|
|
3864
|
+
{
|
|
3865
|
+
fprintf(stderr, "Warning: 'model_ext' was serialized in a different setup, will need to convert.\n");
|
|
3866
|
+
ExtIsoForest model;
|
|
3867
|
+
deserialization_pipeline(model, serialized_model_ext);
|
|
3868
|
+
new_model = std::unique_ptr<char[]>(new char[get_size_model(model)]);
|
|
3869
|
+
char *ptr_new_model_ser = new_model.get();
|
|
3870
|
+
serialization_pipeline(model, ptr_new_model_ser);
|
|
3871
|
+
serialized_model_ext = new_model.get();
|
|
3872
|
+
}
|
|
3873
|
+
serialized_model_ext += get_size_setup_info() + sizeof(uint8_t);
|
|
3874
|
+
memcpy(&model_size, serialized_model_ext, sizeof(size_t));
|
|
3875
|
+
serialized_model_ext += sizeof(size_t);
|
|
3876
|
+
size_model2 = model_size;
|
|
3877
|
+
}
|
|
3878
|
+
|
|
3879
|
+
check_interrupt_switch(ss);
|
|
3880
|
+
|
|
3881
|
+
write_bytes<size_t>((void*)&model_size, (size_t)1, out);
|
|
3882
|
+
|
|
3883
|
+
if (serialized_imputer != NULL)
|
|
3884
|
+
{
|
|
3885
|
+
if (memcmp(curr_setup.get(), serialized_imputer, get_size_setup_info()))
|
|
3886
|
+
{
|
|
3887
|
+
fprintf(stderr, "Warning: 'imputer' was serialized in a different setup, will need to convert.\n");
|
|
3888
|
+
Imputer model;
|
|
3889
|
+
deserialization_pipeline(model, serialized_imputer);
|
|
3890
|
+
new_model = std::unique_ptr<char[]>(new char[get_size_model(model)]);
|
|
3891
|
+
char *ptr_new_model_ser = new_model.get();
|
|
3892
|
+
serialization_pipeline(model, ptr_new_model_ser);
|
|
3893
|
+
serialized_imputer = new_model.get();
|
|
3894
|
+
}
|
|
3895
|
+
serialized_imputer += get_size_setup_info() + sizeof(uint8_t);
|
|
3896
|
+
memcpy(&model_size, serialized_imputer, sizeof(size_t));
|
|
3897
|
+
serialized_imputer += sizeof(size_t);
|
|
3898
|
+
size_model3 = model_size;
|
|
3899
|
+
}
|
|
3900
|
+
|
|
3901
|
+
else {
|
|
3902
|
+
model_size = 0;
|
|
3903
|
+
}
|
|
3904
|
+
write_bytes<size_t>((void*)&model_size, (size_t)1, out);
|
|
3905
|
+
|
|
3906
|
+
if (serialized_indexer != NULL)
|
|
3907
|
+
{
|
|
3908
|
+
if (memcmp(curr_setup.get(), serialized_indexer, get_size_setup_info()))
|
|
3909
|
+
{
|
|
3910
|
+
fprintf(stderr, "Warning: 'indexer' was serialized in a different setup, will need to convert.\n");
|
|
3911
|
+
TreesIndexer model;
|
|
3912
|
+
deserialization_pipeline(model, serialized_indexer);
|
|
3913
|
+
new_model = std::unique_ptr<char[]>(new char[get_size_model(model)]);
|
|
3914
|
+
char *ptr_new_model_ser = new_model.get();
|
|
3915
|
+
serialization_pipeline(model, ptr_new_model_ser);
|
|
3916
|
+
serialized_indexer = new_model.get();
|
|
3917
|
+
}
|
|
3918
|
+
serialized_indexer += get_size_setup_info() + sizeof(uint8_t);
|
|
3919
|
+
memcpy(&model_size, serialized_indexer, sizeof(size_t));
|
|
3920
|
+
serialized_indexer += sizeof(size_t);
|
|
3921
|
+
size_model4 = model_size;
|
|
3922
|
+
}
|
|
3923
|
+
|
|
3924
|
+
else {
|
|
3925
|
+
model_size = 0;
|
|
3926
|
+
}
|
|
3927
|
+
write_bytes<size_t>((void*)&model_size, (size_t)1, out);
|
|
3928
|
+
|
|
3929
|
+
check_interrupt_switch(ss);
|
|
3930
|
+
|
|
3931
|
+
write_bytes<size_t>((void*)&size_optional_metadata, (size_t)1, out);
|
|
3932
|
+
|
|
3933
|
+
if (serialized_model != NULL)
|
|
3934
|
+
write_bytes<char>((void*)serialized_model, size_model1, out);
|
|
3935
|
+
else
|
|
3936
|
+
write_bytes<char>((void*)serialized_model_ext, size_model2, out);
|
|
3937
|
+
if (serialized_imputer != NULL)
|
|
3938
|
+
write_bytes<char>((void*)serialized_imputer, size_model3, out);
|
|
3939
|
+
if (serialized_indexer != NULL)
|
|
3940
|
+
write_bytes<char>((void*)serialized_indexer, size_model4, out);
|
|
3941
|
+
|
|
3942
|
+
if (size_optional_metadata)
|
|
3943
|
+
write_bytes<char>((void*)optional_metadata, size_optional_metadata, out);
|
|
3944
|
+
|
|
3945
|
+
check_interrupt_switch(ss);
|
|
3946
|
+
|
|
3947
|
+
uint8_t ending_type = (uint8_t)EndsHere;
|
|
3948
|
+
write_bytes<uint8_t>((void*)&ending_type, (size_t)1, out);
|
|
3949
|
+
size_t jump_ahead = 0;
|
|
3950
|
+
write_bytes<size_t>((void*)&jump_ahead, (size_t)1, out);
|
|
3951
|
+
|
|
3952
|
+
auto end_pos = set_return_position(out);
|
|
3953
|
+
return_to_position(out, pos_watermark);
|
|
3954
|
+
add_full_watermark(out);
|
|
3955
|
+
return_to_position(out, end_pos);
|
|
3956
|
+
}
|
|
3957
|
+
|
|
3958
|
+
void serialize_combined
|
|
3959
|
+
(
|
|
3960
|
+
const char *serialized_model,
|
|
3961
|
+
const char *serialized_model_ext,
|
|
3962
|
+
const char *serialized_imputer,
|
|
3963
|
+
const char *serialized_indexer,
|
|
3964
|
+
const char *optional_metadata,
|
|
3965
|
+
const size_t size_optional_metadata,
|
|
3966
|
+
FILE *out
|
|
3967
|
+
)
|
|
3968
|
+
{
|
|
3969
|
+
serialize_combined<FILE*>(
|
|
3970
|
+
serialized_model,
|
|
3971
|
+
serialized_model_ext,
|
|
3972
|
+
serialized_imputer,
|
|
3973
|
+
serialized_indexer,
|
|
3974
|
+
optional_metadata,
|
|
3975
|
+
size_optional_metadata,
|
|
3976
|
+
out
|
|
3977
|
+
);
|
|
3978
|
+
}
|
|
3979
|
+
|
|
3980
|
+
void serialize_combined
|
|
3981
|
+
(
|
|
3982
|
+
const char *serialized_model,
|
|
3983
|
+
const char *serialized_model_ext,
|
|
3984
|
+
const char *serialized_imputer,
|
|
3985
|
+
const char *serialized_indexer,
|
|
3986
|
+
const char *optional_metadata,
|
|
3987
|
+
const size_t size_optional_metadata,
|
|
3988
|
+
std::ostream &out
|
|
3989
|
+
)
|
|
3990
|
+
{
|
|
3991
|
+
serialize_combined<std::ostream>(
|
|
3992
|
+
serialized_model,
|
|
3993
|
+
serialized_model_ext,
|
|
3994
|
+
serialized_imputer,
|
|
3995
|
+
serialized_indexer,
|
|
3996
|
+
optional_metadata,
|
|
3997
|
+
size_optional_metadata,
|
|
3998
|
+
out
|
|
3999
|
+
);
|
|
4000
|
+
}
|
|
4001
|
+
|
|
4002
|
+
std::string serialize_combined
|
|
4003
|
+
(
|
|
4004
|
+
const char *serialized_model,
|
|
4005
|
+
const char *serialized_model_ext,
|
|
4006
|
+
const char *serialized_imputer,
|
|
4007
|
+
const char *serialized_indexer,
|
|
4008
|
+
const char *optional_metadata,
|
|
4009
|
+
const size_t size_optional_metadata
|
|
4010
|
+
)
|
|
4011
|
+
{
|
|
4012
|
+
std::string serialized;
|
|
4013
|
+
serialized.resize(
|
|
4014
|
+
determine_serialized_size_combined(
|
|
4015
|
+
serialized_model,
|
|
4016
|
+
serialized_model_ext,
|
|
4017
|
+
serialized_imputer,
|
|
4018
|
+
serialized_indexer,
|
|
4019
|
+
size_optional_metadata
|
|
4020
|
+
)
|
|
4021
|
+
);
|
|
4022
|
+
char *ptr = &serialized[0];
|
|
4023
|
+
serialize_combined(
|
|
4024
|
+
serialized_model,
|
|
4025
|
+
serialized_model_ext,
|
|
4026
|
+
serialized_imputer,
|
|
4027
|
+
serialized_indexer,
|
|
4028
|
+
optional_metadata,
|
|
4029
|
+
size_optional_metadata,
|
|
4030
|
+
ptr
|
|
4031
|
+
);
|
|
4032
|
+
return serialized;
|
|
4033
|
+
}
|
|
4034
|
+
|
|
4035
|
+
template <class Model, class itype>
|
|
4036
|
+
void deserialize_model
|
|
4037
|
+
(
|
|
4038
|
+
Model &model,
|
|
4039
|
+
itype &in,
|
|
4040
|
+
const bool has_same_endianness,
|
|
4041
|
+
const bool has_same_int_size,
|
|
4042
|
+
const bool has_same_size_t_size,
|
|
4043
|
+
const PlatformSize saved_int_t,
|
|
4044
|
+
const PlatformSize saved_size_t,
|
|
4045
|
+
const bool lacks_range_penalty,
|
|
4046
|
+
const bool lacks_scoring_metric
|
|
4047
|
+
)
|
|
4048
|
+
{
|
|
4049
|
+
if (has_same_endianness && has_same_int_size && has_same_size_t_size && !lacks_range_penalty && !lacks_scoring_metric)
|
|
4050
|
+
{
|
|
4051
|
+
deserialize_model(model, in);
|
|
4052
|
+
return;
|
|
4053
|
+
}
|
|
4054
|
+
|
|
4055
|
+
std::vector<char> buffer;
|
|
4056
|
+
|
|
4057
|
+
if (saved_int_t == Is16Bit && saved_size_t == Is32Bit)
|
|
4058
|
+
{
|
|
4059
|
+
deserialize_model<itype, int16_t, uint32_t>(model, in, buffer, !has_same_endianness, lacks_range_penalty, lacks_scoring_metric);
|
|
4060
|
+
}
|
|
4061
|
+
|
|
4062
|
+
else if (saved_int_t == Is32Bit && saved_size_t == Is32Bit)
|
|
4063
|
+
{
|
|
4064
|
+
deserialize_model<itype, int32_t, uint32_t>(model, in, buffer, !has_same_endianness, lacks_range_penalty, lacks_scoring_metric);
|
|
4065
|
+
}
|
|
4066
|
+
|
|
4067
|
+
else if (saved_int_t == Is64Bit && saved_size_t == Is32Bit)
|
|
4068
|
+
{
|
|
4069
|
+
deserialize_model<itype, int64_t, uint32_t>(model, in, buffer, !has_same_endianness, lacks_range_penalty, lacks_scoring_metric);
|
|
4070
|
+
}
|
|
4071
|
+
|
|
4072
|
+
else if (saved_int_t == Is16Bit && saved_size_t == Is64Bit)
|
|
4073
|
+
{
|
|
4074
|
+
deserialize_model<itype, int16_t, uint64_t>(model, in, buffer, !has_same_endianness, lacks_range_penalty, lacks_scoring_metric);
|
|
4075
|
+
}
|
|
4076
|
+
|
|
4077
|
+
else if (saved_int_t == Is32Bit && saved_size_t == Is64Bit)
|
|
4078
|
+
{
|
|
4079
|
+
deserialize_model<itype, int32_t, uint64_t>(model, in, buffer, !has_same_endianness, lacks_range_penalty, lacks_scoring_metric);
|
|
4080
|
+
}
|
|
4081
|
+
|
|
4082
|
+
else if (saved_int_t == Is64Bit && saved_size_t == Is64Bit)
|
|
4083
|
+
{
|
|
4084
|
+
deserialize_model<itype, int16_t, uint64_t>(model, in, buffer, !has_same_endianness, lacks_range_penalty, lacks_scoring_metric);
|
|
4085
|
+
}
|
|
4086
|
+
|
|
4087
|
+
else
|
|
4088
|
+
{
|
|
4089
|
+
unexpected_error();
|
|
4090
|
+
}
|
|
4091
|
+
}
|
|
4092
|
+
|
|
4093
|
+
template <class itype>
|
|
4094
|
+
void deserialize_combined
|
|
4095
|
+
(
|
|
4096
|
+
itype &in,
|
|
4097
|
+
IsoForest *model,
|
|
4098
|
+
ExtIsoForest *model_ext,
|
|
4099
|
+
Imputer *imputer,
|
|
4100
|
+
TreesIndexer *indexer,
|
|
4101
|
+
char *optional_metadata
|
|
4102
|
+
)
|
|
4103
|
+
{
|
|
4104
|
+
SignalSwitcher ss = SignalSwitcher();
|
|
4105
|
+
|
|
4106
|
+
bool has_same_int_size;
|
|
4107
|
+
bool has_same_size_t_size;
|
|
4108
|
+
bool has_same_endianness;
|
|
4109
|
+
PlatformSize saved_int_t;
|
|
4110
|
+
PlatformSize saved_size_t;
|
|
4111
|
+
PlatformEndianness saved_endian;
|
|
4112
|
+
bool lacks_range_penalty;
|
|
4113
|
+
bool lacks_scoring_metric;
|
|
4114
|
+
bool lacks_indexer;
|
|
4115
|
+
|
|
4116
|
+
check_setup_info(
|
|
4117
|
+
in,
|
|
4118
|
+
has_same_int_size,
|
|
4119
|
+
has_same_size_t_size,
|
|
4120
|
+
has_same_endianness,
|
|
4121
|
+
saved_int_t,
|
|
4122
|
+
saved_size_t,
|
|
4123
|
+
saved_endian,
|
|
4124
|
+
lacks_range_penalty,
|
|
4125
|
+
lacks_scoring_metric,
|
|
4126
|
+
lacks_indexer
|
|
4127
|
+
);
|
|
4128
|
+
|
|
4129
|
+
uint8_t model_in;
|
|
4130
|
+
read_bytes<uint8_t>((void*)&model_in, (size_t)1, in);
|
|
4131
|
+
if (model_in != AllObjectsCombined)
|
|
4132
|
+
throw std::runtime_error("Object to de-serialize was not created through 'serialize_combined'.\n");
|
|
4133
|
+
|
|
4134
|
+
read_bytes<uint8_t>((void*)&model_in, (size_t)1, in);
|
|
4135
|
+
|
|
4136
|
+
size_t size_model[4];
|
|
4137
|
+
size_t size_metadata;
|
|
4138
|
+
if (!lacks_indexer)
|
|
4139
|
+
{
|
|
4140
|
+
read_bytes_size_t((void*)size_model, (size_t)4, in, saved_size_t, has_same_endianness);
|
|
4141
|
+
size_metadata = size_model[3];
|
|
4142
|
+
}
|
|
4143
|
+
|
|
4144
|
+
else
|
|
4145
|
+
{
|
|
4146
|
+
read_bytes_size_t((void*)size_model, (size_t)3, in, saved_size_t, has_same_endianness);
|
|
4147
|
+
size_metadata = size_model[2];
|
|
4148
|
+
size_model[2] = 0;
|
|
4149
|
+
size_model[3] = size_metadata;
|
|
4150
|
+
}
|
|
4151
|
+
|
|
4152
|
+
switch (model_in)
|
|
4153
|
+
{
|
|
4154
|
+
case HasSingleVarModelNext:
|
|
4155
|
+
{
|
|
4156
|
+
deserialize_model(*model, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
|
|
4157
|
+
break;
|
|
4158
|
+
}
|
|
4159
|
+
case HasSingleVarModelPlusIndexerNext:
|
|
4160
|
+
{
|
|
4161
|
+
deserialize_model(*model, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
|
|
4162
|
+
check_interrupt_switch(ss);
|
|
4163
|
+
deserialize_model(*indexer, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
|
|
4164
|
+
break;
|
|
4165
|
+
}
|
|
4166
|
+
case HasExtModelNext:
|
|
4167
|
+
{
|
|
4168
|
+
deserialize_model(*model_ext, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
|
|
4169
|
+
break;
|
|
4170
|
+
}
|
|
4171
|
+
case HasExtModelPlusIndexerNext:
|
|
4172
|
+
{
|
|
4173
|
+
deserialize_model(*model_ext, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
|
|
4174
|
+
check_interrupt_switch(ss);
|
|
4175
|
+
deserialize_model(*indexer, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
|
|
4176
|
+
break;
|
|
4177
|
+
}
|
|
4178
|
+
case HasSingleVarModelPlusImputerNext:
|
|
4179
|
+
{
|
|
4180
|
+
deserialize_model(*model, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
|
|
4181
|
+
check_interrupt_switch(ss);
|
|
4182
|
+
deserialize_model(*imputer, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
|
|
4183
|
+
break;
|
|
4184
|
+
}
|
|
4185
|
+
case HasSingleVarModelPlusImputerPlusIndexerNext:
|
|
4186
|
+
{
|
|
4187
|
+
deserialize_model(*model, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
|
|
4188
|
+
check_interrupt_switch(ss);
|
|
4189
|
+
deserialize_model(*imputer, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
|
|
4190
|
+
check_interrupt_switch(ss);
|
|
4191
|
+
deserialize_model(*indexer, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
|
|
4192
|
+
break;
|
|
4193
|
+
}
|
|
4194
|
+
case HasExtModelPlusImputerNext:
|
|
4195
|
+
{
|
|
4196
|
+
deserialize_model(*model_ext, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
|
|
4197
|
+
check_interrupt_switch(ss);
|
|
4198
|
+
deserialize_model(*imputer, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
|
|
4199
|
+
break;
|
|
4200
|
+
}
|
|
4201
|
+
case HasExtModelPlusImputerPlusIndexerNext:
|
|
4202
|
+
{
|
|
4203
|
+
deserialize_model(*model_ext, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
|
|
4204
|
+
check_interrupt_switch(ss);
|
|
4205
|
+
deserialize_model(*imputer, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
|
|
4206
|
+
check_interrupt_switch(ss);
|
|
4207
|
+
deserialize_model(*indexer, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
|
|
4208
|
+
break;
|
|
4209
|
+
}
|
|
4210
|
+
case HasSingleVarModelPlusMetadataNext:
|
|
4211
|
+
{
|
|
4212
|
+
deserialize_model(*model, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
|
|
4213
|
+
check_interrupt_switch(ss);
|
|
4214
|
+
read_bytes<char>((void*)optional_metadata, size_metadata, in);
|
|
4215
|
+
break;
|
|
4216
|
+
}
|
|
4217
|
+
case HasSingleVarModelPlusIndexerPlusMetadataNext:
|
|
4218
|
+
{
|
|
4219
|
+
deserialize_model(*model, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
|
|
4220
|
+
check_interrupt_switch(ss);
|
|
4221
|
+
deserialize_model(*indexer, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
|
|
4222
|
+
check_interrupt_switch(ss);
|
|
4223
|
+
read_bytes<char>((void*)optional_metadata, size_metadata, in);
|
|
4224
|
+
break;
|
|
4225
|
+
}
|
|
4226
|
+
case HasExtModelPlusMetadataNext:
|
|
4227
|
+
{
|
|
4228
|
+
deserialize_model(*model_ext, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
|
|
4229
|
+
check_interrupt_switch(ss);
|
|
4230
|
+
read_bytes<char>((void*)optional_metadata, size_metadata, in);
|
|
4231
|
+
break;
|
|
4232
|
+
}
|
|
4233
|
+
case HasExtModelPlusIndexerPlusMetadataNext:
|
|
4234
|
+
{
|
|
4235
|
+
deserialize_model(*model_ext, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
|
|
4236
|
+
check_interrupt_switch(ss);
|
|
4237
|
+
deserialize_model(*indexer, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
|
|
4238
|
+
check_interrupt_switch(ss);
|
|
4239
|
+
read_bytes<char>((void*)optional_metadata, size_metadata, in);
|
|
4240
|
+
break;
|
|
4241
|
+
}
|
|
4242
|
+
case HasSingleVarModelPlusImputerPlusMetadataNext:
|
|
4243
|
+
{
|
|
4244
|
+
deserialize_model(*model, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
|
|
4245
|
+
check_interrupt_switch(ss);
|
|
4246
|
+
deserialize_model(*imputer, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
|
|
4247
|
+
check_interrupt_switch(ss);
|
|
4248
|
+
read_bytes<char>((void*)optional_metadata, size_metadata, in);
|
|
4249
|
+
break;
|
|
4250
|
+
}
|
|
4251
|
+
case HasSingleVarModelPlusImputerPlusIndexerPlusMetadataNext:
|
|
4252
|
+
{
|
|
4253
|
+
deserialize_model(*model, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
|
|
4254
|
+
check_interrupt_switch(ss);
|
|
4255
|
+
deserialize_model(*imputer, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
|
|
4256
|
+
check_interrupt_switch(ss);
|
|
4257
|
+
deserialize_model(*indexer, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
|
|
4258
|
+
check_interrupt_switch(ss);
|
|
4259
|
+
read_bytes<char>((void*)optional_metadata, size_metadata, in);
|
|
4260
|
+
break;
|
|
4261
|
+
}
|
|
4262
|
+
case HasExtModelPlusImputerPlusMetadataNext:
|
|
4263
|
+
{
|
|
4264
|
+
deserialize_model(*model_ext, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
|
|
4265
|
+
check_interrupt_switch(ss);
|
|
4266
|
+
deserialize_model(*imputer, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
|
|
4267
|
+
check_interrupt_switch(ss);
|
|
4268
|
+
read_bytes<char>((void*)optional_metadata, size_metadata, in);
|
|
4269
|
+
break;
|
|
4270
|
+
}
|
|
4271
|
+
case HasExtModelPlusImputerPlusIndexerPlusMetadataNext:
|
|
4272
|
+
{
|
|
4273
|
+
deserialize_model(*model_ext, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
|
|
4274
|
+
check_interrupt_switch(ss);
|
|
4275
|
+
deserialize_model(*imputer, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
|
|
4276
|
+
check_interrupt_switch(ss);
|
|
4277
|
+
deserialize_model(*indexer, in, has_same_endianness, has_same_int_size, has_same_size_t_size, saved_int_t, saved_size_t, lacks_range_penalty, lacks_scoring_metric);
|
|
4278
|
+
check_interrupt_switch(ss);
|
|
4279
|
+
read_bytes<char>((void*)optional_metadata, size_metadata, in);
|
|
4280
|
+
break;
|
|
4281
|
+
}
|
|
4282
|
+
|
|
4283
|
+
default:
|
|
4284
|
+
{
|
|
4285
|
+
throw std::runtime_error("Serialized format is incompatible.\n");
|
|
4286
|
+
}
|
|
4287
|
+
}
|
|
4288
|
+
}
|
|
4289
|
+
|
|
4290
|
+
void deserialize_combined
|
|
4291
|
+
(
|
|
4292
|
+
const char* in,
|
|
4293
|
+
IsoForest *model,
|
|
4294
|
+
ExtIsoForest *model_ext,
|
|
4295
|
+
Imputer *imputer,
|
|
4296
|
+
TreesIndexer *indexer,
|
|
4297
|
+
char *optional_metadata
|
|
4298
|
+
)
|
|
4299
|
+
{
|
|
4300
|
+
deserialize_combined<const char*>(
|
|
4301
|
+
in,
|
|
4302
|
+
model,
|
|
4303
|
+
model_ext,
|
|
4304
|
+
imputer,
|
|
4305
|
+
indexer,
|
|
4306
|
+
optional_metadata
|
|
4307
|
+
);
|
|
4308
|
+
}
|
|
4309
|
+
|
|
4310
|
+
void deserialize_combined
|
|
4311
|
+
(
|
|
4312
|
+
FILE* in,
|
|
4313
|
+
IsoForest *model,
|
|
4314
|
+
ExtIsoForest *model_ext,
|
|
4315
|
+
Imputer *imputer,
|
|
4316
|
+
TreesIndexer *indexer,
|
|
4317
|
+
char *optional_metadata
|
|
4318
|
+
)
|
|
4319
|
+
{
|
|
4320
|
+
deserialize_combined<FILE*>(
|
|
4321
|
+
in,
|
|
4322
|
+
model,
|
|
4323
|
+
model_ext,
|
|
4324
|
+
imputer,
|
|
4325
|
+
indexer,
|
|
4326
|
+
optional_metadata
|
|
4327
|
+
);
|
|
4328
|
+
}
|
|
4329
|
+
|
|
4330
|
+
void deserialize_combined
|
|
4331
|
+
(
|
|
4332
|
+
std::istream &in,
|
|
4333
|
+
IsoForest *model,
|
|
4334
|
+
ExtIsoForest *model_ext,
|
|
4335
|
+
Imputer *imputer,
|
|
4336
|
+
TreesIndexer *indexer,
|
|
4337
|
+
char *optional_metadata
|
|
4338
|
+
)
|
|
4339
|
+
{
|
|
4340
|
+
deserialize_combined<std::istream>(
|
|
4341
|
+
in,
|
|
4342
|
+
model,
|
|
4343
|
+
model_ext,
|
|
4344
|
+
imputer,
|
|
4345
|
+
indexer,
|
|
4346
|
+
optional_metadata
|
|
4347
|
+
);
|
|
4348
|
+
}
|
|
4349
|
+
|
|
4350
|
+
void deserialize_combined
|
|
4351
|
+
(
|
|
4352
|
+
const std::string &in,
|
|
4353
|
+
IsoForest *model,
|
|
4354
|
+
ExtIsoForest *model_ext,
|
|
4355
|
+
Imputer *imputer,
|
|
4356
|
+
TreesIndexer *indexer,
|
|
4357
|
+
char *optional_metadata
|
|
4358
|
+
)
|
|
4359
|
+
{
|
|
4360
|
+
const char *ptr = &in[0];
|
|
4361
|
+
deserialize_combined<const char*>(
|
|
4362
|
+
ptr,
|
|
4363
|
+
model,
|
|
4364
|
+
model_ext,
|
|
4365
|
+
imputer,
|
|
4366
|
+
indexer,
|
|
4367
|
+
optional_metadata
|
|
4368
|
+
);
|
|
4369
|
+
}
|
|
4370
|
+
|
|
4371
|
+
bool check_model_has_range_penalty(const IsoForest &model) noexcept
|
|
4372
|
+
{
|
|
4373
|
+
for (const auto &tree : model.trees)
|
|
4374
|
+
{
|
|
4375
|
+
for (const auto &node : tree)
|
|
4376
|
+
{
|
|
4377
|
+
if (node.score < 0 && node.col_type == Numeric)
|
|
4378
|
+
{
|
|
4379
|
+
if (node.range_low > -HUGE_VAL && node.range_high < HUGE_VAL)
|
|
4380
|
+
return true;
|
|
4381
|
+
}
|
|
4382
|
+
}
|
|
4383
|
+
}
|
|
4384
|
+
|
|
4385
|
+
return false;
|
|
4386
|
+
}
|
|
4387
|
+
|
|
4388
|
+
bool check_model_has_range_penalty(const ExtIsoForest &model) noexcept
|
|
4389
|
+
{
|
|
4390
|
+
for (const auto &tree : model.hplanes)
|
|
4391
|
+
{
|
|
4392
|
+
for (const auto &node : tree)
|
|
4393
|
+
{
|
|
4394
|
+
if (node.score < 0)
|
|
4395
|
+
{
|
|
4396
|
+
if (node.range_low > -HUGE_VAL && node.range_high < HUGE_VAL)
|
|
4397
|
+
return true;
|
|
4398
|
+
}
|
|
4399
|
+
}
|
|
4400
|
+
}
|
|
4401
|
+
|
|
4402
|
+
return false;
|
|
4403
|
+
}
|
|
4404
|
+
|
|
4405
|
+
/*  Sets 'model.has_range_penalty' from the result of
    'check_model_has_range_penalty' applied to the same model. */
void add_range_penalty(IsoForest &model) noexcept
{
    const bool found_finite_range = check_model_has_range_penalty(model);
    model.has_range_penalty = found_finite_range;
}
|
|
4409
|
+
|
|
4410
|
+
/*  Sets 'model.has_range_penalty' from the result of
    'check_model_has_range_penalty' applied to the same model. */
void add_range_penalty(ExtIsoForest &model) noexcept
{
    const bool found_finite_range = check_model_has_range_penalty(model);
    model.has_range_penalty = found_finite_range;
}
|
|
4414
|
+
|
|
4415
|
+
/*  Overload for 'Imputer' objects: does nothing. Presumably present so that
    generic code can call 'add_range_penalty' on any of the serializable
    object types. */
void add_range_penalty(Imputer &model) noexcept
{
    (void)model; /* intentionally unused */
}
|
|
4419
|
+
|
|
4420
|
+
/*  Overload for 'TreesIndexer' objects: does nothing. Presumably present so
    that generic code can call 'add_range_penalty' on any of the serializable
    object types. */
void add_range_penalty(TreesIndexer &model) noexcept
{
    (void)model; /* intentionally unused */
}
|