isotree 0.2.2 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (151)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +8 -1
  3. data/LICENSE.txt +2 -2
  4. data/README.md +32 -14
  5. data/ext/isotree/ext.cpp +144 -31
  6. data/ext/isotree/extconf.rb +7 -7
  7. data/lib/isotree/isolation_forest.rb +110 -30
  8. data/lib/isotree/version.rb +1 -1
  9. data/vendor/isotree/LICENSE +1 -1
  10. data/vendor/isotree/README.md +165 -27
  11. data/vendor/isotree/include/isotree.hpp +2111 -0
  12. data/vendor/isotree/include/isotree_oop.hpp +394 -0
  13. data/vendor/isotree/inst/COPYRIGHTS +62 -0
  14. data/vendor/isotree/src/RcppExports.cpp +525 -52
  15. data/vendor/isotree/src/Rwrapper.cpp +1931 -268
  16. data/vendor/isotree/src/c_interface.cpp +953 -0
  17. data/vendor/isotree/src/crit.hpp +4232 -0
  18. data/vendor/isotree/src/dist.hpp +1886 -0
  19. data/vendor/isotree/src/exp_depth_table.hpp +134 -0
  20. data/vendor/isotree/src/extended.hpp +1444 -0
  21. data/vendor/isotree/src/external_facing_generic.hpp +399 -0
  22. data/vendor/isotree/src/fit_model.hpp +2401 -0
  23. data/vendor/isotree/src/{dealloc.cpp → headers_joined.hpp} +38 -22
  24. data/vendor/isotree/src/helpers_iforest.hpp +813 -0
  25. data/vendor/isotree/src/{impute.cpp → impute.hpp} +353 -122
  26. data/vendor/isotree/src/indexer.cpp +515 -0
  27. data/vendor/isotree/src/instantiate_template_headers.cpp +118 -0
  28. data/vendor/isotree/src/instantiate_template_headers.hpp +240 -0
  29. data/vendor/isotree/src/isoforest.hpp +1659 -0
  30. data/vendor/isotree/src/isotree.hpp +1804 -392
  31. data/vendor/isotree/src/isotree_exportable.hpp +99 -0
  32. data/vendor/isotree/src/merge_models.cpp +159 -16
  33. data/vendor/isotree/src/mult.hpp +1321 -0
  34. data/vendor/isotree/src/oop_interface.cpp +842 -0
  35. data/vendor/isotree/src/oop_interface.hpp +278 -0
  36. data/vendor/isotree/src/other_helpers.hpp +219 -0
  37. data/vendor/isotree/src/predict.hpp +1932 -0
  38. data/vendor/isotree/src/python_helpers.hpp +134 -0
  39. data/vendor/isotree/src/ref_indexer.hpp +154 -0
  40. data/vendor/isotree/src/robinmap/LICENSE +21 -0
  41. data/vendor/isotree/src/robinmap/README.md +483 -0
  42. data/vendor/isotree/src/robinmap/include/tsl/robin_growth_policy.h +406 -0
  43. data/vendor/isotree/src/robinmap/include/tsl/robin_hash.h +1620 -0
  44. data/vendor/isotree/src/robinmap/include/tsl/robin_map.h +807 -0
  45. data/vendor/isotree/src/robinmap/include/tsl/robin_set.h +660 -0
  46. data/vendor/isotree/src/serialize.cpp +4300 -139
  47. data/vendor/isotree/src/sql.cpp +141 -59
  48. data/vendor/isotree/src/subset_models.cpp +174 -0
  49. data/vendor/isotree/src/utils.hpp +3808 -0
  50. data/vendor/isotree/src/xoshiro.hpp +467 -0
  51. data/vendor/isotree/src/ziggurat.hpp +405 -0
  52. metadata +38 -104
  53. data/vendor/cereal/LICENSE +0 -24
  54. data/vendor/cereal/README.md +0 -85
  55. data/vendor/cereal/include/cereal/access.hpp +0 -351
  56. data/vendor/cereal/include/cereal/archives/adapters.hpp +0 -163
  57. data/vendor/cereal/include/cereal/archives/binary.hpp +0 -169
  58. data/vendor/cereal/include/cereal/archives/json.hpp +0 -1019
  59. data/vendor/cereal/include/cereal/archives/portable_binary.hpp +0 -334
  60. data/vendor/cereal/include/cereal/archives/xml.hpp +0 -956
  61. data/vendor/cereal/include/cereal/cereal.hpp +0 -1089
  62. data/vendor/cereal/include/cereal/details/helpers.hpp +0 -422
  63. data/vendor/cereal/include/cereal/details/polymorphic_impl.hpp +0 -796
  64. data/vendor/cereal/include/cereal/details/polymorphic_impl_fwd.hpp +0 -65
  65. data/vendor/cereal/include/cereal/details/static_object.hpp +0 -127
  66. data/vendor/cereal/include/cereal/details/traits.hpp +0 -1411
  67. data/vendor/cereal/include/cereal/details/util.hpp +0 -84
  68. data/vendor/cereal/include/cereal/external/base64.hpp +0 -134
  69. data/vendor/cereal/include/cereal/external/rapidjson/allocators.h +0 -284
  70. data/vendor/cereal/include/cereal/external/rapidjson/cursorstreamwrapper.h +0 -78
  71. data/vendor/cereal/include/cereal/external/rapidjson/document.h +0 -2652
  72. data/vendor/cereal/include/cereal/external/rapidjson/encodedstream.h +0 -299
  73. data/vendor/cereal/include/cereal/external/rapidjson/encodings.h +0 -716
  74. data/vendor/cereal/include/cereal/external/rapidjson/error/en.h +0 -74
  75. data/vendor/cereal/include/cereal/external/rapidjson/error/error.h +0 -161
  76. data/vendor/cereal/include/cereal/external/rapidjson/filereadstream.h +0 -99
  77. data/vendor/cereal/include/cereal/external/rapidjson/filewritestream.h +0 -104
  78. data/vendor/cereal/include/cereal/external/rapidjson/fwd.h +0 -151
  79. data/vendor/cereal/include/cereal/external/rapidjson/internal/biginteger.h +0 -290
  80. data/vendor/cereal/include/cereal/external/rapidjson/internal/diyfp.h +0 -271
  81. data/vendor/cereal/include/cereal/external/rapidjson/internal/dtoa.h +0 -245
  82. data/vendor/cereal/include/cereal/external/rapidjson/internal/ieee754.h +0 -78
  83. data/vendor/cereal/include/cereal/external/rapidjson/internal/itoa.h +0 -308
  84. data/vendor/cereal/include/cereal/external/rapidjson/internal/meta.h +0 -186
  85. data/vendor/cereal/include/cereal/external/rapidjson/internal/pow10.h +0 -55
  86. data/vendor/cereal/include/cereal/external/rapidjson/internal/regex.h +0 -740
  87. data/vendor/cereal/include/cereal/external/rapidjson/internal/stack.h +0 -232
  88. data/vendor/cereal/include/cereal/external/rapidjson/internal/strfunc.h +0 -69
  89. data/vendor/cereal/include/cereal/external/rapidjson/internal/strtod.h +0 -290
  90. data/vendor/cereal/include/cereal/external/rapidjson/internal/swap.h +0 -46
  91. data/vendor/cereal/include/cereal/external/rapidjson/istreamwrapper.h +0 -128
  92. data/vendor/cereal/include/cereal/external/rapidjson/memorybuffer.h +0 -70
  93. data/vendor/cereal/include/cereal/external/rapidjson/memorystream.h +0 -71
  94. data/vendor/cereal/include/cereal/external/rapidjson/msinttypes/inttypes.h +0 -316
  95. data/vendor/cereal/include/cereal/external/rapidjson/msinttypes/stdint.h +0 -300
  96. data/vendor/cereal/include/cereal/external/rapidjson/ostreamwrapper.h +0 -81
  97. data/vendor/cereal/include/cereal/external/rapidjson/pointer.h +0 -1414
  98. data/vendor/cereal/include/cereal/external/rapidjson/prettywriter.h +0 -277
  99. data/vendor/cereal/include/cereal/external/rapidjson/rapidjson.h +0 -656
  100. data/vendor/cereal/include/cereal/external/rapidjson/reader.h +0 -2230
  101. data/vendor/cereal/include/cereal/external/rapidjson/schema.h +0 -2497
  102. data/vendor/cereal/include/cereal/external/rapidjson/stream.h +0 -223
  103. data/vendor/cereal/include/cereal/external/rapidjson/stringbuffer.h +0 -121
  104. data/vendor/cereal/include/cereal/external/rapidjson/writer.h +0 -709
  105. data/vendor/cereal/include/cereal/external/rapidxml/license.txt +0 -52
  106. data/vendor/cereal/include/cereal/external/rapidxml/manual.html +0 -406
  107. data/vendor/cereal/include/cereal/external/rapidxml/rapidxml.hpp +0 -2624
  108. data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_iterators.hpp +0 -175
  109. data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_print.hpp +0 -428
  110. data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_utils.hpp +0 -123
  111. data/vendor/cereal/include/cereal/macros.hpp +0 -154
  112. data/vendor/cereal/include/cereal/specialize.hpp +0 -139
  113. data/vendor/cereal/include/cereal/types/array.hpp +0 -79
  114. data/vendor/cereal/include/cereal/types/atomic.hpp +0 -55
  115. data/vendor/cereal/include/cereal/types/base_class.hpp +0 -203
  116. data/vendor/cereal/include/cereal/types/bitset.hpp +0 -176
  117. data/vendor/cereal/include/cereal/types/boost_variant.hpp +0 -164
  118. data/vendor/cereal/include/cereal/types/chrono.hpp +0 -72
  119. data/vendor/cereal/include/cereal/types/common.hpp +0 -129
  120. data/vendor/cereal/include/cereal/types/complex.hpp +0 -56
  121. data/vendor/cereal/include/cereal/types/concepts/pair_associative_container.hpp +0 -73
  122. data/vendor/cereal/include/cereal/types/deque.hpp +0 -62
  123. data/vendor/cereal/include/cereal/types/forward_list.hpp +0 -68
  124. data/vendor/cereal/include/cereal/types/functional.hpp +0 -43
  125. data/vendor/cereal/include/cereal/types/list.hpp +0 -62
  126. data/vendor/cereal/include/cereal/types/map.hpp +0 -36
  127. data/vendor/cereal/include/cereal/types/memory.hpp +0 -425
  128. data/vendor/cereal/include/cereal/types/optional.hpp +0 -66
  129. data/vendor/cereal/include/cereal/types/polymorphic.hpp +0 -483
  130. data/vendor/cereal/include/cereal/types/queue.hpp +0 -132
  131. data/vendor/cereal/include/cereal/types/set.hpp +0 -103
  132. data/vendor/cereal/include/cereal/types/stack.hpp +0 -76
  133. data/vendor/cereal/include/cereal/types/string.hpp +0 -61
  134. data/vendor/cereal/include/cereal/types/tuple.hpp +0 -123
  135. data/vendor/cereal/include/cereal/types/unordered_map.hpp +0 -36
  136. data/vendor/cereal/include/cereal/types/unordered_set.hpp +0 -99
  137. data/vendor/cereal/include/cereal/types/utility.hpp +0 -47
  138. data/vendor/cereal/include/cereal/types/valarray.hpp +0 -89
  139. data/vendor/cereal/include/cereal/types/variant.hpp +0 -109
  140. data/vendor/cereal/include/cereal/types/vector.hpp +0 -112
  141. data/vendor/cereal/include/cereal/version.hpp +0 -52
  142. data/vendor/isotree/src/Makevars +0 -4
  143. data/vendor/isotree/src/crit.cpp +0 -912
  144. data/vendor/isotree/src/dist.cpp +0 -749
  145. data/vendor/isotree/src/extended.cpp +0 -790
  146. data/vendor/isotree/src/fit_model.cpp +0 -1090
  147. data/vendor/isotree/src/helpers_iforest.cpp +0 -324
  148. data/vendor/isotree/src/isoforest.cpp +0 -771
  149. data/vendor/isotree/src/mult.cpp +0 -607
  150. data/vendor/isotree/src/predict.cpp +0 -853
  151. data/vendor/isotree/src/utils.cpp +0 -1566
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1876c50ef4d9bbd7fc7898c222b9f10b8d287a38cf9d37af53e9841f63494299
4
- data.tar.gz: ea7b60ae9683498df1910fb3f4774ce8c94daf9fe2a6e23ffd87a9d488bcc5ce
3
+ metadata.gz: a2ed0745b24db6c7d55b86cbf60394de590225509f3be5dbc79934606cd402cb
4
+ data.tar.gz: d1601d80e3878cc678544a245defd04831e6f11f4f374ecff6d1916e1618af6e
5
5
  SHA512:
6
- metadata.gz: 3d0f6d95cac8b7dd457512c0ce41c3ae286cf8f344ccc8a47dee1c00104657b3d2d9c97be1c1211fcb253065ba7fcf3d3b955a89dd9bac8b2b6e5e35516c7b7d
7
- data.tar.gz: 476d7c8ffe5c252ba94ecc7a68e59fc3afbbb2a732fd9e3127e032e543a74957fcdef6173b6225dfee7370dbb86625b61b02d16a68cc7257be8f7cbdae87a581
6
+ metadata.gz: b461f501114e56810ed6075fba28ae79a611763838147670f357a8a9645000f139072bf3bb3a2120cc09ad302df429d39f6b67522b1c2bc18862e75a65266eb4
7
+ data.tar.gz: 57fc3f334fac2a6918ef95fb4827a67543a2aa5c1cdfcd86225c3411a409348593764109bcbc5155ddd6e49217f0764686847265550d145e72007556ae7fd00e
data/CHANGELOG.md CHANGED
@@ -1,3 +1,10 @@
1
+ ## 0.3.0 (2022-06-13)
2
+
3
+ - Updated IsoTree to 0.5.16
4
+ - Updated serialization format (exported models must be recreated)
5
+ - Dropped support for Ruby < 2.7
6
+ - Dropped support for Windows
7
+
1
8
  ## 0.2.2 (2022-06-12)
2
9
 
3
10
  - Fixed segfault when data is smaller than sample size
@@ -13,7 +20,7 @@
13
20
 
14
21
  ## 0.1.5 (2021-03-14)
15
22
 
16
- - Updated Isotree to 0.1.25
23
+ - Updated IsoTree to 0.1.25
17
24
  - Added support for exporting and importing models
18
25
 
19
26
  ## 0.1.4 (2020-08-22)
data/LICENSE.txt CHANGED
@@ -1,7 +1,7 @@
1
1
  BSD 2-Clause License
2
2
 
3
- Copyright (c) 2020, David Cortes
4
- Copyright (c) 2020-2021, Andrew Kane
3
+ Copyright (c) 2020-2022, David Cortes
4
+ Copyright (c) 2020-2022, Andrew Kane
5
5
  All rights reserved.
6
6
 
7
7
  Redistribution and use in source and binary forms, with or without
data/README.md CHANGED
@@ -16,6 +16,8 @@ Add this line to your application’s Gemfile:
16
16
  gem "isotree"
17
17
  ```
18
18
 
19
+ Windows is not supported at the moment
20
+
19
21
  ## Getting Started
20
22
 
21
23
  Prep your data
@@ -24,7 +26,8 @@ Prep your data
24
26
  data = [
25
27
  {department: "Books", sale: false, price: 2.50},
26
28
  {department: "Books", sale: true, price: 3.00},
27
- {department: "Movies", sale: false, price: 5.00}
29
+ {department: "Movies", sale: false, price: 5.00},
30
+ # ...
28
31
  ]
29
32
  ```
30
33
 
@@ -61,28 +64,38 @@ Pass parameters - default values below
61
64
 
62
65
  ```ruby
63
66
  IsoTree::IsolationForest.new(
64
- sample_size: nil,
67
+ sample_size: "auto",
65
68
  ntrees: 500,
66
69
  ndim: 3,
67
- ntry: 3,
68
- prob_pick_avg_gain: 0,
69
- prob_pick_pooled_gain: 0,
70
- prob_split_avg_gain: 0,
71
- prob_split_pooled_gain: 0,
72
- min_gain: 0,
73
- missing_action: "impute",
74
- new_categ_action: "smallest",
75
- categ_split_type: "subset",
70
+ ntry: 1,
71
+ max_depth: "auto",
72
+ ncols_per_tree: nil,
73
+ prob_pick_pooled_gain: 0.0,
74
+ prob_pick_avg_gain: 0.0,
75
+ prob_pick_full_gain: 0.0,
76
+ prob_pick_dens: 0.0,
77
+ prob_pick_col_by_range: 0.0,
78
+ prob_pick_col_by_var: 0.0,
79
+ prob_pick_col_by_kurt: 0.0,
80
+ min_gain: 0.0,
81
+ missing_action: "auto",
82
+ new_categ_action: "auto",
83
+ categ_split_type: "auto",
76
84
  all_perm: false,
77
85
  coef_by_prop: false,
78
86
  sample_with_replacement: false,
79
- penalize_range: true,
87
+ penalize_range: false,
88
+ standardize_data: true,
89
+ scoring_metric: "depth",
90
+ fast_bratio: true,
80
91
  weigh_by_kurtosis: false,
81
- coefs: "normal",
92
+ coefs: "uniform",
93
+ assume_full_distr: true,
82
94
  min_imp_obs: 3,
83
95
  depth_imp: "higher",
84
96
  weigh_imp_rows: "inverse",
85
97
  random_seed: 1,
98
+ use_long_double: false,
86
99
  nthreads: -1
87
100
  )
88
101
  ```
@@ -134,7 +147,6 @@ Check out [Trove](https://github.com/ankane/trove) for deploying models.
134
147
 
135
148
  ```sh
136
149
  trove push model.bin
137
- trove push model.bin.metadata
138
150
  ```
139
151
 
140
152
  ## Reference
@@ -145,6 +157,12 @@ Get the average isolation depth
145
157
  model.predict(data, output: "avg_depth")
146
158
  ```
147
159
 
160
+ ## Upgrading
161
+
162
+ ### 0.3.0
163
+
164
+ This version uses IsoTree’s new serialization format. Exported models must be recreated.
165
+
148
166
  ## History
149
167
 
150
168
  View the [changelog](https://github.com/ankane/isotree-ruby/blob/master/CHANGELOG.md)
data/ext/isotree/ext.cpp CHANGED
@@ -1,3 +1,8 @@
1
+ // stdlib
2
+ #include <cmath>
3
+ #include <fstream>
4
+ #include <iostream>
5
+
1
6
  // isotree
2
7
  #include <isotree.hpp>
3
8
 
@@ -22,7 +27,7 @@ namespace Rice::detail
22
27
  NewCategAction convert(VALUE x)
23
28
  {
24
29
  auto value = Object(x).to_s().str();
25
- if (value == "weighted") return Weighted;
30
+ if (value == "weighted" || value == "impute") return Weighted;
26
31
  if (value == "smallest") return Smallest;
27
32
  if (value == "random") return Random;
28
33
  throw std::runtime_error("Unknown new categ action: " + value);
@@ -96,6 +101,24 @@ namespace Rice::detail
96
101
  throw std::runtime_error("Unknown weight imp rows: " + value);
97
102
  }
98
103
  };
104
+
105
+ template<>
106
+ class From_Ruby<ScoringMetric>
107
+ {
108
+ public:
109
+ ScoringMetric convert(VALUE x)
110
+ {
111
+ auto value = Object(x).to_s().str();
112
+ if (value == "depth") return Depth;
113
+ if (value == "adj_depth") return AdjDepth;
114
+ if (value == "density") return Density;
115
+ if (value == "adj_density") return AdjDensity;
116
+ if (value == "boxed_density") return BoxedDensity;
117
+ if (value == "boxed_density2") return BoxedDensity2;
118
+ if (value == "boxed_ratio") return BoxedRatio;
119
+ throw std::runtime_error("Unknown scoring metric: " + value);
120
+ }
121
+ };
99
122
  }
100
123
 
101
124
  extern "C"
@@ -118,20 +141,20 @@ void Init_ext()
118
141
  size_t ncols_numeric = options.get<size_t, Symbol>("ncols_numeric");
119
142
  size_t ncols_categ = options.get<size_t, Symbol>("ncols_categ");
120
143
 
121
- double *restrict numeric_data = NULL;
144
+ real_t* numeric_data = NULL;
122
145
  if (ncols_numeric > 0) {
123
146
  numeric_data = (double*) options.get<String, Symbol>("numeric_data").c_str();
124
147
  }
125
148
 
126
- int *restrict categorical_data = NULL;
127
- int *restrict ncat = NULL;
149
+ int* categorical_data = NULL;
150
+ int* ncat = NULL;
128
151
  if (ncols_categ > 0) {
129
152
  categorical_data = (int*) options.get<String, Symbol>("categorical_data").c_str();
130
153
  ncat = (int*) options.get<String, Symbol>("ncat").c_str();
131
154
  }
132
155
 
133
156
  // not used (sparse matrices)
134
- double* Xc = NULL;
157
+ real_t* Xc = NULL;
135
158
  sparse_ix* Xc_ind = NULL;
136
159
  sparse_ix* Xc_indptr = NULL;
137
160
 
@@ -142,9 +165,7 @@ void Init_ext()
142
165
  size_t ntrees = options.get<size_t, Symbol>("ntrees");
143
166
  size_t ntry = options.get<size_t, Symbol>("ntry");
144
167
  double prob_pick_by_gain_avg = options.get<double, Symbol>("prob_pick_avg_gain");
145
- double prob_split_by_gain_avg = options.get<double, Symbol>("prob_split_avg_gain");
146
168
  double prob_pick_by_gain_pl = options.get<double, Symbol>("prob_pick_pooled_gain");
147
- double prob_split_by_gain_pl = options.get<double, Symbol>("prob_split_pooled_gain");
148
169
  double min_gain = options.get<double, Symbol>("min_gain");
149
170
  MissingAction missing_action = options.get<MissingAction, Symbol>("missing_action");
150
171
  CategSplit cat_split_type = options.get<CategSplit, Symbol>("categ_split_type");
@@ -159,21 +180,31 @@ void Init_ext()
159
180
  UseDepthImp depth_imp = options.get<UseDepthImp, Symbol>("depth_imp");
160
181
  WeighImpRows weigh_imp_rows = options.get<WeighImpRows, Symbol>("weigh_imp_rows");
161
182
  uint64_t random_seed = options.get<uint64_t, Symbol>("random_seed");
183
+ bool use_long_double = options.get<bool, Symbol>("use_long_double");
162
184
  int nthreads = options.get<int, Symbol>("nthreads");
163
185
 
164
186
  // TODO options
165
187
  double* sample_weights = NULL;
166
- bool weight_as_sample = false;
167
- size_t max_depth = 0;
168
- bool limit_depth = true;
188
+ bool weight_as_sample = options.get<bool, Symbol>("weights_as_sample_prob");
189
+ size_t max_depth = options.get<size_t, Symbol>("max_depth");
190
+ bool limit_depth = options.get<bool, Symbol>("limit_depth");
169
191
  bool standardize_dist = false;
170
192
  double* tmat = NULL;
171
193
  double* output_depths = NULL;
172
194
  bool standardize_depth = false;
173
- double* col_weights = NULL;
174
- Imputer *imputer = NULL;
195
+ real_t* col_weights = NULL;
196
+ Imputer* imputer = NULL;
175
197
  bool impute_at_fit = false;
176
- bool handle_interrupt = false;
198
+
199
+ int ncols_per_tree = options.get<int, Symbol>("ncols_per_tree");
200
+ bool standardize_data = options.get<bool, Symbol>("standardize_data");
201
+ ScoringMetric scoring_metric = options.get<ScoringMetric, Symbol>("scoring_metric");
202
+ bool fast_bratio = options.get<bool, Symbol>("fast_bratio");
203
+ double prob_pick_by_full_gain = options.get<double, Symbol>("prob_pick_full_gain");
204
+ double prob_pick_by_dens = options.get<double, Symbol>("prob_pick_dens");
205
+ double prob_pick_col_by_range = options.get<double, Symbol>("prob_pick_col_by_range");
206
+ double prob_pick_col_by_var = options.get<double, Symbol>("prob_pick_col_by_var");
207
+ double prob_pick_col_by_kurt = options.get<double, Symbol>("prob_pick_col_by_kurt");
177
208
 
178
209
  fit_iforest(
179
210
  NULL,
@@ -197,18 +228,25 @@ void Init_ext()
197
228
  sample_size,
198
229
  ntrees,
199
230
  max_depth,
231
+ ncols_per_tree,
200
232
  limit_depth,
201
233
  penalize_range,
234
+ standardize_data,
235
+ scoring_metric,
236
+ fast_bratio,
202
237
  standardize_dist,
203
238
  tmat,
204
239
  output_depths,
205
240
  standardize_depth,
206
241
  col_weights,
207
242
  weigh_by_kurt,
208
- prob_pick_by_gain_avg,
209
- prob_split_by_gain_avg,
210
243
  prob_pick_by_gain_pl,
211
- prob_split_by_gain_pl,
244
+ prob_pick_by_gain_avg,
245
+ prob_pick_by_full_gain,
246
+ prob_pick_by_dens,
247
+ prob_pick_col_by_range,
248
+ prob_pick_col_by_var,
249
+ prob_pick_col_by_kurt,
212
250
  min_gain,
213
251
  missing_action,
214
252
  cat_split_type,
@@ -220,7 +258,7 @@ void Init_ext()
220
258
  weigh_imp_rows,
221
259
  impute_at_fit,
222
260
  random_seed,
223
- handle_interrupt,
261
+ use_long_double,
224
262
  nthreads
225
263
  );
226
264
 
@@ -234,21 +272,21 @@ void Init_ext()
234
272
  size_t ncols_numeric = options.get<size_t, Symbol>("ncols_numeric");
235
273
  size_t ncols_categ = options.get<size_t, Symbol>("ncols_categ");
236
274
 
237
- double *restrict numeric_data = NULL;
275
+ real_t* numeric_data = NULL;
238
276
  if (ncols_numeric > 0) {
239
277
  numeric_data = (double*) options.get<String, Symbol>("numeric_data").c_str();
240
278
  }
241
279
 
242
- int *restrict categorical_data = NULL;
280
+ int* categorical_data = NULL;
243
281
  if (ncols_categ > 0) {
244
282
  categorical_data = (int*) options.get<String, Symbol>("categorical_data").c_str();
245
283
  }
246
284
 
247
285
  // not used (sparse matrices)
248
- double* Xc = NULL;
286
+ real_t* Xc = NULL;
249
287
  sparse_ix* Xc_ind = NULL;
250
288
  sparse_ix* Xc_indptr = NULL;
251
- double* Xr = NULL;
289
+ real_t* Xr = NULL;
252
290
  sparse_ix* Xr_ind = NULL;
253
291
  sparse_ix* Xr_indptr = NULL;
254
292
 
@@ -257,10 +295,17 @@ void Init_ext()
257
295
  bool standardize = options.get<bool, Symbol>("standardize");
258
296
  std::vector<double> outlier_scores(nrows);
259
297
  sparse_ix* tree_num = NULL;
298
+ bool is_col_major = true;
299
+ size_t ld_numeric = 0;
300
+ size_t ld_categ = 0;
301
+ double* per_tree_depths = NULL;
260
302
 
261
303
  predict_iforest(
262
304
  numeric_data,
263
305
  categorical_data,
306
+ is_col_major,
307
+ ld_numeric,
308
+ ld_categ,
264
309
  Xc,
265
310
  Xc_ind,
266
311
  Xc_indptr,
@@ -273,7 +318,9 @@ void Init_ext()
273
318
  NULL,
274
319
  &iso,
275
320
  outlier_scores.data(),
276
- tree_num
321
+ tree_num,
322
+ per_tree_depths,
323
+ NULL
277
324
  );
278
325
 
279
326
  Array ret;
@@ -283,27 +330,93 @@ void Init_ext()
283
330
  return ret;
284
331
  })
285
332
  .define_singleton_function(
286
- "serialize_ext_isoforest",
287
- [](ExtIsoForest& iso, String path) {
333
+ "serialize_combined",
334
+ [](ExtIsoForest& iso, String path, String metadata) {
288
335
  #ifdef _MSC_VER
289
336
  // TODO convert to wchar_t
290
337
  throw std::runtime_error("Not supported on Windows yet");
291
338
  #else
292
- serialize_ext_isoforest(iso, path.c_str());
339
+ std::ofstream file;
340
+ file.open(path.c_str());
341
+ serialize_combined(
342
+ NULL,
343
+ &iso,
344
+ NULL,
345
+ NULL,
346
+ metadata.c_str(),
347
+ // returns bytesize (RSTRING_LEN)
348
+ metadata.length(),
349
+ file
350
+ );
351
+ file.close();
293
352
  #endif
294
353
  })
295
354
  .define_singleton_function(
296
- "deserialize_ext_isoforest",
355
+ "deserialize_combined",
297
356
  [](String path) {
298
- ExtIsoForest iso;
299
-
300
357
  #ifdef _MSC_VER
301
358
  // TODO convert to wchar_t
302
359
  throw std::runtime_error("Not supported on Windows yet");
303
360
  #else
304
- deserialize_ext_isoforest(iso, path.c_str());
305
- #endif
361
+ Array ret;
306
362
 
307
- return iso;
363
+ std::ifstream file;
364
+ file.open(path.c_str(), std::ios_base::in | std::ios_base::binary);
365
+ if (!file) {
366
+ throw std::runtime_error("Cannot open file");
367
+ }
368
+
369
+ bool is_isotree_model = false;
370
+ bool is_compatible = false;
371
+ bool has_combined_objects = false;
372
+ bool has_IsoForest = false;
373
+ bool has_ExtIsoForest = false;
374
+ bool has_Imputer = false;
375
+ bool has_Indexer = false;
376
+ bool has_metadata = false;
377
+ size_t size_metadata = 0;
378
+
379
+ inspect_serialized_object(
380
+ file,
381
+ is_isotree_model,
382
+ is_compatible,
383
+ has_combined_objects,
384
+ has_IsoForest,
385
+ has_ExtIsoForest,
386
+ has_Imputer,
387
+ has_Indexer,
388
+ has_metadata,
389
+ size_metadata
390
+ );
391
+
392
+ if (!is_isotree_model || !has_combined_objects) {
393
+ throw std::runtime_error("Input file is not a serialized isotree model");
394
+ }
395
+ if (!is_compatible) {
396
+ throw std::runtime_error("Model file format is incompatible");
397
+ }
398
+ if (size_metadata == 0) {
399
+ throw std::runtime_error("Input file does not contain metadata");
400
+ }
401
+
402
+ IsoForest model = IsoForest();
403
+ ExtIsoForest model_ext = ExtIsoForest();
404
+ Imputer imputer = Imputer();
405
+ TreesIndexer indexer = TreesIndexer();
406
+ char *optional_metadata = (char*) calloc(size_metadata, sizeof(char));
407
+ if (optional_metadata == NULL) {
408
+ throw std::runtime_error("Cannot allocate memory");
409
+ }
410
+
411
+ deserialize_combined(file, &model, &model_ext, &imputer, &indexer, optional_metadata);
412
+ file.close();
413
+
414
+ ret.push(Object(Rice::detail::To_Ruby<ExtIsoForest>().convert(model_ext)));
415
+ ret.push(String(std::string(optional_metadata, size_metadata)));
416
+
417
+ free(optional_metadata);
418
+
419
+ return ret;
420
+ #endif
308
421
  });
309
422
  }
data/ext/isotree/extconf.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  require "mkmf-rice"
2
2
 
3
- $CXXFLAGS += " -std=c++17 $(optflags) -D_USE_MERSENNE_TWISTER -D_ENABLE_CEREAL"
3
+ $CXXFLAGS += " -std=c++17 $(optflags) -D_USE_XOSHIRO -DSUPPORTS_RESTRICT=1 -D_USE_ROBIN_MAP -DDONT_THROW_ON_INTERRUPT"
4
4
 
5
5
  apple_clang = RbConfig::CONFIG["CC_VERSION_MESSAGE"] =~ /apple clang/i
6
6
 
@@ -11,12 +11,12 @@ if have_library("omp") || have_library("gomp")
11
11
  end
12
12
 
13
13
  ext = File.expand_path(".", __dir__)
14
- isotree = File.expand_path("../../vendor/isotree/src", __dir__)
15
- cereal = File.expand_path("../../vendor/cereal/include", __dir__)
14
+ isotree_src = File.expand_path("../../vendor/isotree/src", __dir__)
15
+ isotree_inc = File.expand_path("../../vendor/isotree/include", __dir__)
16
16
 
17
- exclude = %w(Rwrapper.cpp RcppExports.cpp)
18
- $srcs = Dir["{#{ext},#{isotree}}/*.{cc,cpp}"].reject { |f| exclude.include?(File.basename(f)) }
19
- $INCFLAGS << " -I#{isotree} -I#{cereal}"
20
- $VPATH << isotree
17
+ exclude = %w(c_interface.cpp Rwrapper.cpp RcppExports.cpp)
18
+ $srcs = Dir["{#{ext},#{isotree_src}}/*.{cc,cpp}"].reject { |f| exclude.include?(File.basename(f)) }
19
+ $INCFLAGS << " -I#{isotree_inc}"
20
+ $VPATH << isotree_src
21
21
 
22
22
  create_makefile("isotree/ext")