isotree 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (151)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +8 -1
  3. data/LICENSE.txt +2 -2
  4. data/README.md +32 -14
  5. data/ext/isotree/ext.cpp +144 -31
  6. data/ext/isotree/extconf.rb +7 -7
  7. data/lib/isotree/isolation_forest.rb +110 -30
  8. data/lib/isotree/version.rb +1 -1
  9. data/vendor/isotree/LICENSE +1 -1
  10. data/vendor/isotree/README.md +165 -27
  11. data/vendor/isotree/include/isotree.hpp +2111 -0
  12. data/vendor/isotree/include/isotree_oop.hpp +394 -0
  13. data/vendor/isotree/inst/COPYRIGHTS +62 -0
  14. data/vendor/isotree/src/RcppExports.cpp +525 -52
  15. data/vendor/isotree/src/Rwrapper.cpp +1931 -268
  16. data/vendor/isotree/src/c_interface.cpp +953 -0
  17. data/vendor/isotree/src/crit.hpp +4232 -0
  18. data/vendor/isotree/src/dist.hpp +1886 -0
  19. data/vendor/isotree/src/exp_depth_table.hpp +134 -0
  20. data/vendor/isotree/src/extended.hpp +1444 -0
  21. data/vendor/isotree/src/external_facing_generic.hpp +399 -0
  22. data/vendor/isotree/src/fit_model.hpp +2401 -0
  23. data/vendor/isotree/src/{dealloc.cpp → headers_joined.hpp} +38 -22
  24. data/vendor/isotree/src/helpers_iforest.hpp +813 -0
  25. data/vendor/isotree/src/{impute.cpp → impute.hpp} +353 -122
  26. data/vendor/isotree/src/indexer.cpp +515 -0
  27. data/vendor/isotree/src/instantiate_template_headers.cpp +118 -0
  28. data/vendor/isotree/src/instantiate_template_headers.hpp +240 -0
  29. data/vendor/isotree/src/isoforest.hpp +1659 -0
  30. data/vendor/isotree/src/isotree.hpp +1804 -392
  31. data/vendor/isotree/src/isotree_exportable.hpp +99 -0
  32. data/vendor/isotree/src/merge_models.cpp +159 -16
  33. data/vendor/isotree/src/mult.hpp +1321 -0
  34. data/vendor/isotree/src/oop_interface.cpp +842 -0
  35. data/vendor/isotree/src/oop_interface.hpp +278 -0
  36. data/vendor/isotree/src/other_helpers.hpp +219 -0
  37. data/vendor/isotree/src/predict.hpp +1932 -0
  38. data/vendor/isotree/src/python_helpers.hpp +134 -0
  39. data/vendor/isotree/src/ref_indexer.hpp +154 -0
  40. data/vendor/isotree/src/robinmap/LICENSE +21 -0
  41. data/vendor/isotree/src/robinmap/README.md +483 -0
  42. data/vendor/isotree/src/robinmap/include/tsl/robin_growth_policy.h +406 -0
  43. data/vendor/isotree/src/robinmap/include/tsl/robin_hash.h +1620 -0
  44. data/vendor/isotree/src/robinmap/include/tsl/robin_map.h +807 -0
  45. data/vendor/isotree/src/robinmap/include/tsl/robin_set.h +660 -0
  46. data/vendor/isotree/src/serialize.cpp +4300 -139
  47. data/vendor/isotree/src/sql.cpp +141 -59
  48. data/vendor/isotree/src/subset_models.cpp +174 -0
  49. data/vendor/isotree/src/utils.hpp +3808 -0
  50. data/vendor/isotree/src/xoshiro.hpp +467 -0
  51. data/vendor/isotree/src/ziggurat.hpp +405 -0
  52. metadata +38 -104
  53. data/vendor/cereal/LICENSE +0 -24
  54. data/vendor/cereal/README.md +0 -85
  55. data/vendor/cereal/include/cereal/access.hpp +0 -351
  56. data/vendor/cereal/include/cereal/archives/adapters.hpp +0 -163
  57. data/vendor/cereal/include/cereal/archives/binary.hpp +0 -169
  58. data/vendor/cereal/include/cereal/archives/json.hpp +0 -1019
  59. data/vendor/cereal/include/cereal/archives/portable_binary.hpp +0 -334
  60. data/vendor/cereal/include/cereal/archives/xml.hpp +0 -956
  61. data/vendor/cereal/include/cereal/cereal.hpp +0 -1089
  62. data/vendor/cereal/include/cereal/details/helpers.hpp +0 -422
  63. data/vendor/cereal/include/cereal/details/polymorphic_impl.hpp +0 -796
  64. data/vendor/cereal/include/cereal/details/polymorphic_impl_fwd.hpp +0 -65
  65. data/vendor/cereal/include/cereal/details/static_object.hpp +0 -127
  66. data/vendor/cereal/include/cereal/details/traits.hpp +0 -1411
  67. data/vendor/cereal/include/cereal/details/util.hpp +0 -84
  68. data/vendor/cereal/include/cereal/external/base64.hpp +0 -134
  69. data/vendor/cereal/include/cereal/external/rapidjson/allocators.h +0 -284
  70. data/vendor/cereal/include/cereal/external/rapidjson/cursorstreamwrapper.h +0 -78
  71. data/vendor/cereal/include/cereal/external/rapidjson/document.h +0 -2652
  72. data/vendor/cereal/include/cereal/external/rapidjson/encodedstream.h +0 -299
  73. data/vendor/cereal/include/cereal/external/rapidjson/encodings.h +0 -716
  74. data/vendor/cereal/include/cereal/external/rapidjson/error/en.h +0 -74
  75. data/vendor/cereal/include/cereal/external/rapidjson/error/error.h +0 -161
  76. data/vendor/cereal/include/cereal/external/rapidjson/filereadstream.h +0 -99
  77. data/vendor/cereal/include/cereal/external/rapidjson/filewritestream.h +0 -104
  78. data/vendor/cereal/include/cereal/external/rapidjson/fwd.h +0 -151
  79. data/vendor/cereal/include/cereal/external/rapidjson/internal/biginteger.h +0 -290
  80. data/vendor/cereal/include/cereal/external/rapidjson/internal/diyfp.h +0 -271
  81. data/vendor/cereal/include/cereal/external/rapidjson/internal/dtoa.h +0 -245
  82. data/vendor/cereal/include/cereal/external/rapidjson/internal/ieee754.h +0 -78
  83. data/vendor/cereal/include/cereal/external/rapidjson/internal/itoa.h +0 -308
  84. data/vendor/cereal/include/cereal/external/rapidjson/internal/meta.h +0 -186
  85. data/vendor/cereal/include/cereal/external/rapidjson/internal/pow10.h +0 -55
  86. data/vendor/cereal/include/cereal/external/rapidjson/internal/regex.h +0 -740
  87. data/vendor/cereal/include/cereal/external/rapidjson/internal/stack.h +0 -232
  88. data/vendor/cereal/include/cereal/external/rapidjson/internal/strfunc.h +0 -69
  89. data/vendor/cereal/include/cereal/external/rapidjson/internal/strtod.h +0 -290
  90. data/vendor/cereal/include/cereal/external/rapidjson/internal/swap.h +0 -46
  91. data/vendor/cereal/include/cereal/external/rapidjson/istreamwrapper.h +0 -128
  92. data/vendor/cereal/include/cereal/external/rapidjson/memorybuffer.h +0 -70
  93. data/vendor/cereal/include/cereal/external/rapidjson/memorystream.h +0 -71
  94. data/vendor/cereal/include/cereal/external/rapidjson/msinttypes/inttypes.h +0 -316
  95. data/vendor/cereal/include/cereal/external/rapidjson/msinttypes/stdint.h +0 -300
  96. data/vendor/cereal/include/cereal/external/rapidjson/ostreamwrapper.h +0 -81
  97. data/vendor/cereal/include/cereal/external/rapidjson/pointer.h +0 -1414
  98. data/vendor/cereal/include/cereal/external/rapidjson/prettywriter.h +0 -277
  99. data/vendor/cereal/include/cereal/external/rapidjson/rapidjson.h +0 -656
  100. data/vendor/cereal/include/cereal/external/rapidjson/reader.h +0 -2230
  101. data/vendor/cereal/include/cereal/external/rapidjson/schema.h +0 -2497
  102. data/vendor/cereal/include/cereal/external/rapidjson/stream.h +0 -223
  103. data/vendor/cereal/include/cereal/external/rapidjson/stringbuffer.h +0 -121
  104. data/vendor/cereal/include/cereal/external/rapidjson/writer.h +0 -709
  105. data/vendor/cereal/include/cereal/external/rapidxml/license.txt +0 -52
  106. data/vendor/cereal/include/cereal/external/rapidxml/manual.html +0 -406
  107. data/vendor/cereal/include/cereal/external/rapidxml/rapidxml.hpp +0 -2624
  108. data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_iterators.hpp +0 -175
  109. data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_print.hpp +0 -428
  110. data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_utils.hpp +0 -123
  111. data/vendor/cereal/include/cereal/macros.hpp +0 -154
  112. data/vendor/cereal/include/cereal/specialize.hpp +0 -139
  113. data/vendor/cereal/include/cereal/types/array.hpp +0 -79
  114. data/vendor/cereal/include/cereal/types/atomic.hpp +0 -55
  115. data/vendor/cereal/include/cereal/types/base_class.hpp +0 -203
  116. data/vendor/cereal/include/cereal/types/bitset.hpp +0 -176
  117. data/vendor/cereal/include/cereal/types/boost_variant.hpp +0 -164
  118. data/vendor/cereal/include/cereal/types/chrono.hpp +0 -72
  119. data/vendor/cereal/include/cereal/types/common.hpp +0 -129
  120. data/vendor/cereal/include/cereal/types/complex.hpp +0 -56
  121. data/vendor/cereal/include/cereal/types/concepts/pair_associative_container.hpp +0 -73
  122. data/vendor/cereal/include/cereal/types/deque.hpp +0 -62
  123. data/vendor/cereal/include/cereal/types/forward_list.hpp +0 -68
  124. data/vendor/cereal/include/cereal/types/functional.hpp +0 -43
  125. data/vendor/cereal/include/cereal/types/list.hpp +0 -62
  126. data/vendor/cereal/include/cereal/types/map.hpp +0 -36
  127. data/vendor/cereal/include/cereal/types/memory.hpp +0 -425
  128. data/vendor/cereal/include/cereal/types/optional.hpp +0 -66
  129. data/vendor/cereal/include/cereal/types/polymorphic.hpp +0 -483
  130. data/vendor/cereal/include/cereal/types/queue.hpp +0 -132
  131. data/vendor/cereal/include/cereal/types/set.hpp +0 -103
  132. data/vendor/cereal/include/cereal/types/stack.hpp +0 -76
  133. data/vendor/cereal/include/cereal/types/string.hpp +0 -61
  134. data/vendor/cereal/include/cereal/types/tuple.hpp +0 -123
  135. data/vendor/cereal/include/cereal/types/unordered_map.hpp +0 -36
  136. data/vendor/cereal/include/cereal/types/unordered_set.hpp +0 -99
  137. data/vendor/cereal/include/cereal/types/utility.hpp +0 -47
  138. data/vendor/cereal/include/cereal/types/valarray.hpp +0 -89
  139. data/vendor/cereal/include/cereal/types/variant.hpp +0 -109
  140. data/vendor/cereal/include/cereal/types/vector.hpp +0 -112
  141. data/vendor/cereal/include/cereal/version.hpp +0 -52
  142. data/vendor/isotree/src/Makevars +0 -4
  143. data/vendor/isotree/src/crit.cpp +0 -912
  144. data/vendor/isotree/src/dist.cpp +0 -749
  145. data/vendor/isotree/src/extended.cpp +0 -790
  146. data/vendor/isotree/src/fit_model.cpp +0 -1090
  147. data/vendor/isotree/src/helpers_iforest.cpp +0 -324
  148. data/vendor/isotree/src/isoforest.cpp +0 -771
  149. data/vendor/isotree/src/mult.cpp +0 -607
  150. data/vendor/isotree/src/predict.cpp +0 -853
  151. data/vendor/isotree/src/utils.cpp +0 -1566
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1876c50ef4d9bbd7fc7898c222b9f10b8d287a38cf9d37af53e9841f63494299
4
- data.tar.gz: ea7b60ae9683498df1910fb3f4774ce8c94daf9fe2a6e23ffd87a9d488bcc5ce
3
+ metadata.gz: a2ed0745b24db6c7d55b86cbf60394de590225509f3be5dbc79934606cd402cb
4
+ data.tar.gz: d1601d80e3878cc678544a245defd04831e6f11f4f374ecff6d1916e1618af6e
5
5
  SHA512:
6
- metadata.gz: 3d0f6d95cac8b7dd457512c0ce41c3ae286cf8f344ccc8a47dee1c00104657b3d2d9c97be1c1211fcb253065ba7fcf3d3b955a89dd9bac8b2b6e5e35516c7b7d
7
- data.tar.gz: 476d7c8ffe5c252ba94ecc7a68e59fc3afbbb2a732fd9e3127e032e543a74957fcdef6173b6225dfee7370dbb86625b61b02d16a68cc7257be8f7cbdae87a581
6
+ metadata.gz: b461f501114e56810ed6075fba28ae79a611763838147670f357a8a9645000f139072bf3bb3a2120cc09ad302df429d39f6b67522b1c2bc18862e75a65266eb4
7
+ data.tar.gz: 57fc3f334fac2a6918ef95fb4827a67543a2aa5c1cdfcd86225c3411a409348593764109bcbc5155ddd6e49217f0764686847265550d145e72007556ae7fd00e
data/CHANGELOG.md CHANGED
@@ -1,3 +1,10 @@
1
+ ## 0.3.0 (2022-06-13)
2
+
3
+ - Updated IsoTree to 0.5.16
4
+ - Updated serialization format (exported models must be recreated)
5
+ - Dropped support for Ruby < 2.7
6
+ - Dropped support for Windows
7
+
1
8
  ## 0.2.2 (2022-06-12)
2
9
 
3
10
  - Fixed segfault when data is smaller than sample size
@@ -13,7 +20,7 @@
13
20
 
14
21
  ## 0.1.5 (2021-03-14)
15
22
 
16
- - Updated Isotree to 0.1.25
23
+ - Updated IsoTree to 0.1.25
17
24
  - Added support for exporting and importing models
18
25
 
19
26
  ## 0.1.4 (2020-08-22)
data/LICENSE.txt CHANGED
@@ -1,7 +1,7 @@
1
1
  BSD 2-Clause License
2
2
 
3
- Copyright (c) 2020, David Cortes
4
- Copyright (c) 2020-2021, Andrew Kane
3
+ Copyright (c) 2020-2022, David Cortes
4
+ Copyright (c) 2020-2022, Andrew Kane
5
5
  All rights reserved.
6
6
 
7
7
  Redistribution and use in source and binary forms, with or without
data/README.md CHANGED
@@ -16,6 +16,8 @@ Add this line to your application’s Gemfile:
16
16
  gem "isotree"
17
17
  ```
18
18
 
19
+ Windows is not supported at the moment
20
+
19
21
  ## Getting Started
20
22
 
21
23
  Prep your data
@@ -24,7 +26,8 @@ Prep your data
24
26
  data = [
25
27
  {department: "Books", sale: false, price: 2.50},
26
28
  {department: "Books", sale: true, price: 3.00},
27
- {department: "Movies", sale: false, price: 5.00}
29
+ {department: "Movies", sale: false, price: 5.00},
30
+ # ...
28
31
  ]
29
32
  ```
30
33
 
@@ -61,28 +64,38 @@ Pass parameters - default values below
61
64
 
62
65
  ```ruby
63
66
  IsoTree::IsolationForest.new(
64
- sample_size: nil,
67
+ sample_size: "auto",
65
68
  ntrees: 500,
66
69
  ndim: 3,
67
- ntry: 3,
68
- prob_pick_avg_gain: 0,
69
- prob_pick_pooled_gain: 0,
70
- prob_split_avg_gain: 0,
71
- prob_split_pooled_gain: 0,
72
- min_gain: 0,
73
- missing_action: "impute",
74
- new_categ_action: "smallest",
75
- categ_split_type: "subset",
70
+ ntry: 1,
71
+ max_depth: "auto",
72
+ ncols_per_tree: nil,
73
+ prob_pick_pooled_gain: 0.0,
74
+ prob_pick_avg_gain: 0.0,
75
+ prob_pick_full_gain: 0.0,
76
+ prob_pick_dens: 0.0,
77
+ prob_pick_col_by_range: 0.0,
78
+ prob_pick_col_by_var: 0.0,
79
+ prob_pick_col_by_kurt: 0.0,
80
+ min_gain: 0.0,
81
+ missing_action: "auto",
82
+ new_categ_action: "auto",
83
+ categ_split_type: "auto",
76
84
  all_perm: false,
77
85
  coef_by_prop: false,
78
86
  sample_with_replacement: false,
79
- penalize_range: true,
87
+ penalize_range: false,
88
+ standardize_data: true,
89
+ scoring_metric: "depth",
90
+ fast_bratio: true,
80
91
  weigh_by_kurtosis: false,
81
- coefs: "normal",
92
+ coefs: "uniform",
93
+ assume_full_distr: true,
82
94
  min_imp_obs: 3,
83
95
  depth_imp: "higher",
84
96
  weigh_imp_rows: "inverse",
85
97
  random_seed: 1,
98
+ use_long_double: false,
86
99
  nthreads: -1
87
100
  )
88
101
  ```
@@ -134,7 +147,6 @@ Check out [Trove](https://github.com/ankane/trove) for deploying models.
134
147
 
135
148
  ```sh
136
149
  trove push model.bin
137
- trove push model.bin.metadata
138
150
  ```
139
151
 
140
152
  ## Reference
@@ -145,6 +157,12 @@ Get the average isolation depth
145
157
  model.predict(data, output: "avg_depth")
146
158
  ```
147
159
 
160
+ ## Upgrading
161
+
162
+ ### 0.3.0
163
+
164
+ This version uses IsoTree’s new serialization format. Exported models must be recreated.
165
+
148
166
  ## History
149
167
 
150
168
  View the [changelog](https://github.com/ankane/isotree-ruby/blob/master/CHANGELOG.md)
data/ext/isotree/ext.cpp CHANGED
@@ -1,3 +1,8 @@
1
+ // stdlib
2
+ #include <cmath>
3
+ #include <fstream>
4
+ #include <iostream>
5
+
1
6
  // isotree
2
7
  #include <isotree.hpp>
3
8
 
@@ -22,7 +27,7 @@ namespace Rice::detail
22
27
  NewCategAction convert(VALUE x)
23
28
  {
24
29
  auto value = Object(x).to_s().str();
25
- if (value == "weighted") return Weighted;
30
+ if (value == "weighted" || value == "impute") return Weighted;
26
31
  if (value == "smallest") return Smallest;
27
32
  if (value == "random") return Random;
28
33
  throw std::runtime_error("Unknown new categ action: " + value);
@@ -96,6 +101,24 @@ namespace Rice::detail
96
101
  throw std::runtime_error("Unknown weight imp rows: " + value);
97
102
  }
98
103
  };
104
+
105
+ template<>
106
+ class From_Ruby<ScoringMetric>
107
+ {
108
+ public:
109
+ ScoringMetric convert(VALUE x)
110
+ {
111
+ auto value = Object(x).to_s().str();
112
+ if (value == "depth") return Depth;
113
+ if (value == "adj_depth") return AdjDepth;
114
+ if (value == "density") return Density;
115
+ if (value == "adj_density") return AdjDensity;
116
+ if (value == "boxed_density") return BoxedDensity;
117
+ if (value == "boxed_density2") return BoxedDensity2;
118
+ if (value == "boxed_ratio") return BoxedRatio;
119
+ throw std::runtime_error("Unknown scoring metric: " + value);
120
+ }
121
+ };
99
122
  }
100
123
 
101
124
  extern "C"
@@ -118,20 +141,20 @@ void Init_ext()
118
141
  size_t ncols_numeric = options.get<size_t, Symbol>("ncols_numeric");
119
142
  size_t ncols_categ = options.get<size_t, Symbol>("ncols_categ");
120
143
 
121
- double *restrict numeric_data = NULL;
144
+ real_t* numeric_data = NULL;
122
145
  if (ncols_numeric > 0) {
123
146
  numeric_data = (double*) options.get<String, Symbol>("numeric_data").c_str();
124
147
  }
125
148
 
126
- int *restrict categorical_data = NULL;
127
- int *restrict ncat = NULL;
149
+ int* categorical_data = NULL;
150
+ int* ncat = NULL;
128
151
  if (ncols_categ > 0) {
129
152
  categorical_data = (int*) options.get<String, Symbol>("categorical_data").c_str();
130
153
  ncat = (int*) options.get<String, Symbol>("ncat").c_str();
131
154
  }
132
155
 
133
156
  // not used (sparse matrices)
134
- double* Xc = NULL;
157
+ real_t* Xc = NULL;
135
158
  sparse_ix* Xc_ind = NULL;
136
159
  sparse_ix* Xc_indptr = NULL;
137
160
 
@@ -142,9 +165,7 @@ void Init_ext()
142
165
  size_t ntrees = options.get<size_t, Symbol>("ntrees");
143
166
  size_t ntry = options.get<size_t, Symbol>("ntry");
144
167
  double prob_pick_by_gain_avg = options.get<double, Symbol>("prob_pick_avg_gain");
145
- double prob_split_by_gain_avg = options.get<double, Symbol>("prob_split_avg_gain");
146
168
  double prob_pick_by_gain_pl = options.get<double, Symbol>("prob_pick_pooled_gain");
147
- double prob_split_by_gain_pl = options.get<double, Symbol>("prob_split_pooled_gain");
148
169
  double min_gain = options.get<double, Symbol>("min_gain");
149
170
  MissingAction missing_action = options.get<MissingAction, Symbol>("missing_action");
150
171
  CategSplit cat_split_type = options.get<CategSplit, Symbol>("categ_split_type");
@@ -159,21 +180,31 @@ void Init_ext()
159
180
  UseDepthImp depth_imp = options.get<UseDepthImp, Symbol>("depth_imp");
160
181
  WeighImpRows weigh_imp_rows = options.get<WeighImpRows, Symbol>("weigh_imp_rows");
161
182
  uint64_t random_seed = options.get<uint64_t, Symbol>("random_seed");
183
+ bool use_long_double = options.get<bool, Symbol>("use_long_double");
162
184
  int nthreads = options.get<int, Symbol>("nthreads");
163
185
 
164
186
  // TODO options
165
187
  double* sample_weights = NULL;
166
- bool weight_as_sample = false;
167
- size_t max_depth = 0;
168
- bool limit_depth = true;
188
+ bool weight_as_sample = options.get<bool, Symbol>("weights_as_sample_prob");
189
+ size_t max_depth = options.get<size_t, Symbol>("max_depth");
190
+ bool limit_depth = options.get<bool, Symbol>("limit_depth");
169
191
  bool standardize_dist = false;
170
192
  double* tmat = NULL;
171
193
  double* output_depths = NULL;
172
194
  bool standardize_depth = false;
173
- double* col_weights = NULL;
174
- Imputer *imputer = NULL;
195
+ real_t* col_weights = NULL;
196
+ Imputer* imputer = NULL;
175
197
  bool impute_at_fit = false;
176
- bool handle_interrupt = false;
198
+
199
+ int ncols_per_tree = options.get<int, Symbol>("ncols_per_tree");
200
+ bool standardize_data = options.get<bool, Symbol>("standardize_data");
201
+ ScoringMetric scoring_metric = options.get<ScoringMetric, Symbol>("scoring_metric");
202
+ bool fast_bratio = options.get<bool, Symbol>("fast_bratio");
203
+ double prob_pick_by_full_gain = options.get<double, Symbol>("prob_pick_full_gain");
204
+ double prob_pick_by_dens = options.get<double, Symbol>("prob_pick_dens");
205
+ double prob_pick_col_by_range = options.get<double, Symbol>("prob_pick_col_by_range");
206
+ double prob_pick_col_by_var = options.get<double, Symbol>("prob_pick_col_by_var");
207
+ double prob_pick_col_by_kurt = options.get<double, Symbol>("prob_pick_col_by_kurt");
177
208
 
178
209
  fit_iforest(
179
210
  NULL,
@@ -197,18 +228,25 @@ void Init_ext()
197
228
  sample_size,
198
229
  ntrees,
199
230
  max_depth,
231
+ ncols_per_tree,
200
232
  limit_depth,
201
233
  penalize_range,
234
+ standardize_data,
235
+ scoring_metric,
236
+ fast_bratio,
202
237
  standardize_dist,
203
238
  tmat,
204
239
  output_depths,
205
240
  standardize_depth,
206
241
  col_weights,
207
242
  weigh_by_kurt,
208
- prob_pick_by_gain_avg,
209
- prob_split_by_gain_avg,
210
243
  prob_pick_by_gain_pl,
211
- prob_split_by_gain_pl,
244
+ prob_pick_by_gain_avg,
245
+ prob_pick_by_full_gain,
246
+ prob_pick_by_dens,
247
+ prob_pick_col_by_range,
248
+ prob_pick_col_by_var,
249
+ prob_pick_col_by_kurt,
212
250
  min_gain,
213
251
  missing_action,
214
252
  cat_split_type,
@@ -220,7 +258,7 @@ void Init_ext()
220
258
  weigh_imp_rows,
221
259
  impute_at_fit,
222
260
  random_seed,
223
- handle_interrupt,
261
+ use_long_double,
224
262
  nthreads
225
263
  );
226
264
 
@@ -234,21 +272,21 @@ void Init_ext()
234
272
  size_t ncols_numeric = options.get<size_t, Symbol>("ncols_numeric");
235
273
  size_t ncols_categ = options.get<size_t, Symbol>("ncols_categ");
236
274
 
237
- double *restrict numeric_data = NULL;
275
+ real_t* numeric_data = NULL;
238
276
  if (ncols_numeric > 0) {
239
277
  numeric_data = (double*) options.get<String, Symbol>("numeric_data").c_str();
240
278
  }
241
279
 
242
- int *restrict categorical_data = NULL;
280
+ int* categorical_data = NULL;
243
281
  if (ncols_categ > 0) {
244
282
  categorical_data = (int*) options.get<String, Symbol>("categorical_data").c_str();
245
283
  }
246
284
 
247
285
  // not used (sparse matrices)
248
- double* Xc = NULL;
286
+ real_t* Xc = NULL;
249
287
  sparse_ix* Xc_ind = NULL;
250
288
  sparse_ix* Xc_indptr = NULL;
251
- double* Xr = NULL;
289
+ real_t* Xr = NULL;
252
290
  sparse_ix* Xr_ind = NULL;
253
291
  sparse_ix* Xr_indptr = NULL;
254
292
 
@@ -257,10 +295,17 @@ void Init_ext()
257
295
  bool standardize = options.get<bool, Symbol>("standardize");
258
296
  std::vector<double> outlier_scores(nrows);
259
297
  sparse_ix* tree_num = NULL;
298
+ bool is_col_major = true;
299
+ size_t ld_numeric = 0;
300
+ size_t ld_categ = 0;
301
+ double* per_tree_depths = NULL;
260
302
 
261
303
  predict_iforest(
262
304
  numeric_data,
263
305
  categorical_data,
306
+ is_col_major,
307
+ ld_numeric,
308
+ ld_categ,
264
309
  Xc,
265
310
  Xc_ind,
266
311
  Xc_indptr,
@@ -273,7 +318,9 @@ void Init_ext()
273
318
  NULL,
274
319
  &iso,
275
320
  outlier_scores.data(),
276
- tree_num
321
+ tree_num,
322
+ per_tree_depths,
323
+ NULL
277
324
  );
278
325
 
279
326
  Array ret;
@@ -283,27 +330,93 @@ void Init_ext()
283
330
  return ret;
284
331
  })
285
332
  .define_singleton_function(
286
- "serialize_ext_isoforest",
287
- [](ExtIsoForest& iso, String path) {
333
+ "serialize_combined",
334
+ [](ExtIsoForest& iso, String path, String metadata) {
288
335
  #ifdef _MSC_VER
289
336
  // TODO convert to wchar_t
290
337
  throw std::runtime_error("Not supported on Windows yet");
291
338
  #else
292
- serialize_ext_isoforest(iso, path.c_str());
339
+ std::ofstream file;
340
+ file.open(path.c_str());
341
+ serialize_combined(
342
+ NULL,
343
+ &iso,
344
+ NULL,
345
+ NULL,
346
+ metadata.c_str(),
347
+ // returns bytesize (RSTRING_LEN)
348
+ metadata.length(),
349
+ file
350
+ );
351
+ file.close();
293
352
  #endif
294
353
  })
295
354
  .define_singleton_function(
296
- "deserialize_ext_isoforest",
355
+ "deserialize_combined",
297
356
  [](String path) {
298
- ExtIsoForest iso;
299
-
300
357
  #ifdef _MSC_VER
301
358
  // TODO convert to wchar_t
302
359
  throw std::runtime_error("Not supported on Windows yet");
303
360
  #else
304
- deserialize_ext_isoforest(iso, path.c_str());
305
- #endif
361
+ Array ret;
306
362
 
307
- return iso;
363
+ std::ifstream file;
364
+ file.open(path.c_str(), std::ios_base::in | std::ios_base::binary);
365
+ if (!file) {
366
+ throw std::runtime_error("Cannot open file");
367
+ }
368
+
369
+ bool is_isotree_model = false;
370
+ bool is_compatible = false;
371
+ bool has_combined_objects = false;
372
+ bool has_IsoForest = false;
373
+ bool has_ExtIsoForest = false;
374
+ bool has_Imputer = false;
375
+ bool has_Indexer = false;
376
+ bool has_metadata = false;
377
+ size_t size_metadata = 0;
378
+
379
+ inspect_serialized_object(
380
+ file,
381
+ is_isotree_model,
382
+ is_compatible,
383
+ has_combined_objects,
384
+ has_IsoForest,
385
+ has_ExtIsoForest,
386
+ has_Imputer,
387
+ has_Indexer,
388
+ has_metadata,
389
+ size_metadata
390
+ );
391
+
392
+ if (!is_isotree_model || !has_combined_objects) {
393
+ throw std::runtime_error("Input file is not a serialized isotree model");
394
+ }
395
+ if (!is_compatible) {
396
+ throw std::runtime_error("Model file format is incompatible");
397
+ }
398
+ if (size_metadata == 0) {
399
+ throw std::runtime_error("Input file does not contain metadata");
400
+ }
401
+
402
+ IsoForest model = IsoForest();
403
+ ExtIsoForest model_ext = ExtIsoForest();
404
+ Imputer imputer = Imputer();
405
+ TreesIndexer indexer = TreesIndexer();
406
+ char *optional_metadata = (char*) calloc(size_metadata, sizeof(char));
407
+ if (optional_metadata == NULL) {
408
+ throw std::runtime_error("Cannot allocate memory");
409
+ }
410
+
411
+ deserialize_combined(file, &model, &model_ext, &imputer, &indexer, optional_metadata);
412
+ file.close();
413
+
414
+ ret.push(Object(Rice::detail::To_Ruby<ExtIsoForest>().convert(model_ext)));
415
+ ret.push(String(std::string(optional_metadata, size_metadata)));
416
+
417
+ free(optional_metadata);
418
+
419
+ return ret;
420
+ #endif
308
421
  });
309
422
  }
data/ext/isotree/extconf.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  require "mkmf-rice"
2
2
 
3
- $CXXFLAGS += " -std=c++17 $(optflags) -D_USE_MERSENNE_TWISTER -D_ENABLE_CEREAL"
3
+ $CXXFLAGS += " -std=c++17 $(optflags) -D_USE_XOSHIRO -DSUPPORTS_RESTRICT=1 -D_USE_ROBIN_MAP -DDONT_THROW_ON_INTERRUPT"
4
4
 
5
5
  apple_clang = RbConfig::CONFIG["CC_VERSION_MESSAGE"] =~ /apple clang/i
6
6
 
@@ -11,12 +11,12 @@ if have_library("omp") || have_library("gomp")
11
11
  end
12
12
 
13
13
  ext = File.expand_path(".", __dir__)
14
- isotree = File.expand_path("../../vendor/isotree/src", __dir__)
15
- cereal = File.expand_path("../../vendor/cereal/include", __dir__)
14
+ isotree_src = File.expand_path("../../vendor/isotree/src", __dir__)
15
+ isotree_inc = File.expand_path("../../vendor/isotree/include", __dir__)
16
16
 
17
- exclude = %w(Rwrapper.cpp RcppExports.cpp)
18
- $srcs = Dir["{#{ext},#{isotree}}/*.{cc,cpp}"].reject { |f| exclude.include?(File.basename(f)) }
19
- $INCFLAGS << " -I#{isotree} -I#{cereal}"
20
- $VPATH << isotree
17
+ exclude = %w(c_interface.cpp Rwrapper.cpp RcppExports.cpp)
18
+ $srcs = Dir["{#{ext},#{isotree_src}}/*.{cc,cpp}"].reject { |f| exclude.include?(File.basename(f)) }
19
+ $INCFLAGS << " -I#{isotree_inc}"
20
+ $VPATH << isotree_src
21
21
 
22
22
  create_makefile("isotree/ext")