isotree 0.2.2 → 0.3.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (152)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +12 -1
  3. data/LICENSE.txt +2 -2
  4. data/README.md +32 -14
  5. data/ext/isotree/ext.cpp +144 -31
  6. data/ext/isotree/extconf.rb +7 -7
  7. data/lib/isotree/isolation_forest.rb +110 -30
  8. data/lib/isotree/version.rb +1 -1
  9. data/vendor/isotree/LICENSE +1 -1
  10. data/vendor/isotree/README.md +165 -27
  11. data/vendor/isotree/include/isotree.hpp +2116 -0
  12. data/vendor/isotree/include/isotree_oop.hpp +394 -0
  13. data/vendor/isotree/inst/COPYRIGHTS +132 -0
  14. data/vendor/isotree/src/RcppExports.cpp +594 -57
  15. data/vendor/isotree/src/Rwrapper.cpp +2452 -304
  16. data/vendor/isotree/src/c_interface.cpp +958 -0
  17. data/vendor/isotree/src/crit.hpp +4236 -0
  18. data/vendor/isotree/src/digamma.hpp +184 -0
  19. data/vendor/isotree/src/dist.hpp +1886 -0
  20. data/vendor/isotree/src/exp_depth_table.hpp +134 -0
  21. data/vendor/isotree/src/extended.hpp +1444 -0
  22. data/vendor/isotree/src/external_facing_generic.hpp +399 -0
  23. data/vendor/isotree/src/fit_model.hpp +2401 -0
  24. data/vendor/isotree/src/{dealloc.cpp → headers_joined.hpp} +38 -22
  25. data/vendor/isotree/src/helpers_iforest.hpp +814 -0
  26. data/vendor/isotree/src/{impute.cpp → impute.hpp} +382 -123
  27. data/vendor/isotree/src/indexer.cpp +515 -0
  28. data/vendor/isotree/src/instantiate_template_headers.cpp +118 -0
  29. data/vendor/isotree/src/instantiate_template_headers.hpp +240 -0
  30. data/vendor/isotree/src/isoforest.hpp +1659 -0
  31. data/vendor/isotree/src/isotree.hpp +1815 -394
  32. data/vendor/isotree/src/isotree_exportable.hpp +99 -0
  33. data/vendor/isotree/src/merge_models.cpp +159 -16
  34. data/vendor/isotree/src/mult.hpp +1321 -0
  35. data/vendor/isotree/src/oop_interface.cpp +844 -0
  36. data/vendor/isotree/src/oop_interface.hpp +278 -0
  37. data/vendor/isotree/src/other_helpers.hpp +219 -0
  38. data/vendor/isotree/src/predict.hpp +1932 -0
  39. data/vendor/isotree/src/python_helpers.hpp +114 -0
  40. data/vendor/isotree/src/ref_indexer.hpp +154 -0
  41. data/vendor/isotree/src/robinmap/LICENSE +21 -0
  42. data/vendor/isotree/src/robinmap/README.md +483 -0
  43. data/vendor/isotree/src/robinmap/include/tsl/robin_growth_policy.h +406 -0
  44. data/vendor/isotree/src/robinmap/include/tsl/robin_hash.h +1639 -0
  45. data/vendor/isotree/src/robinmap/include/tsl/robin_map.h +807 -0
  46. data/vendor/isotree/src/robinmap/include/tsl/robin_set.h +660 -0
  47. data/vendor/isotree/src/serialize.cpp +4316 -139
  48. data/vendor/isotree/src/sql.cpp +143 -61
  49. data/vendor/isotree/src/subset_models.cpp +174 -0
  50. data/vendor/isotree/src/utils.hpp +3786 -0
  51. data/vendor/isotree/src/xoshiro.hpp +463 -0
  52. data/vendor/isotree/src/ziggurat.hpp +405 -0
  53. metadata +40 -105
  54. data/vendor/cereal/LICENSE +0 -24
  55. data/vendor/cereal/README.md +0 -85
  56. data/vendor/cereal/include/cereal/access.hpp +0 -351
  57. data/vendor/cereal/include/cereal/archives/adapters.hpp +0 -163
  58. data/vendor/cereal/include/cereal/archives/binary.hpp +0 -169
  59. data/vendor/cereal/include/cereal/archives/json.hpp +0 -1019
  60. data/vendor/cereal/include/cereal/archives/portable_binary.hpp +0 -334
  61. data/vendor/cereal/include/cereal/archives/xml.hpp +0 -956
  62. data/vendor/cereal/include/cereal/cereal.hpp +0 -1089
  63. data/vendor/cereal/include/cereal/details/helpers.hpp +0 -422
  64. data/vendor/cereal/include/cereal/details/polymorphic_impl.hpp +0 -796
  65. data/vendor/cereal/include/cereal/details/polymorphic_impl_fwd.hpp +0 -65
  66. data/vendor/cereal/include/cereal/details/static_object.hpp +0 -127
  67. data/vendor/cereal/include/cereal/details/traits.hpp +0 -1411
  68. data/vendor/cereal/include/cereal/details/util.hpp +0 -84
  69. data/vendor/cereal/include/cereal/external/base64.hpp +0 -134
  70. data/vendor/cereal/include/cereal/external/rapidjson/allocators.h +0 -284
  71. data/vendor/cereal/include/cereal/external/rapidjson/cursorstreamwrapper.h +0 -78
  72. data/vendor/cereal/include/cereal/external/rapidjson/document.h +0 -2652
  73. data/vendor/cereal/include/cereal/external/rapidjson/encodedstream.h +0 -299
  74. data/vendor/cereal/include/cereal/external/rapidjson/encodings.h +0 -716
  75. data/vendor/cereal/include/cereal/external/rapidjson/error/en.h +0 -74
  76. data/vendor/cereal/include/cereal/external/rapidjson/error/error.h +0 -161
  77. data/vendor/cereal/include/cereal/external/rapidjson/filereadstream.h +0 -99
  78. data/vendor/cereal/include/cereal/external/rapidjson/filewritestream.h +0 -104
  79. data/vendor/cereal/include/cereal/external/rapidjson/fwd.h +0 -151
  80. data/vendor/cereal/include/cereal/external/rapidjson/internal/biginteger.h +0 -290
  81. data/vendor/cereal/include/cereal/external/rapidjson/internal/diyfp.h +0 -271
  82. data/vendor/cereal/include/cereal/external/rapidjson/internal/dtoa.h +0 -245
  83. data/vendor/cereal/include/cereal/external/rapidjson/internal/ieee754.h +0 -78
  84. data/vendor/cereal/include/cereal/external/rapidjson/internal/itoa.h +0 -308
  85. data/vendor/cereal/include/cereal/external/rapidjson/internal/meta.h +0 -186
  86. data/vendor/cereal/include/cereal/external/rapidjson/internal/pow10.h +0 -55
  87. data/vendor/cereal/include/cereal/external/rapidjson/internal/regex.h +0 -740
  88. data/vendor/cereal/include/cereal/external/rapidjson/internal/stack.h +0 -232
  89. data/vendor/cereal/include/cereal/external/rapidjson/internal/strfunc.h +0 -69
  90. data/vendor/cereal/include/cereal/external/rapidjson/internal/strtod.h +0 -290
  91. data/vendor/cereal/include/cereal/external/rapidjson/internal/swap.h +0 -46
  92. data/vendor/cereal/include/cereal/external/rapidjson/istreamwrapper.h +0 -128
  93. data/vendor/cereal/include/cereal/external/rapidjson/memorybuffer.h +0 -70
  94. data/vendor/cereal/include/cereal/external/rapidjson/memorystream.h +0 -71
  95. data/vendor/cereal/include/cereal/external/rapidjson/msinttypes/inttypes.h +0 -316
  96. data/vendor/cereal/include/cereal/external/rapidjson/msinttypes/stdint.h +0 -300
  97. data/vendor/cereal/include/cereal/external/rapidjson/ostreamwrapper.h +0 -81
  98. data/vendor/cereal/include/cereal/external/rapidjson/pointer.h +0 -1414
  99. data/vendor/cereal/include/cereal/external/rapidjson/prettywriter.h +0 -277
  100. data/vendor/cereal/include/cereal/external/rapidjson/rapidjson.h +0 -656
  101. data/vendor/cereal/include/cereal/external/rapidjson/reader.h +0 -2230
  102. data/vendor/cereal/include/cereal/external/rapidjson/schema.h +0 -2497
  103. data/vendor/cereal/include/cereal/external/rapidjson/stream.h +0 -223
  104. data/vendor/cereal/include/cereal/external/rapidjson/stringbuffer.h +0 -121
  105. data/vendor/cereal/include/cereal/external/rapidjson/writer.h +0 -709
  106. data/vendor/cereal/include/cereal/external/rapidxml/license.txt +0 -52
  107. data/vendor/cereal/include/cereal/external/rapidxml/manual.html +0 -406
  108. data/vendor/cereal/include/cereal/external/rapidxml/rapidxml.hpp +0 -2624
  109. data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_iterators.hpp +0 -175
  110. data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_print.hpp +0 -428
  111. data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_utils.hpp +0 -123
  112. data/vendor/cereal/include/cereal/macros.hpp +0 -154
  113. data/vendor/cereal/include/cereal/specialize.hpp +0 -139
  114. data/vendor/cereal/include/cereal/types/array.hpp +0 -79
  115. data/vendor/cereal/include/cereal/types/atomic.hpp +0 -55
  116. data/vendor/cereal/include/cereal/types/base_class.hpp +0 -203
  117. data/vendor/cereal/include/cereal/types/bitset.hpp +0 -176
  118. data/vendor/cereal/include/cereal/types/boost_variant.hpp +0 -164
  119. data/vendor/cereal/include/cereal/types/chrono.hpp +0 -72
  120. data/vendor/cereal/include/cereal/types/common.hpp +0 -129
  121. data/vendor/cereal/include/cereal/types/complex.hpp +0 -56
  122. data/vendor/cereal/include/cereal/types/concepts/pair_associative_container.hpp +0 -73
  123. data/vendor/cereal/include/cereal/types/deque.hpp +0 -62
  124. data/vendor/cereal/include/cereal/types/forward_list.hpp +0 -68
  125. data/vendor/cereal/include/cereal/types/functional.hpp +0 -43
  126. data/vendor/cereal/include/cereal/types/list.hpp +0 -62
  127. data/vendor/cereal/include/cereal/types/map.hpp +0 -36
  128. data/vendor/cereal/include/cereal/types/memory.hpp +0 -425
  129. data/vendor/cereal/include/cereal/types/optional.hpp +0 -66
  130. data/vendor/cereal/include/cereal/types/polymorphic.hpp +0 -483
  131. data/vendor/cereal/include/cereal/types/queue.hpp +0 -132
  132. data/vendor/cereal/include/cereal/types/set.hpp +0 -103
  133. data/vendor/cereal/include/cereal/types/stack.hpp +0 -76
  134. data/vendor/cereal/include/cereal/types/string.hpp +0 -61
  135. data/vendor/cereal/include/cereal/types/tuple.hpp +0 -123
  136. data/vendor/cereal/include/cereal/types/unordered_map.hpp +0 -36
  137. data/vendor/cereal/include/cereal/types/unordered_set.hpp +0 -99
  138. data/vendor/cereal/include/cereal/types/utility.hpp +0 -47
  139. data/vendor/cereal/include/cereal/types/valarray.hpp +0 -89
  140. data/vendor/cereal/include/cereal/types/variant.hpp +0 -109
  141. data/vendor/cereal/include/cereal/types/vector.hpp +0 -112
  142. data/vendor/cereal/include/cereal/version.hpp +0 -52
  143. data/vendor/isotree/src/Makevars +0 -4
  144. data/vendor/isotree/src/crit.cpp +0 -912
  145. data/vendor/isotree/src/dist.cpp +0 -749
  146. data/vendor/isotree/src/extended.cpp +0 -790
  147. data/vendor/isotree/src/fit_model.cpp +0 -1090
  148. data/vendor/isotree/src/helpers_iforest.cpp +0 -324
  149. data/vendor/isotree/src/isoforest.cpp +0 -771
  150. data/vendor/isotree/src/mult.cpp +0 -607
  151. data/vendor/isotree/src/predict.cpp +0 -853
  152. data/vendor/isotree/src/utils.cpp +0 -1566
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1876c50ef4d9bbd7fc7898c222b9f10b8d287a38cf9d37af53e9841f63494299
4
- data.tar.gz: ea7b60ae9683498df1910fb3f4774ce8c94daf9fe2a6e23ffd87a9d488bcc5ce
3
+ metadata.gz: b07b4c1127d7fbdf12e03baebfd671d3dd06e163778654628942cdc16f18b21d
4
+ data.tar.gz: 1b3a2b09da4e4e4b8dd9d69c83e101926e1322b65932f3f9fd9f96914b5059f5
5
5
  SHA512:
6
- metadata.gz: 3d0f6d95cac8b7dd457512c0ce41c3ae286cf8f344ccc8a47dee1c00104657b3d2d9c97be1c1211fcb253065ba7fcf3d3b955a89dd9bac8b2b6e5e35516c7b7d
7
- data.tar.gz: 476d7c8ffe5c252ba94ecc7a68e59fc3afbbb2a732fd9e3127e032e543a74957fcdef6173b6225dfee7370dbb86625b61b02d16a68cc7257be8f7cbdae87a581
6
+ metadata.gz: b722fa013ee98be7dbf129191aededa2f347f9b1220423e740b7caccf0e0eab704f46bc8426edd60aaca68649ba8adbcd01a7d485c18eaca1106e795c4f00851
7
+ data.tar.gz: 56ae29ccb4b1982edab84bd419aa142eddddf3e7850941664f548583e5985a727f09763f9a6e332218b8b7a11ea0fdd73e70a779ed8a30a9ee20aecef287d8dc
data/CHANGELOG.md CHANGED
@@ -1,3 +1,14 @@
1
+ ## 0.3.1 (2023-12-19)
2
+
3
+ - Updated IsoTree to 0.5.25
4
+
5
+ ## 0.3.0 (2022-06-13)
6
+
7
+ - Updated IsoTree to 0.5.16
8
+ - Updated serialization format (exported models must be recreated)
9
+ - Dropped support for Ruby < 2.7
10
+ - Dropped support for Windows
11
+
1
12
  ## 0.2.2 (2022-06-12)
2
13
 
3
14
  - Fixed segfault when data is smaller than sample size
@@ -13,7 +24,7 @@
13
24
 
14
25
  ## 0.1.5 (2021-03-14)
15
26
 
16
- - Updated Isotree to 0.1.25
27
+ - Updated IsoTree to 0.1.25
17
28
  - Added support for exporting and importing models
18
29
 
19
30
  ## 0.1.4 (2020-08-22)
data/LICENSE.txt CHANGED
@@ -1,7 +1,7 @@
1
1
  BSD 2-Clause License
2
2
 
3
- Copyright (c) 2020, David Cortes
4
- Copyright (c) 2020-2021, Andrew Kane
3
+ Copyright (c) 2019-2023, David Cortes
4
+ Copyright (c) 2020-2023, Andrew Kane
5
5
  All rights reserved.
6
6
 
7
7
  Redistribution and use in source and binary forms, with or without
data/README.md CHANGED
@@ -16,6 +16,8 @@ Add this line to your application’s Gemfile:
16
16
  gem "isotree"
17
17
  ```
18
18
 
19
+ Windows is not supported at the moment
20
+
19
21
  ## Getting Started
20
22
 
21
23
  Prep your data
@@ -24,7 +26,8 @@ Prep your data
24
26
  data = [
25
27
  {department: "Books", sale: false, price: 2.50},
26
28
  {department: "Books", sale: true, price: 3.00},
27
- {department: "Movies", sale: false, price: 5.00}
29
+ {department: "Movies", sale: false, price: 5.00},
30
+ # ...
28
31
  ]
29
32
  ```
30
33
 
@@ -61,28 +64,38 @@ Pass parameters - default values below
61
64
 
62
65
  ```ruby
63
66
  IsoTree::IsolationForest.new(
64
- sample_size: nil,
67
+ sample_size: "auto",
65
68
  ntrees: 500,
66
69
  ndim: 3,
67
- ntry: 3,
68
- prob_pick_avg_gain: 0,
69
- prob_pick_pooled_gain: 0,
70
- prob_split_avg_gain: 0,
71
- prob_split_pooled_gain: 0,
72
- min_gain: 0,
73
- missing_action: "impute",
74
- new_categ_action: "smallest",
75
- categ_split_type: "subset",
70
+ ntry: 1,
71
+ max_depth: "auto",
72
+ ncols_per_tree: nil,
73
+ prob_pick_pooled_gain: 0.0,
74
+ prob_pick_avg_gain: 0.0,
75
+ prob_pick_full_gain: 0.0,
76
+ prob_pick_dens: 0.0,
77
+ prob_pick_col_by_range: 0.0,
78
+ prob_pick_col_by_var: 0.0,
79
+ prob_pick_col_by_kurt: 0.0,
80
+ min_gain: 0.0,
81
+ missing_action: "auto",
82
+ new_categ_action: "auto",
83
+ categ_split_type: "auto",
76
84
  all_perm: false,
77
85
  coef_by_prop: false,
78
86
  sample_with_replacement: false,
79
- penalize_range: true,
87
+ penalize_range: false,
88
+ standardize_data: true,
89
+ scoring_metric: "depth",
90
+ fast_bratio: true,
80
91
  weigh_by_kurtosis: false,
81
- coefs: "normal",
92
+ coefs: "uniform",
93
+ assume_full_distr: true,
82
94
  min_imp_obs: 3,
83
95
  depth_imp: "higher",
84
96
  weigh_imp_rows: "inverse",
85
97
  random_seed: 1,
98
+ use_long_double: false,
86
99
  nthreads: -1
87
100
  )
88
101
  ```
@@ -134,7 +147,6 @@ Check out [Trove](https://github.com/ankane/trove) for deploying models.
134
147
 
135
148
  ```sh
136
149
  trove push model.bin
137
- trove push model.bin.metadata
138
150
  ```
139
151
 
140
152
  ## Reference
@@ -145,6 +157,12 @@ Get the average isolation depth
145
157
  model.predict(data, output: "avg_depth")
146
158
  ```
147
159
 
160
+ ## Upgrading
161
+
162
+ ### 0.3.0
163
+
164
+ This version uses IsoTree’s new serialization format. Exported models must be recreated.
165
+
148
166
  ## History
149
167
 
150
168
  View the [changelog](https://github.com/ankane/isotree-ruby/blob/master/CHANGELOG.md)
data/ext/isotree/ext.cpp CHANGED
@@ -1,3 +1,8 @@
1
+ // stdlib
2
+ #include <cmath>
3
+ #include <fstream>
4
+ #include <iostream>
5
+
1
6
  // isotree
2
7
  #include <isotree.hpp>
3
8
 
@@ -22,7 +27,7 @@ namespace Rice::detail
22
27
  NewCategAction convert(VALUE x)
23
28
  {
24
29
  auto value = Object(x).to_s().str();
25
- if (value == "weighted") return Weighted;
30
+ if (value == "weighted" || value == "impute") return Weighted;
26
31
  if (value == "smallest") return Smallest;
27
32
  if (value == "random") return Random;
28
33
  throw std::runtime_error("Unknown new categ action: " + value);
@@ -96,6 +101,24 @@ namespace Rice::detail
96
101
  throw std::runtime_error("Unknown weight imp rows: " + value);
97
102
  }
98
103
  };
104
+
105
+ template<>
106
+ class From_Ruby<ScoringMetric>
107
+ {
108
+ public:
109
+ ScoringMetric convert(VALUE x)
110
+ {
111
+ auto value = Object(x).to_s().str();
112
+ if (value == "depth") return Depth;
113
+ if (value == "adj_depth") return AdjDepth;
114
+ if (value == "density") return Density;
115
+ if (value == "adj_density") return AdjDensity;
116
+ if (value == "boxed_density") return BoxedDensity;
117
+ if (value == "boxed_density2") return BoxedDensity2;
118
+ if (value == "boxed_ratio") return BoxedRatio;
119
+ throw std::runtime_error("Unknown scoring metric: " + value);
120
+ }
121
+ };
99
122
  }
100
123
 
101
124
  extern "C"
@@ -118,20 +141,20 @@ void Init_ext()
118
141
  size_t ncols_numeric = options.get<size_t, Symbol>("ncols_numeric");
119
142
  size_t ncols_categ = options.get<size_t, Symbol>("ncols_categ");
120
143
 
121
- double *restrict numeric_data = NULL;
144
+ real_t* numeric_data = NULL;
122
145
  if (ncols_numeric > 0) {
123
146
  numeric_data = (double*) options.get<String, Symbol>("numeric_data").c_str();
124
147
  }
125
148
 
126
- int *restrict categorical_data = NULL;
127
- int *restrict ncat = NULL;
149
+ int* categorical_data = NULL;
150
+ int* ncat = NULL;
128
151
  if (ncols_categ > 0) {
129
152
  categorical_data = (int*) options.get<String, Symbol>("categorical_data").c_str();
130
153
  ncat = (int*) options.get<String, Symbol>("ncat").c_str();
131
154
  }
132
155
 
133
156
  // not used (sparse matrices)
134
- double* Xc = NULL;
157
+ real_t* Xc = NULL;
135
158
  sparse_ix* Xc_ind = NULL;
136
159
  sparse_ix* Xc_indptr = NULL;
137
160
 
@@ -142,9 +165,7 @@ void Init_ext()
142
165
  size_t ntrees = options.get<size_t, Symbol>("ntrees");
143
166
  size_t ntry = options.get<size_t, Symbol>("ntry");
144
167
  double prob_pick_by_gain_avg = options.get<double, Symbol>("prob_pick_avg_gain");
145
- double prob_split_by_gain_avg = options.get<double, Symbol>("prob_split_avg_gain");
146
168
  double prob_pick_by_gain_pl = options.get<double, Symbol>("prob_pick_pooled_gain");
147
- double prob_split_by_gain_pl = options.get<double, Symbol>("prob_split_pooled_gain");
148
169
  double min_gain = options.get<double, Symbol>("min_gain");
149
170
  MissingAction missing_action = options.get<MissingAction, Symbol>("missing_action");
150
171
  CategSplit cat_split_type = options.get<CategSplit, Symbol>("categ_split_type");
@@ -159,21 +180,31 @@ void Init_ext()
159
180
  UseDepthImp depth_imp = options.get<UseDepthImp, Symbol>("depth_imp");
160
181
  WeighImpRows weigh_imp_rows = options.get<WeighImpRows, Symbol>("weigh_imp_rows");
161
182
  uint64_t random_seed = options.get<uint64_t, Symbol>("random_seed");
183
+ bool use_long_double = options.get<bool, Symbol>("use_long_double");
162
184
  int nthreads = options.get<int, Symbol>("nthreads");
163
185
 
164
186
  // TODO options
165
187
  double* sample_weights = NULL;
166
- bool weight_as_sample = false;
167
- size_t max_depth = 0;
168
- bool limit_depth = true;
188
+ bool weight_as_sample = options.get<bool, Symbol>("weights_as_sample_prob");
189
+ size_t max_depth = options.get<size_t, Symbol>("max_depth");
190
+ bool limit_depth = options.get<bool, Symbol>("limit_depth");
169
191
  bool standardize_dist = false;
170
192
  double* tmat = NULL;
171
193
  double* output_depths = NULL;
172
194
  bool standardize_depth = false;
173
- double* col_weights = NULL;
174
- Imputer *imputer = NULL;
195
+ real_t* col_weights = NULL;
196
+ Imputer* imputer = NULL;
175
197
  bool impute_at_fit = false;
176
- bool handle_interrupt = false;
198
+
199
+ int ncols_per_tree = options.get<int, Symbol>("ncols_per_tree");
200
+ bool standardize_data = options.get<bool, Symbol>("standardize_data");
201
+ ScoringMetric scoring_metric = options.get<ScoringMetric, Symbol>("scoring_metric");
202
+ bool fast_bratio = options.get<bool, Symbol>("fast_bratio");
203
+ double prob_pick_by_full_gain = options.get<double, Symbol>("prob_pick_full_gain");
204
+ double prob_pick_by_dens = options.get<double, Symbol>("prob_pick_dens");
205
+ double prob_pick_col_by_range = options.get<double, Symbol>("prob_pick_col_by_range");
206
+ double prob_pick_col_by_var = options.get<double, Symbol>("prob_pick_col_by_var");
207
+ double prob_pick_col_by_kurt = options.get<double, Symbol>("prob_pick_col_by_kurt");
177
208
 
178
209
  fit_iforest(
179
210
  NULL,
@@ -197,18 +228,25 @@ void Init_ext()
197
228
  sample_size,
198
229
  ntrees,
199
230
  max_depth,
231
+ ncols_per_tree,
200
232
  limit_depth,
201
233
  penalize_range,
234
+ standardize_data,
235
+ scoring_metric,
236
+ fast_bratio,
202
237
  standardize_dist,
203
238
  tmat,
204
239
  output_depths,
205
240
  standardize_depth,
206
241
  col_weights,
207
242
  weigh_by_kurt,
208
- prob_pick_by_gain_avg,
209
- prob_split_by_gain_avg,
210
243
  prob_pick_by_gain_pl,
211
- prob_split_by_gain_pl,
244
+ prob_pick_by_gain_avg,
245
+ prob_pick_by_full_gain,
246
+ prob_pick_by_dens,
247
+ prob_pick_col_by_range,
248
+ prob_pick_col_by_var,
249
+ prob_pick_col_by_kurt,
212
250
  min_gain,
213
251
  missing_action,
214
252
  cat_split_type,
@@ -220,7 +258,7 @@ void Init_ext()
220
258
  weigh_imp_rows,
221
259
  impute_at_fit,
222
260
  random_seed,
223
- handle_interrupt,
261
+ use_long_double,
224
262
  nthreads
225
263
  );
226
264
 
@@ -234,21 +272,21 @@ void Init_ext()
234
272
  size_t ncols_numeric = options.get<size_t, Symbol>("ncols_numeric");
235
273
  size_t ncols_categ = options.get<size_t, Symbol>("ncols_categ");
236
274
 
237
- double *restrict numeric_data = NULL;
275
+ real_t* numeric_data = NULL;
238
276
  if (ncols_numeric > 0) {
239
277
  numeric_data = (double*) options.get<String, Symbol>("numeric_data").c_str();
240
278
  }
241
279
 
242
- int *restrict categorical_data = NULL;
280
+ int* categorical_data = NULL;
243
281
  if (ncols_categ > 0) {
244
282
  categorical_data = (int*) options.get<String, Symbol>("categorical_data").c_str();
245
283
  }
246
284
 
247
285
  // not used (sparse matrices)
248
- double* Xc = NULL;
286
+ real_t* Xc = NULL;
249
287
  sparse_ix* Xc_ind = NULL;
250
288
  sparse_ix* Xc_indptr = NULL;
251
- double* Xr = NULL;
289
+ real_t* Xr = NULL;
252
290
  sparse_ix* Xr_ind = NULL;
253
291
  sparse_ix* Xr_indptr = NULL;
254
292
 
@@ -257,10 +295,17 @@ void Init_ext()
257
295
  bool standardize = options.get<bool, Symbol>("standardize");
258
296
  std::vector<double> outlier_scores(nrows);
259
297
  sparse_ix* tree_num = NULL;
298
+ bool is_col_major = true;
299
+ size_t ld_numeric = 0;
300
+ size_t ld_categ = 0;
301
+ double* per_tree_depths = NULL;
260
302
 
261
303
  predict_iforest(
262
304
  numeric_data,
263
305
  categorical_data,
306
+ is_col_major,
307
+ ld_numeric,
308
+ ld_categ,
264
309
  Xc,
265
310
  Xc_ind,
266
311
  Xc_indptr,
@@ -273,7 +318,9 @@ void Init_ext()
273
318
  NULL,
274
319
  &iso,
275
320
  outlier_scores.data(),
276
- tree_num
321
+ tree_num,
322
+ per_tree_depths,
323
+ NULL
277
324
  );
278
325
 
279
326
  Array ret;
@@ -283,27 +330,93 @@ void Init_ext()
283
330
  return ret;
284
331
  })
285
332
  .define_singleton_function(
286
- "serialize_ext_isoforest",
287
- [](ExtIsoForest& iso, String path) {
333
+ "serialize_combined",
334
+ [](ExtIsoForest& iso, String path, String metadata) {
288
335
  #ifdef _MSC_VER
289
336
  // TODO convert to wchar_t
290
337
  throw std::runtime_error("Not supported on Windows yet");
291
338
  #else
292
- serialize_ext_isoforest(iso, path.c_str());
339
+ std::ofstream file;
340
+ file.open(path.c_str());
341
+ serialize_combined(
342
+ NULL,
343
+ &iso,
344
+ NULL,
345
+ NULL,
346
+ metadata.c_str(),
347
+ // returns bytesize (RSTRING_LEN)
348
+ metadata.length(),
349
+ file
350
+ );
351
+ file.close();
293
352
  #endif
294
353
  })
295
354
  .define_singleton_function(
296
- "deserialize_ext_isoforest",
355
+ "deserialize_combined",
297
356
  [](String path) {
298
- ExtIsoForest iso;
299
-
300
357
  #ifdef _MSC_VER
301
358
  // TODO convert to wchar_t
302
359
  throw std::runtime_error("Not supported on Windows yet");
303
360
  #else
304
- deserialize_ext_isoforest(iso, path.c_str());
305
- #endif
361
+ Array ret;
306
362
 
307
- return iso;
363
+ std::ifstream file;
364
+ file.open(path.c_str(), std::ios_base::in | std::ios_base::binary);
365
+ if (!file) {
366
+ throw std::runtime_error("Cannot open file");
367
+ }
368
+
369
+ bool is_isotree_model = false;
370
+ bool is_compatible = false;
371
+ bool has_combined_objects = false;
372
+ bool has_IsoForest = false;
373
+ bool has_ExtIsoForest = false;
374
+ bool has_Imputer = false;
375
+ bool has_Indexer = false;
376
+ bool has_metadata = false;
377
+ size_t size_metadata = 0;
378
+
379
+ inspect_serialized_object(
380
+ file,
381
+ is_isotree_model,
382
+ is_compatible,
383
+ has_combined_objects,
384
+ has_IsoForest,
385
+ has_ExtIsoForest,
386
+ has_Imputer,
387
+ has_Indexer,
388
+ has_metadata,
389
+ size_metadata
390
+ );
391
+
392
+ if (!is_isotree_model || !has_combined_objects) {
393
+ throw std::runtime_error("Input file is not a serialized isotree model");
394
+ }
395
+ if (!is_compatible) {
396
+ throw std::runtime_error("Model file format is incompatible");
397
+ }
398
+ if (size_metadata == 0) {
399
+ throw std::runtime_error("Input file does not contain metadata");
400
+ }
401
+
402
+ IsoForest model = IsoForest();
403
+ ExtIsoForest model_ext = ExtIsoForest();
404
+ Imputer imputer = Imputer();
405
+ TreesIndexer indexer = TreesIndexer();
406
+ char *optional_metadata = (char*) calloc(size_metadata, sizeof(char));
407
+ if (optional_metadata == NULL) {
408
+ throw std::runtime_error("Cannot allocate memory");
409
+ }
410
+
411
+ deserialize_combined(file, &model, &model_ext, &imputer, &indexer, optional_metadata);
412
+ file.close();
413
+
414
+ ret.push(Object(Rice::detail::To_Ruby<ExtIsoForest>().convert(model_ext)));
415
+ ret.push(String(std::string(optional_metadata, size_metadata)));
416
+
417
+ free(optional_metadata);
418
+
419
+ return ret;
420
+ #endif
308
421
  });
309
422
  }
@@ -1,6 +1,6 @@
1
1
  require "mkmf-rice"
2
2
 
3
- $CXXFLAGS += " -std=c++17 $(optflags) -D_USE_MERSENNE_TWISTER -D_ENABLE_CEREAL"
3
+ $CXXFLAGS += " -std=c++17 $(optflags) -D_USE_XOSHIRO -DSUPPORTS_RESTRICT=1 -D_USE_ROBIN_MAP -DDONT_THROW_ON_INTERRUPT"
4
4
 
5
5
  apple_clang = RbConfig::CONFIG["CC_VERSION_MESSAGE"] =~ /apple clang/i
6
6
 
@@ -11,12 +11,12 @@ if have_library("omp") || have_library("gomp")
11
11
  end
12
12
 
13
13
  ext = File.expand_path(".", __dir__)
14
- isotree = File.expand_path("../../vendor/isotree/src", __dir__)
15
- cereal = File.expand_path("../../vendor/cereal/include", __dir__)
14
+ isotree_src = File.expand_path("../../vendor/isotree/src", __dir__)
15
+ isotree_inc = File.expand_path("../../vendor/isotree/include", __dir__)
16
16
 
17
- exclude = %w(Rwrapper.cpp RcppExports.cpp)
18
- $srcs = Dir["{#{ext},#{isotree}}/*.{cc,cpp}"].reject { |f| exclude.include?(File.basename(f)) }
19
- $INCFLAGS << " -I#{isotree} -I#{cereal}"
20
- $VPATH << isotree
17
+ exclude = %w(c_interface.cpp Rwrapper.cpp RcppExports.cpp)
18
+ $srcs = Dir["{#{ext},#{isotree_src}}/*.{cc,cpp}"].reject { |f| exclude.include?(File.basename(f)) }
19
+ $INCFLAGS << " -I#{isotree_inc}"
20
+ $VPATH << isotree_src
21
21
 
22
22
  create_makefile("isotree/ext")