isotree 0.1.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (119)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +25 -0
  3. data/LICENSE.txt +2 -1
  4. data/README.md +57 -6
  5. data/ext/isotree/ext.cpp +170 -39
  6. data/ext/isotree/extconf.rb +3 -3
  7. data/lib/isotree.rb +2 -0
  8. data/lib/isotree/dataset.rb +73 -0
  9. data/lib/isotree/isolation_forest.rb +182 -29
  10. data/lib/isotree/version.rb +1 -1
  11. data/vendor/cereal/LICENSE +24 -0
  12. data/vendor/cereal/README.md +85 -0
  13. data/vendor/cereal/include/cereal/access.hpp +351 -0
  14. data/vendor/cereal/include/cereal/archives/adapters.hpp +163 -0
  15. data/vendor/cereal/include/cereal/archives/binary.hpp +169 -0
  16. data/vendor/cereal/include/cereal/archives/json.hpp +1019 -0
  17. data/vendor/cereal/include/cereal/archives/portable_binary.hpp +334 -0
  18. data/vendor/cereal/include/cereal/archives/xml.hpp +956 -0
  19. data/vendor/cereal/include/cereal/cereal.hpp +1089 -0
  20. data/vendor/cereal/include/cereal/details/helpers.hpp +422 -0
  21. data/vendor/cereal/include/cereal/details/polymorphic_impl.hpp +796 -0
  22. data/vendor/cereal/include/cereal/details/polymorphic_impl_fwd.hpp +65 -0
  23. data/vendor/cereal/include/cereal/details/static_object.hpp +127 -0
  24. data/vendor/cereal/include/cereal/details/traits.hpp +1411 -0
  25. data/vendor/cereal/include/cereal/details/util.hpp +84 -0
  26. data/vendor/cereal/include/cereal/external/base64.hpp +134 -0
  27. data/vendor/cereal/include/cereal/external/rapidjson/allocators.h +284 -0
  28. data/vendor/cereal/include/cereal/external/rapidjson/cursorstreamwrapper.h +78 -0
  29. data/vendor/cereal/include/cereal/external/rapidjson/document.h +2652 -0
  30. data/vendor/cereal/include/cereal/external/rapidjson/encodedstream.h +299 -0
  31. data/vendor/cereal/include/cereal/external/rapidjson/encodings.h +716 -0
  32. data/vendor/cereal/include/cereal/external/rapidjson/error/en.h +74 -0
  33. data/vendor/cereal/include/cereal/external/rapidjson/error/error.h +161 -0
  34. data/vendor/cereal/include/cereal/external/rapidjson/filereadstream.h +99 -0
  35. data/vendor/cereal/include/cereal/external/rapidjson/filewritestream.h +104 -0
  36. data/vendor/cereal/include/cereal/external/rapidjson/fwd.h +151 -0
  37. data/vendor/cereal/include/cereal/external/rapidjson/internal/biginteger.h +290 -0
  38. data/vendor/cereal/include/cereal/external/rapidjson/internal/diyfp.h +271 -0
  39. data/vendor/cereal/include/cereal/external/rapidjson/internal/dtoa.h +245 -0
  40. data/vendor/cereal/include/cereal/external/rapidjson/internal/ieee754.h +78 -0
  41. data/vendor/cereal/include/cereal/external/rapidjson/internal/itoa.h +308 -0
  42. data/vendor/cereal/include/cereal/external/rapidjson/internal/meta.h +186 -0
  43. data/vendor/cereal/include/cereal/external/rapidjson/internal/pow10.h +55 -0
  44. data/vendor/cereal/include/cereal/external/rapidjson/internal/regex.h +740 -0
  45. data/vendor/cereal/include/cereal/external/rapidjson/internal/stack.h +232 -0
  46. data/vendor/cereal/include/cereal/external/rapidjson/internal/strfunc.h +69 -0
  47. data/vendor/cereal/include/cereal/external/rapidjson/internal/strtod.h +290 -0
  48. data/vendor/cereal/include/cereal/external/rapidjson/internal/swap.h +46 -0
  49. data/vendor/cereal/include/cereal/external/rapidjson/istreamwrapper.h +128 -0
  50. data/vendor/cereal/include/cereal/external/rapidjson/memorybuffer.h +70 -0
  51. data/vendor/cereal/include/cereal/external/rapidjson/memorystream.h +71 -0
  52. data/vendor/cereal/include/cereal/external/rapidjson/msinttypes/inttypes.h +316 -0
  53. data/vendor/cereal/include/cereal/external/rapidjson/msinttypes/stdint.h +300 -0
  54. data/vendor/cereal/include/cereal/external/rapidjson/ostreamwrapper.h +81 -0
  55. data/vendor/cereal/include/cereal/external/rapidjson/pointer.h +1414 -0
  56. data/vendor/cereal/include/cereal/external/rapidjson/prettywriter.h +277 -0
  57. data/vendor/cereal/include/cereal/external/rapidjson/rapidjson.h +656 -0
  58. data/vendor/cereal/include/cereal/external/rapidjson/reader.h +2230 -0
  59. data/vendor/cereal/include/cereal/external/rapidjson/schema.h +2497 -0
  60. data/vendor/cereal/include/cereal/external/rapidjson/stream.h +223 -0
  61. data/vendor/cereal/include/cereal/external/rapidjson/stringbuffer.h +121 -0
  62. data/vendor/cereal/include/cereal/external/rapidjson/writer.h +709 -0
  63. data/vendor/cereal/include/cereal/external/rapidxml/license.txt +52 -0
  64. data/vendor/cereal/include/cereal/external/rapidxml/manual.html +406 -0
  65. data/vendor/cereal/include/cereal/external/rapidxml/rapidxml.hpp +2624 -0
  66. data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_iterators.hpp +175 -0
  67. data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_print.hpp +428 -0
  68. data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_utils.hpp +123 -0
  69. data/vendor/cereal/include/cereal/macros.hpp +154 -0
  70. data/vendor/cereal/include/cereal/specialize.hpp +139 -0
  71. data/vendor/cereal/include/cereal/types/array.hpp +79 -0
  72. data/vendor/cereal/include/cereal/types/atomic.hpp +55 -0
  73. data/vendor/cereal/include/cereal/types/base_class.hpp +203 -0
  74. data/vendor/cereal/include/cereal/types/bitset.hpp +176 -0
  75. data/vendor/cereal/include/cereal/types/boost_variant.hpp +164 -0
  76. data/vendor/cereal/include/cereal/types/chrono.hpp +72 -0
  77. data/vendor/cereal/include/cereal/types/common.hpp +129 -0
  78. data/vendor/cereal/include/cereal/types/complex.hpp +56 -0
  79. data/vendor/cereal/include/cereal/types/concepts/pair_associative_container.hpp +73 -0
  80. data/vendor/cereal/include/cereal/types/deque.hpp +62 -0
  81. data/vendor/cereal/include/cereal/types/forward_list.hpp +68 -0
  82. data/vendor/cereal/include/cereal/types/functional.hpp +43 -0
  83. data/vendor/cereal/include/cereal/types/list.hpp +62 -0
  84. data/vendor/cereal/include/cereal/types/map.hpp +36 -0
  85. data/vendor/cereal/include/cereal/types/memory.hpp +425 -0
  86. data/vendor/cereal/include/cereal/types/optional.hpp +66 -0
  87. data/vendor/cereal/include/cereal/types/polymorphic.hpp +483 -0
  88. data/vendor/cereal/include/cereal/types/queue.hpp +132 -0
  89. data/vendor/cereal/include/cereal/types/set.hpp +103 -0
  90. data/vendor/cereal/include/cereal/types/stack.hpp +76 -0
  91. data/vendor/cereal/include/cereal/types/string.hpp +61 -0
  92. data/vendor/cereal/include/cereal/types/tuple.hpp +123 -0
  93. data/vendor/cereal/include/cereal/types/unordered_map.hpp +36 -0
  94. data/vendor/cereal/include/cereal/types/unordered_set.hpp +99 -0
  95. data/vendor/cereal/include/cereal/types/utility.hpp +47 -0
  96. data/vendor/cereal/include/cereal/types/valarray.hpp +89 -0
  97. data/vendor/cereal/include/cereal/types/variant.hpp +109 -0
  98. data/vendor/cereal/include/cereal/types/vector.hpp +112 -0
  99. data/vendor/cereal/include/cereal/version.hpp +52 -0
  100. data/vendor/isotree/LICENSE +1 -1
  101. data/vendor/isotree/README.md +7 -2
  102. data/vendor/isotree/src/RcppExports.cpp +44 -4
  103. data/vendor/isotree/src/Rwrapper.cpp +141 -51
  104. data/vendor/isotree/src/crit.cpp +1 -1
  105. data/vendor/isotree/src/dealloc.cpp +1 -1
  106. data/vendor/isotree/src/dist.cpp +6 -6
  107. data/vendor/isotree/src/extended.cpp +5 -5
  108. data/vendor/isotree/src/fit_model.cpp +27 -5
  109. data/vendor/isotree/src/helpers_iforest.cpp +26 -11
  110. data/vendor/isotree/src/impute.cpp +7 -7
  111. data/vendor/isotree/src/isoforest.cpp +7 -7
  112. data/vendor/isotree/src/isotree.hpp +27 -5
  113. data/vendor/isotree/src/merge_models.cpp +1 -1
  114. data/vendor/isotree/src/mult.cpp +1 -1
  115. data/vendor/isotree/src/predict.cpp +20 -16
  116. data/vendor/isotree/src/serialize.cpp +1 -1
  117. data/vendor/isotree/src/sql.cpp +545 -0
  118. data/vendor/isotree/src/utils.cpp +36 -44
  119. metadata +102 -81
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2f1bbb1a3daca8511c9988d7975213ac466a19284bdcab1eb9a1b38b3883eb47
4
- data.tar.gz: bafd2fadac39b4e5881b7d039f2a75514953b85edd79cc07e4cabd91eda79e76
3
+ metadata.gz: 95dc93ac1b84a5a37539b335da0457955ee8868997a34a9c249f7c54927f4b04
4
+ data.tar.gz: eff22a02afce64167248e967d384b0c9b2259f2f5248cfad5bd37acd8bc44e2a
5
5
  SHA512:
6
- metadata.gz: cf1c9582b2bf9355bb26151b62c95d00be849594bb06390f2a9b5015ed5119b175c76cd58b4429ee186ba14333f86c8e3a8b6f87af5a04f6550b2e2751c85112
7
- data.tar.gz: 82f204ffd028cfb71840b0918c6fb5e7a741a7a2076d4dfe0c15c3f2e536fbd765be883fedc1a5e4ca73828f7429d08a22f521bc6b2e9f094ff10de6cf2eb979
6
+ metadata.gz: 9f410b78af1ae72f4cd166511b6b676f3b71bc39ce92641c455879a3aa88172abb7d1c37ffc953d505fdd59db07d355c9ffba20c3577a1d9b2311c5959a4c87f
7
+ data.tar.gz: 4804ec4aa11997fb91bcd714a0569e0571970bf06d4ad9b491f0e450c33183c86d37a922577ec296765708e9599272863cfa7f775f7dca4d9d18346ed8a38d87
data/CHANGELOG.md CHANGED
@@ -1,3 +1,28 @@
1
+ ## 0.2.0 (2021-05-17)
2
+
3
+ - Updated to Rice 4
4
+ - Dropped support for Ruby < 2.6
5
+
6
+ ## 0.1.5 (2021-03-14)
7
+
8
+ - Updated Isotree to 0.1.25
9
+ - Added support for exporting and importing models
10
+
11
+ ## 0.1.4 (2020-08-22)
12
+
13
+ - Added `missing_action`, `new_categ_action`, `categ_split_type`, `coefs`, `depth_imp`, and `weigh_imp_rows` options
14
+ - Fixed signal handling
15
+
16
+ ## 0.1.3 (2020-08-13)
17
+
18
+ - Added support for categorical data
19
+ - Added support for Rover data frames
20
+ - Added `output` option to `predict` method
21
+
22
+ ## 0.1.2 (2020-08-11)
23
+
24
+ - Fixed outlier scores
25
+
1
26
  ## 0.1.1 (2020-08-10)
2
27
 
3
28
  - Fixed installation error when cereal not installed
data/LICENSE.txt CHANGED
@@ -1,6 +1,7 @@
1
1
  BSD 2-Clause License
2
2
 
3
- Copyright (c) 2020, Andrew Kane
3
+ Copyright (c) 2020, David Cortes
4
+ Copyright (c) 2020-2021, Andrew Kane
4
5
  All rights reserved.
5
6
 
6
7
  Redistribution and use in source and binary forms, with or without
data/README.md CHANGED
@@ -4,7 +4,9 @@
4
4
 
5
5
  Learn how [Isolation Forest](https://www.youtube.com/watch?v=RyFQXQf4w4w) works
6
6
 
7
- [![Build Status](https://travis-ci.org/ankane/isotree.svg?branch=master)](https://travis-ci.org/ankane/isotree)
7
+ :deciduous_tree: Check out [OutlierTree](https://github.com/ankane/outliertree) for human-readable explanations of outliers
8
+
9
+ [![Build Status](https://github.com/ankane/isotree/workflows/build/badge.svg?branch=master)](https://github.com/ankane/isotree/actions)
8
10
 
9
11
  ## Installation
10
12
 
@@ -19,24 +21,40 @@ gem 'isotree'
19
21
  Prep your data
20
22
 
21
23
  ```ruby
22
- x = [[1, 2], [3, 4], [5, 6], [7, 8]]
24
+ data = [
25
+ {department: "Books", sale: false, price: 2.50},
26
+ {department: "Books", sale: true, price: 3.00},
27
+ {department: "Movies", sale: false, price: 5.00}
28
+ ]
23
29
  ```
24
30
 
25
31
  Train a model
26
32
 
27
33
  ```ruby
28
34
  model = IsoTree::IsolationForest.new
29
- model.fit(x)
35
+ model.fit(data)
30
36
  ```
31
37
 
32
38
  Get outlier scores
33
39
 
34
40
  ```ruby
35
- model.predict(x)
41
+ model.predict(data)
36
42
  ```
37
43
 
38
44
  Scores are between 0 and 1, with higher scores indicating outliers
39
45
 
46
+ Export the model
47
+
48
+ ```ruby
49
+ model.export_model("model.bin")
50
+ ```
51
+
52
+ Import a model
53
+
54
+ ```ruby
55
+ model = IsoTree::IsolationForest.import_model("model.bin")
56
+ ```
57
+
40
58
  ## Parameters
41
59
 
42
60
  Pass parameters - default values below
@@ -52,12 +70,18 @@ IsoTree::IsolationForest.new(
52
70
  prob_split_avg_gain: 0,
53
71
  prob_split_pooled_gain: 0,
54
72
  min_gain: 0,
73
+ missing_action: "impute",
74
+ new_categ_action: "smallest",
75
+ categ_split_type: "subset",
55
76
  all_perm: false,
56
77
  coef_by_prop: false,
57
78
  sample_with_replacement: false,
58
79
  penalize_range: true,
59
80
  weigh_by_kurtosis: false,
81
+ coefs: "normal",
60
82
  min_imp_obs: 3,
83
+ depth_imp: "higher",
84
+ weigh_imp_rows: "inverse",
61
85
  random_seed: 1,
62
86
  nthreads: -1
63
87
  )
@@ -67,10 +91,20 @@ See a [detailed explanation](https://isotree.readthedocs.io/en/latest/#isotree.I
67
91
 
68
92
  ## Data
69
93
 
70
- Data can be an array of arrays
94
+ Data can be an array of hashes
71
95
 
72
96
  ```ruby
73
- [[1, 2, 3], [4, 5, 6]]
97
+ [
98
+ {department: "Books", sale: false, price: 2.50},
99
+ {department: "Books", sale: true, price: 3.00},
100
+ {department: "Movies", sale: false, price: 5.00}
101
+ ]
102
+ ```
103
+
104
+ Or a Rover data frame
105
+
106
+ ```ruby
107
+ Rover.read_csv("data.csv")
74
108
  ```
75
109
 
76
110
  Or a Numo array
@@ -94,6 +128,23 @@ gem uninstall isotree --force
94
128
  bundle install
95
129
  ```
96
130
 
131
+ ## Deployment
132
+
133
+ Check out [Trove](https://github.com/ankane/trove) for deploying models.
134
+
135
+ ```sh
136
+ trove push model.bin
137
+ trove push model.bin.metadata
138
+ ```
139
+
140
+ ## Reference
141
+
142
+ Get the average isolation depth
143
+
144
+ ```ruby
145
+ model.predict(data, output: "avg_depth")
146
+ ```
147
+
97
148
  ## History
98
149
 
99
150
  View the [changelog](https://github.com/ankane/isotree/blob/master/CHANGELOG.md)
data/ext/isotree/ext.cpp CHANGED
@@ -2,20 +2,102 @@
2
2
  #include <isotree.hpp>
3
3
 
4
4
  // rice
5
- #include <rice/Array.hpp>
6
- #include <rice/Hash.hpp>
7
- #include <rice/Module.hpp>
8
- #include <rice/String.hpp>
9
- #include <rice/Symbol.hpp>
5
+ #include <rice/rice.hpp>
10
6
 
11
7
  using Rice::Array;
12
8
  using Rice::Hash;
13
9
  using Rice::Module;
10
+ using Rice::Object;
14
11
  using Rice::String;
15
12
  using Rice::Symbol;
16
13
  using Rice::define_class_under;
17
14
  using Rice::define_module;
18
15
 
16
+ namespace Rice::detail
17
+ {
18
+ template<>
19
+ class From_Ruby<NewCategAction>
20
+ {
21
+ public:
22
+ NewCategAction convert(VALUE x)
23
+ {
24
+ auto value = Object(x).to_s().str();
25
+ if (value == "weighted") return Weighted;
26
+ if (value == "smallest") return Smallest;
27
+ if (value == "random") return Random;
28
+ throw std::runtime_error("Unknown new categ action: " + value);
29
+ }
30
+ };
31
+
32
+ template<>
33
+ class From_Ruby<MissingAction>
34
+ {
35
+ public:
36
+ MissingAction convert(VALUE x)
37
+ {
38
+ auto value = Object(x).to_s().str();
39
+ if (value == "divide") return Divide;
40
+ if (value == "impute") return Impute;
41
+ if (value == "fail") return Fail;
42
+ throw std::runtime_error("Unknown missing action: " + value);
43
+ }
44
+ };
45
+
46
+ template<>
47
+ class From_Ruby<CategSplit>
48
+ {
49
+ public:
50
+ CategSplit convert(VALUE x)
51
+ {
52
+ auto value = Object(x).to_s().str();
53
+ if (value == "subset") return SubSet;
54
+ if (value == "single_categ") return SingleCateg;
55
+ throw std::runtime_error("Unknown categ split: " + value);
56
+ }
57
+ };
58
+
59
+ template<>
60
+ class From_Ruby<CoefType>
61
+ {
62
+ public:
63
+ CoefType convert(VALUE x)
64
+ {
65
+ auto value = Object(x).to_s().str();
66
+ if (value == "uniform") return Uniform;
67
+ if (value == "normal") return Normal;
68
+ throw std::runtime_error("Unknown coef type: " + value);
69
+ }
70
+ };
71
+
72
+ template<>
73
+ class From_Ruby<UseDepthImp>
74
+ {
75
+ public:
76
+ UseDepthImp convert(VALUE x)
77
+ {
78
+ auto value = Object(x).to_s().str();
79
+ if (value == "lower") return Lower;
80
+ if (value == "higher") return Higher;
81
+ if (value == "same") return Same;
82
+ throw std::runtime_error("Unknown depth imp: " + value);
83
+ }
84
+ };
85
+
86
+ template<>
87
+ class From_Ruby<WeighImpRows>
88
+ {
89
+ public:
90
+ WeighImpRows convert(VALUE x)
91
+ {
92
+ auto value = Object(x).to_s().str();
93
+ if (value == "inverse") return Inverse;
94
+ if (value == "prop") return Prop;
95
+ if (value == "flat") return Flat;
96
+ throw std::runtime_error("Unknown weight imp rows: " + value);
97
+ }
98
+ };
99
+ }
100
+
19
101
  extern "C"
20
102
  void Init_ext()
21
103
  {
@@ -25,44 +107,36 @@ void Init_ext()
25
107
  define_class_under<ExtIsoForest>(rb_mExt, "ExtIsoForest");
26
108
 
27
109
  rb_mExt
28
- .define_singleton_method(
110
+ .define_singleton_function(
29
111
  "fit_iforest",
30
- *[](Hash options) {
112
+ [](Hash options) {
31
113
  // model
32
114
  ExtIsoForest iso;
33
115
 
34
116
  // data
35
117
  size_t nrows = options.get<size_t, Symbol>("nrows");
36
- size_t ncols = options.get<size_t, Symbol>("ncols");
37
- double* numeric_data = (double*) options.get<String, Symbol>("numeric_data").c_str();
38
- size_t ncols_numeric = ncols;
39
- int* categ_data = NULL;
40
- size_t ncols_categ = 0;
41
- int* ncat = NULL;
118
+ size_t ncols_numeric = options.get<size_t, Symbol>("ncols_numeric");
119
+ size_t ncols_categ = options.get<size_t, Symbol>("ncols_categ");
120
+
121
+ double *restrict numeric_data = NULL;
122
+ if (ncols_numeric > 0) {
123
+ numeric_data = (double*) options.get<String, Symbol>("numeric_data").c_str();
124
+ }
125
+
126
+ int *restrict categorical_data = NULL;
127
+ int *restrict ncat = NULL;
128
+ if (ncols_categ > 0) {
129
+ categorical_data = (int*) options.get<String, Symbol>("categorical_data").c_str();
130
+ ncat = (int*) options.get<String, Symbol>("ncat").c_str();
131
+ }
132
+
133
+ // not used (sparse matrices)
42
134
  double* Xc = NULL;
43
135
  sparse_ix* Xc_ind = NULL;
44
136
  sparse_ix* Xc_indptr = NULL;
45
137
 
46
138
  // options
47
- CoefType coef_type = Normal;
48
- double* sample_weights = NULL;
49
- bool weight_as_sample = false;
50
- size_t max_depth = 0;
51
- bool limit_depth = true;
52
- bool standardize_dist = false;
53
- double* tmat = NULL;
54
- double* output_depths = NULL;
55
- bool standardize_depth = false;
56
- double* col_weights = NULL;
57
- MissingAction missing_action = Impute;
58
- CategSplit cat_split_type = SubSet;
59
- NewCategAction new_cat_action = Smallest;
60
- Imputer *imputer = NULL;
61
- UseDepthImp depth_imp = Higher;
62
- WeighImpRows weigh_imp_rows = Inverse;
63
- bool impute_at_fit = false;
64
-
65
- // Rice has limit of 14 arguments, so use hash for options
139
+ // Rice has limit of 14 arguments, so use hash
66
140
  size_t sample_size = options.get<size_t, Symbol>("sample_size");
67
141
  size_t ndim = options.get<size_t, Symbol>("ndim");
68
142
  size_t ntrees = options.get<size_t, Symbol>("ntrees");
@@ -72,21 +146,41 @@ void Init_ext()
72
146
  double prob_pick_by_gain_pl = options.get<double, Symbol>("prob_pick_pooled_gain");
73
147
  double prob_split_by_gain_pl = options.get<double, Symbol>("prob_split_pooled_gain");
74
148
  double min_gain = options.get<double, Symbol>("min_gain");
149
+ MissingAction missing_action = options.get<MissingAction, Symbol>("missing_action");
150
+ CategSplit cat_split_type = options.get<CategSplit, Symbol>("categ_split_type");
151
+ NewCategAction new_cat_action = options.get<NewCategAction, Symbol>("new_categ_action");
75
152
  bool all_perm = options.get<bool, Symbol>("all_perm");
76
153
  bool coef_by_prop = options.get<bool, Symbol>("coef_by_prop");
77
154
  bool with_replacement = options.get<bool, Symbol>("sample_with_replacement");
78
155
  bool penalize_range = options.get<bool, Symbol>("penalize_range");
79
156
  bool weigh_by_kurt = options.get<bool, Symbol>("weigh_by_kurtosis");
157
+ CoefType coef_type = options.get<CoefType, Symbol>("coefs");
80
158
  size_t min_imp_obs = options.get<size_t, Symbol>("min_imp_obs");
159
+ UseDepthImp depth_imp = options.get<UseDepthImp, Symbol>("depth_imp");
160
+ WeighImpRows weigh_imp_rows = options.get<WeighImpRows, Symbol>("weigh_imp_rows");
81
161
  uint64_t random_seed = options.get<uint64_t, Symbol>("random_seed");
82
162
  int nthreads = options.get<int, Symbol>("nthreads");
83
163
 
164
+ // TODO options
165
+ double* sample_weights = NULL;
166
+ bool weight_as_sample = false;
167
+ size_t max_depth = 0;
168
+ bool limit_depth = true;
169
+ bool standardize_dist = false;
170
+ double* tmat = NULL;
171
+ double* output_depths = NULL;
172
+ bool standardize_depth = false;
173
+ double* col_weights = NULL;
174
+ Imputer *imputer = NULL;
175
+ bool impute_at_fit = false;
176
+ bool handle_interrupt = false;
177
+
84
178
  fit_iforest(
85
179
  NULL,
86
180
  &iso,
87
181
  numeric_data,
88
182
  ncols_numeric,
89
- categ_data,
183
+ categorical_data,
90
184
  ncols_categ,
91
185
  ncat,
92
186
  Xc,
@@ -126,18 +220,31 @@ void Init_ext()
126
220
  weigh_imp_rows,
127
221
  impute_at_fit,
128
222
  random_seed,
223
+ handle_interrupt,
129
224
  nthreads
130
225
  );
131
226
 
132
227
  return iso;
133
228
  })
134
- .define_singleton_method(
229
+ .define_singleton_function(
135
230
  "predict_iforest",
136
- *[](ExtIsoForest& iso, Hash options) {
231
+ [](ExtIsoForest& iso, Hash options) {
137
232
  // data
138
233
  size_t nrows = options.get<size_t, Symbol>("nrows");
139
- double* numeric_data = (double*) options.get<String, Symbol>("numeric_data").c_str();
140
- int* categ_data = NULL;
234
+ size_t ncols_numeric = options.get<size_t, Symbol>("ncols_numeric");
235
+ size_t ncols_categ = options.get<size_t, Symbol>("ncols_categ");
236
+
237
+ double *restrict numeric_data = NULL;
238
+ if (ncols_numeric > 0) {
239
+ numeric_data = (double*) options.get<String, Symbol>("numeric_data").c_str();
240
+ }
241
+
242
+ int *restrict categorical_data = NULL;
243
+ if (ncols_categ > 0) {
244
+ categorical_data = (int*) options.get<String, Symbol>("categorical_data").c_str();
245
+ }
246
+
247
+ // not used (sparse matrices)
141
248
  double* Xc = NULL;
142
249
  sparse_ix* Xc_ind = NULL;
143
250
  sparse_ix* Xc_indptr = NULL;
@@ -147,13 +254,13 @@ void Init_ext()
147
254
 
148
255
  // options
149
256
  int nthreads = options.get<int, Symbol>("nthreads");
150
- bool standardize = true;
257
+ bool standardize = options.get<bool, Symbol>("standardize");
151
258
  std::vector<double> outlier_scores(nrows);
152
259
  sparse_ix* tree_num = NULL;
153
260
 
154
261
  predict_iforest(
155
262
  numeric_data,
156
- categ_data,
263
+ categorical_data,
157
264
  Xc,
158
265
  Xc_ind,
159
266
  Xc_indptr,
@@ -174,5 +281,29 @@ void Init_ext()
174
281
  ret.push(outlier_scores[i]);
175
282
  }
176
283
  return ret;
284
+ })
285
+ .define_singleton_function(
286
+ "serialize_ext_isoforest",
287
+ [](ExtIsoForest& iso, String path) {
288
+ #ifdef _MSC_VER
289
+ // TODO convert to wchar_t
290
+ throw std::runtime_error("Not supported on Windows yet");
291
+ #else
292
+ serialize_ext_isoforest(iso, path.c_str());
293
+ #endif
294
+ })
295
+ .define_singleton_function(
296
+ "deserialize_ext_isoforest",
297
+ [](String path) {
298
+ ExtIsoForest iso;
299
+
300
+ #ifdef _MSC_VER
301
+ // TODO convert to wchar_t
302
+ throw std::runtime_error("Not supported on Windows yet");
303
+ #else
304
+ deserialize_ext_isoforest(iso, path.c_str());
305
+ #endif
306
+
307
+ return iso;
177
308
  });
178
309
  }