isotree 0.1.2 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +25 -0
  3. data/LICENSE.txt +2 -1
  4. data/README.md +57 -6
  5. data/ext/isotree/ext.cpp +170 -39
  6. data/ext/isotree/extconf.rb +3 -3
  7. data/lib/isotree.rb +2 -0
  8. data/lib/isotree/dataset.rb +73 -0
  9. data/lib/isotree/isolation_forest.rb +182 -35
  10. data/lib/isotree/version.rb +1 -1
  11. data/vendor/cereal/LICENSE +24 -0
  12. data/vendor/cereal/README.md +85 -0
  13. data/vendor/cereal/include/cereal/access.hpp +351 -0
  14. data/vendor/cereal/include/cereal/archives/adapters.hpp +163 -0
  15. data/vendor/cereal/include/cereal/archives/binary.hpp +169 -0
  16. data/vendor/cereal/include/cereal/archives/json.hpp +1019 -0
  17. data/vendor/cereal/include/cereal/archives/portable_binary.hpp +334 -0
  18. data/vendor/cereal/include/cereal/archives/xml.hpp +956 -0
  19. data/vendor/cereal/include/cereal/cereal.hpp +1089 -0
  20. data/vendor/cereal/include/cereal/details/helpers.hpp +422 -0
  21. data/vendor/cereal/include/cereal/details/polymorphic_impl.hpp +796 -0
  22. data/vendor/cereal/include/cereal/details/polymorphic_impl_fwd.hpp +65 -0
  23. data/vendor/cereal/include/cereal/details/static_object.hpp +127 -0
  24. data/vendor/cereal/include/cereal/details/traits.hpp +1411 -0
  25. data/vendor/cereal/include/cereal/details/util.hpp +84 -0
  26. data/vendor/cereal/include/cereal/external/base64.hpp +134 -0
  27. data/vendor/cereal/include/cereal/external/rapidjson/allocators.h +284 -0
  28. data/vendor/cereal/include/cereal/external/rapidjson/cursorstreamwrapper.h +78 -0
  29. data/vendor/cereal/include/cereal/external/rapidjson/document.h +2652 -0
  30. data/vendor/cereal/include/cereal/external/rapidjson/encodedstream.h +299 -0
  31. data/vendor/cereal/include/cereal/external/rapidjson/encodings.h +716 -0
  32. data/vendor/cereal/include/cereal/external/rapidjson/error/en.h +74 -0
  33. data/vendor/cereal/include/cereal/external/rapidjson/error/error.h +161 -0
  34. data/vendor/cereal/include/cereal/external/rapidjson/filereadstream.h +99 -0
  35. data/vendor/cereal/include/cereal/external/rapidjson/filewritestream.h +104 -0
  36. data/vendor/cereal/include/cereal/external/rapidjson/fwd.h +151 -0
  37. data/vendor/cereal/include/cereal/external/rapidjson/internal/biginteger.h +290 -0
  38. data/vendor/cereal/include/cereal/external/rapidjson/internal/diyfp.h +271 -0
  39. data/vendor/cereal/include/cereal/external/rapidjson/internal/dtoa.h +245 -0
  40. data/vendor/cereal/include/cereal/external/rapidjson/internal/ieee754.h +78 -0
  41. data/vendor/cereal/include/cereal/external/rapidjson/internal/itoa.h +308 -0
  42. data/vendor/cereal/include/cereal/external/rapidjson/internal/meta.h +186 -0
  43. data/vendor/cereal/include/cereal/external/rapidjson/internal/pow10.h +55 -0
  44. data/vendor/cereal/include/cereal/external/rapidjson/internal/regex.h +740 -0
  45. data/vendor/cereal/include/cereal/external/rapidjson/internal/stack.h +232 -0
  46. data/vendor/cereal/include/cereal/external/rapidjson/internal/strfunc.h +69 -0
  47. data/vendor/cereal/include/cereal/external/rapidjson/internal/strtod.h +290 -0
  48. data/vendor/cereal/include/cereal/external/rapidjson/internal/swap.h +46 -0
  49. data/vendor/cereal/include/cereal/external/rapidjson/istreamwrapper.h +128 -0
  50. data/vendor/cereal/include/cereal/external/rapidjson/memorybuffer.h +70 -0
  51. data/vendor/cereal/include/cereal/external/rapidjson/memorystream.h +71 -0
  52. data/vendor/cereal/include/cereal/external/rapidjson/msinttypes/inttypes.h +316 -0
  53. data/vendor/cereal/include/cereal/external/rapidjson/msinttypes/stdint.h +300 -0
  54. data/vendor/cereal/include/cereal/external/rapidjson/ostreamwrapper.h +81 -0
  55. data/vendor/cereal/include/cereal/external/rapidjson/pointer.h +1414 -0
  56. data/vendor/cereal/include/cereal/external/rapidjson/prettywriter.h +277 -0
  57. data/vendor/cereal/include/cereal/external/rapidjson/rapidjson.h +656 -0
  58. data/vendor/cereal/include/cereal/external/rapidjson/reader.h +2230 -0
  59. data/vendor/cereal/include/cereal/external/rapidjson/schema.h +2497 -0
  60. data/vendor/cereal/include/cereal/external/rapidjson/stream.h +223 -0
  61. data/vendor/cereal/include/cereal/external/rapidjson/stringbuffer.h +121 -0
  62. data/vendor/cereal/include/cereal/external/rapidjson/writer.h +709 -0
  63. data/vendor/cereal/include/cereal/external/rapidxml/license.txt +52 -0
  64. data/vendor/cereal/include/cereal/external/rapidxml/manual.html +406 -0
  65. data/vendor/cereal/include/cereal/external/rapidxml/rapidxml.hpp +2624 -0
  66. data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_iterators.hpp +175 -0
  67. data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_print.hpp +428 -0
  68. data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_utils.hpp +123 -0
  69. data/vendor/cereal/include/cereal/macros.hpp +154 -0
  70. data/vendor/cereal/include/cereal/specialize.hpp +139 -0
  71. data/vendor/cereal/include/cereal/types/array.hpp +79 -0
  72. data/vendor/cereal/include/cereal/types/atomic.hpp +55 -0
  73. data/vendor/cereal/include/cereal/types/base_class.hpp +203 -0
  74. data/vendor/cereal/include/cereal/types/bitset.hpp +176 -0
  75. data/vendor/cereal/include/cereal/types/boost_variant.hpp +164 -0
  76. data/vendor/cereal/include/cereal/types/chrono.hpp +72 -0
  77. data/vendor/cereal/include/cereal/types/common.hpp +129 -0
  78. data/vendor/cereal/include/cereal/types/complex.hpp +56 -0
  79. data/vendor/cereal/include/cereal/types/concepts/pair_associative_container.hpp +73 -0
  80. data/vendor/cereal/include/cereal/types/deque.hpp +62 -0
  81. data/vendor/cereal/include/cereal/types/forward_list.hpp +68 -0
  82. data/vendor/cereal/include/cereal/types/functional.hpp +43 -0
  83. data/vendor/cereal/include/cereal/types/list.hpp +62 -0
  84. data/vendor/cereal/include/cereal/types/map.hpp +36 -0
  85. data/vendor/cereal/include/cereal/types/memory.hpp +425 -0
  86. data/vendor/cereal/include/cereal/types/optional.hpp +66 -0
  87. data/vendor/cereal/include/cereal/types/polymorphic.hpp +483 -0
  88. data/vendor/cereal/include/cereal/types/queue.hpp +132 -0
  89. data/vendor/cereal/include/cereal/types/set.hpp +103 -0
  90. data/vendor/cereal/include/cereal/types/stack.hpp +76 -0
  91. data/vendor/cereal/include/cereal/types/string.hpp +61 -0
  92. data/vendor/cereal/include/cereal/types/tuple.hpp +123 -0
  93. data/vendor/cereal/include/cereal/types/unordered_map.hpp +36 -0
  94. data/vendor/cereal/include/cereal/types/unordered_set.hpp +99 -0
  95. data/vendor/cereal/include/cereal/types/utility.hpp +47 -0
  96. data/vendor/cereal/include/cereal/types/valarray.hpp +89 -0
  97. data/vendor/cereal/include/cereal/types/variant.hpp +109 -0
  98. data/vendor/cereal/include/cereal/types/vector.hpp +112 -0
  99. data/vendor/cereal/include/cereal/version.hpp +52 -0
  100. data/vendor/isotree/LICENSE +1 -1
  101. data/vendor/isotree/README.md +7 -2
  102. data/vendor/isotree/src/RcppExports.cpp +44 -4
  103. data/vendor/isotree/src/Rwrapper.cpp +141 -51
  104. data/vendor/isotree/src/crit.cpp +1 -1
  105. data/vendor/isotree/src/dealloc.cpp +1 -1
  106. data/vendor/isotree/src/dist.cpp +6 -6
  107. data/vendor/isotree/src/extended.cpp +5 -5
  108. data/vendor/isotree/src/fit_model.cpp +27 -5
  109. data/vendor/isotree/src/helpers_iforest.cpp +26 -11
  110. data/vendor/isotree/src/impute.cpp +7 -7
  111. data/vendor/isotree/src/isoforest.cpp +7 -7
  112. data/vendor/isotree/src/isotree.hpp +27 -5
  113. data/vendor/isotree/src/merge_models.cpp +1 -1
  114. data/vendor/isotree/src/mult.cpp +1 -1
  115. data/vendor/isotree/src/predict.cpp +20 -16
  116. data/vendor/isotree/src/serialize.cpp +1 -1
  117. data/vendor/isotree/src/sql.cpp +545 -0
  118. data/vendor/isotree/src/utils.cpp +36 -44
  119. metadata +102 -81
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b15de55d1a752d14cc97e2b5372308b2d4cb6a1e6fcfce0a05da6f769708b189
4
- data.tar.gz: af21414cea40a26b2e291230e5d48bf4f804e1c77837a3132921b896bc617961
3
+ metadata.gz: ffdc3a4283698bfc43c5563bd1c8cb4dd41881ec0a9920bfbe91f01fcbe0e822
4
+ data.tar.gz: 83cc4a38f6640fd5a37c2a6aabe0cba12e198bdbd7b010cabb34f8d36073a74b
5
5
  SHA512:
6
- metadata.gz: 8127b5402c9c9f03bd2bd475b01a5cc8fbd3900ac1517d401ff4647d634e1f1049c8de51086095b132f30217f3571f8aa9e84c5fd18a0d3ac420a84203da85b7
7
- data.tar.gz: 63b26ee19d8c49ce33d61891110db56597221a776830eb2aaad84c6d46038cb30822431a6f30b1051289f6becab0b652d968fbd4cf065c0925d50d5ef769c89a
6
+ metadata.gz: b6642570d6330fc2d72b210a040985dfa025d5d0deed622d04533fe9bf830c6cd3dda1ace94ace3d034c8302aa360e5ef6eeda576b48340db4d6c95016b8979c
7
+ data.tar.gz: df0716c317e01bd174157e5c1b09e4f4e566f07512ea3861324b8fa9659a416c6ef1319580d2ee8a7231c74f74b9a9e5bdc9770a5adf0d208b82a22db1a9b29d
data/CHANGELOG.md CHANGED
@@ -1,3 +1,28 @@
1
+ ## 0.2.1 (2021-05-23)
2
+
3
+ - Improved performance
4
+
5
+ ## 0.2.0 (2021-05-17)
6
+
7
+ - Updated to Rice 4
8
+ - Dropped support for Ruby < 2.6
9
+
10
+ ## 0.1.5 (2021-03-14)
11
+
12
+ - Updated Isotree to 0.1.25
13
+ - Added support for exporting and importing models
14
+
15
+ ## 0.1.4 (2020-08-22)
16
+
17
+ - Added `missing_action`, `new_categ_action`, `categ_split_type`, `coefs`, `depth_imp`, and `weigh_imp_rows` options
18
+ - Fixed signal handling
19
+
20
+ ## 0.1.3 (2020-08-13)
21
+
22
+ - Added support for categorical data
23
+ - Added support for Rover data frames
24
+ - Added `output` option to `predict` method
25
+
1
26
  ## 0.1.2 (2020-08-11)
2
27
 
3
28
  - Fixed outlier scores
data/LICENSE.txt CHANGED
@@ -1,6 +1,7 @@
1
1
  BSD 2-Clause License
2
2
 
3
- Copyright (c) 2020, Andrew Kane
3
+ Copyright (c) 2020, David Cortes
4
+ Copyright (c) 2020-2021, Andrew Kane
4
5
  All rights reserved.
5
6
 
6
7
  Redistribution and use in source and binary forms, with or without
data/README.md CHANGED
@@ -4,7 +4,9 @@
4
4
 
5
5
  Learn how [Isolation Forest](https://www.youtube.com/watch?v=RyFQXQf4w4w) works
6
6
 
7
- [![Build Status](https://travis-ci.org/ankane/isotree.svg?branch=master)](https://travis-ci.org/ankane/isotree)
7
+ :deciduous_tree: Check out [OutlierTree](https://github.com/ankane/outliertree) for human-readable explanations of outliers
8
+
9
+ [![Build Status](https://github.com/ankane/isotree/workflows/build/badge.svg?branch=master)](https://github.com/ankane/isotree/actions)
8
10
 
9
11
  ## Installation
10
12
 
@@ -19,24 +21,40 @@ gem 'isotree'
19
21
  Prep your data
20
22
 
21
23
  ```ruby
22
- x = [[1, 2], [3, 4], [5, 6], [7, 8]]
24
+ data = [
25
+ {department: "Books", sale: false, price: 2.50},
26
+ {department: "Books", sale: true, price: 3.00},
27
+ {department: "Movies", sale: false, price: 5.00}
28
+ ]
23
29
  ```
24
30
 
25
31
  Train a model
26
32
 
27
33
  ```ruby
28
34
  model = IsoTree::IsolationForest.new
29
- model.fit(x)
35
+ model.fit(data)
30
36
  ```
31
37
 
32
38
  Get outlier scores
33
39
 
34
40
  ```ruby
35
- model.predict(x)
41
+ model.predict(data)
36
42
  ```
37
43
 
38
44
  Scores are between 0 and 1, with higher scores indicating outliers
39
45
 
46
+ Export the model
47
+
48
+ ```ruby
49
+ model.export_model("model.bin")
50
+ ```
51
+
52
+ Import a model
53
+
54
+ ```ruby
55
+ model = IsoTree::IsolationForest.import_model("model.bin")
56
+ ```
57
+
40
58
  ## Parameters
41
59
 
42
60
  Pass parameters - default values below
@@ -52,12 +70,18 @@ IsoTree::IsolationForest.new(
52
70
  prob_split_avg_gain: 0,
53
71
  prob_split_pooled_gain: 0,
54
72
  min_gain: 0,
73
+ missing_action: "impute",
74
+ new_categ_action: "smallest",
75
+ categ_split_type: "subset",
55
76
  all_perm: false,
56
77
  coef_by_prop: false,
57
78
  sample_with_replacement: false,
58
79
  penalize_range: true,
59
80
  weigh_by_kurtosis: false,
81
+ coefs: "normal",
60
82
  min_imp_obs: 3,
83
+ depth_imp: "higher",
84
+ weigh_imp_rows: "inverse",
61
85
  random_seed: 1,
62
86
  nthreads: -1
63
87
  )
@@ -67,10 +91,20 @@ See a [detailed explanation](https://isotree.readthedocs.io/en/latest/#isotree.I
67
91
 
68
92
  ## Data
69
93
 
70
- Data can be an array of arrays
94
+ Data can be an array of hashes
71
95
 
72
96
  ```ruby
73
- [[1, 2, 3], [4, 5, 6]]
97
+ [
98
+ {department: "Books", sale: false, price: 2.50},
99
+ {department: "Books", sale: true, price: 3.00},
100
+ {department: "Movies", sale: false, price: 5.00}
101
+ ]
102
+ ```
103
+
104
+ Or a Rover data frame
105
+
106
+ ```ruby
107
+ Rover.read_csv("data.csv")
74
108
  ```
75
109
 
76
110
  Or a Numo array
@@ -94,6 +128,23 @@ gem uninstall isotree --force
94
128
  bundle install
95
129
  ```
96
130
 
131
+ ## Deployment
132
+
133
+ Check out [Trove](https://github.com/ankane/trove) for deploying models.
134
+
135
+ ```sh
136
+ trove push model.bin
137
+ trove push model.bin.metadata
138
+ ```
139
+
140
+ ## Reference
141
+
142
+ Get the average isolation depth
143
+
144
+ ```ruby
145
+ model.predict(data, output: "avg_depth")
146
+ ```
147
+
97
148
  ## History
98
149
 
99
150
  View the [changelog](https://github.com/ankane/isotree/blob/master/CHANGELOG.md)
data/ext/isotree/ext.cpp CHANGED
@@ -2,20 +2,102 @@
2
2
  #include <isotree.hpp>
3
3
 
4
4
  // rice
5
- #include <rice/Array.hpp>
6
- #include <rice/Hash.hpp>
7
- #include <rice/Module.hpp>
8
- #include <rice/String.hpp>
9
- #include <rice/Symbol.hpp>
5
+ #include <rice/rice.hpp>
10
6
 
11
7
  using Rice::Array;
12
8
  using Rice::Hash;
13
9
  using Rice::Module;
10
+ using Rice::Object;
14
11
  using Rice::String;
15
12
  using Rice::Symbol;
16
13
  using Rice::define_class_under;
17
14
  using Rice::define_module;
18
15
 
16
+ namespace Rice::detail
17
+ {
18
+ template<>
19
+ class From_Ruby<NewCategAction>
20
+ {
21
+ public:
22
+ NewCategAction convert(VALUE x)
23
+ {
24
+ auto value = Object(x).to_s().str();
25
+ if (value == "weighted") return Weighted;
26
+ if (value == "smallest") return Smallest;
27
+ if (value == "random") return Random;
28
+ throw std::runtime_error("Unknown new categ action: " + value);
29
+ }
30
+ };
31
+
32
+ template<>
33
+ class From_Ruby<MissingAction>
34
+ {
35
+ public:
36
+ MissingAction convert(VALUE x)
37
+ {
38
+ auto value = Object(x).to_s().str();
39
+ if (value == "divide") return Divide;
40
+ if (value == "impute") return Impute;
41
+ if (value == "fail") return Fail;
42
+ throw std::runtime_error("Unknown missing action: " + value);
43
+ }
44
+ };
45
+
46
+ template<>
47
+ class From_Ruby<CategSplit>
48
+ {
49
+ public:
50
+ CategSplit convert(VALUE x)
51
+ {
52
+ auto value = Object(x).to_s().str();
53
+ if (value == "subset") return SubSet;
54
+ if (value == "single_categ") return SingleCateg;
55
+ throw std::runtime_error("Unknown categ split: " + value);
56
+ }
57
+ };
58
+
59
+ template<>
60
+ class From_Ruby<CoefType>
61
+ {
62
+ public:
63
+ CoefType convert(VALUE x)
64
+ {
65
+ auto value = Object(x).to_s().str();
66
+ if (value == "uniform") return Uniform;
67
+ if (value == "normal") return Normal;
68
+ throw std::runtime_error("Unknown coef type: " + value);
69
+ }
70
+ };
71
+
72
+ template<>
73
+ class From_Ruby<UseDepthImp>
74
+ {
75
+ public:
76
+ UseDepthImp convert(VALUE x)
77
+ {
78
+ auto value = Object(x).to_s().str();
79
+ if (value == "lower") return Lower;
80
+ if (value == "higher") return Higher;
81
+ if (value == "same") return Same;
82
+ throw std::runtime_error("Unknown depth imp: " + value);
83
+ }
84
+ };
85
+
86
+ template<>
87
+ class From_Ruby<WeighImpRows>
88
+ {
89
+ public:
90
+ WeighImpRows convert(VALUE x)
91
+ {
92
+ auto value = Object(x).to_s().str();
93
+ if (value == "inverse") return Inverse;
94
+ if (value == "prop") return Prop;
95
+ if (value == "flat") return Flat;
96
+ throw std::runtime_error("Unknown weight imp rows: " + value);
97
+ }
98
+ };
99
+ }
100
+
19
101
  extern "C"
20
102
  void Init_ext()
21
103
  {
@@ -25,44 +107,36 @@ void Init_ext()
25
107
  define_class_under<ExtIsoForest>(rb_mExt, "ExtIsoForest");
26
108
 
27
109
  rb_mExt
28
- .define_singleton_method(
110
+ .define_singleton_function(
29
111
  "fit_iforest",
30
- *[](Hash options) {
112
+ [](Hash options) {
31
113
  // model
32
114
  ExtIsoForest iso;
33
115
 
34
116
  // data
35
117
  size_t nrows = options.get<size_t, Symbol>("nrows");
36
- size_t ncols = options.get<size_t, Symbol>("ncols");
37
- double* numeric_data = (double*) options.get<String, Symbol>("numeric_data").c_str();
38
- size_t ncols_numeric = ncols;
39
- int* categ_data = NULL;
40
- size_t ncols_categ = 0;
41
- int* ncat = NULL;
118
+ size_t ncols_numeric = options.get<size_t, Symbol>("ncols_numeric");
119
+ size_t ncols_categ = options.get<size_t, Symbol>("ncols_categ");
120
+
121
+ double *restrict numeric_data = NULL;
122
+ if (ncols_numeric > 0) {
123
+ numeric_data = (double*) options.get<String, Symbol>("numeric_data").c_str();
124
+ }
125
+
126
+ int *restrict categorical_data = NULL;
127
+ int *restrict ncat = NULL;
128
+ if (ncols_categ > 0) {
129
+ categorical_data = (int*) options.get<String, Symbol>("categorical_data").c_str();
130
+ ncat = (int*) options.get<String, Symbol>("ncat").c_str();
131
+ }
132
+
133
+ // not used (sparse matrices)
42
134
  double* Xc = NULL;
43
135
  sparse_ix* Xc_ind = NULL;
44
136
  sparse_ix* Xc_indptr = NULL;
45
137
 
46
138
  // options
47
- CoefType coef_type = Normal;
48
- double* sample_weights = NULL;
49
- bool weight_as_sample = false;
50
- size_t max_depth = 0;
51
- bool limit_depth = true;
52
- bool standardize_dist = false;
53
- double* tmat = NULL;
54
- double* output_depths = NULL;
55
- bool standardize_depth = false;
56
- double* col_weights = NULL;
57
- MissingAction missing_action = Impute;
58
- CategSplit cat_split_type = SubSet;
59
- NewCategAction new_cat_action = Smallest;
60
- Imputer *imputer = NULL;
61
- UseDepthImp depth_imp = Higher;
62
- WeighImpRows weigh_imp_rows = Inverse;
63
- bool impute_at_fit = false;
64
-
65
- // Rice has limit of 14 arguments, so use hash for options
139
+ // Rice has limit of 14 arguments, so use hash
66
140
  size_t sample_size = options.get<size_t, Symbol>("sample_size");
67
141
  size_t ndim = options.get<size_t, Symbol>("ndim");
68
142
  size_t ntrees = options.get<size_t, Symbol>("ntrees");
@@ -72,21 +146,41 @@ void Init_ext()
72
146
  double prob_pick_by_gain_pl = options.get<double, Symbol>("prob_pick_pooled_gain");
73
147
  double prob_split_by_gain_pl = options.get<double, Symbol>("prob_split_pooled_gain");
74
148
  double min_gain = options.get<double, Symbol>("min_gain");
149
+ MissingAction missing_action = options.get<MissingAction, Symbol>("missing_action");
150
+ CategSplit cat_split_type = options.get<CategSplit, Symbol>("categ_split_type");
151
+ NewCategAction new_cat_action = options.get<NewCategAction, Symbol>("new_categ_action");
75
152
  bool all_perm = options.get<bool, Symbol>("all_perm");
76
153
  bool coef_by_prop = options.get<bool, Symbol>("coef_by_prop");
77
154
  bool with_replacement = options.get<bool, Symbol>("sample_with_replacement");
78
155
  bool penalize_range = options.get<bool, Symbol>("penalize_range");
79
156
  bool weigh_by_kurt = options.get<bool, Symbol>("weigh_by_kurtosis");
157
+ CoefType coef_type = options.get<CoefType, Symbol>("coefs");
80
158
  size_t min_imp_obs = options.get<size_t, Symbol>("min_imp_obs");
159
+ UseDepthImp depth_imp = options.get<UseDepthImp, Symbol>("depth_imp");
160
+ WeighImpRows weigh_imp_rows = options.get<WeighImpRows, Symbol>("weigh_imp_rows");
81
161
  uint64_t random_seed = options.get<uint64_t, Symbol>("random_seed");
82
162
  int nthreads = options.get<int, Symbol>("nthreads");
83
163
 
164
+ // TODO options
165
+ double* sample_weights = NULL;
166
+ bool weight_as_sample = false;
167
+ size_t max_depth = 0;
168
+ bool limit_depth = true;
169
+ bool standardize_dist = false;
170
+ double* tmat = NULL;
171
+ double* output_depths = NULL;
172
+ bool standardize_depth = false;
173
+ double* col_weights = NULL;
174
+ Imputer *imputer = NULL;
175
+ bool impute_at_fit = false;
176
+ bool handle_interrupt = false;
177
+
84
178
  fit_iforest(
85
179
  NULL,
86
180
  &iso,
87
181
  numeric_data,
88
182
  ncols_numeric,
89
- categ_data,
183
+ categorical_data,
90
184
  ncols_categ,
91
185
  ncat,
92
186
  Xc,
@@ -126,18 +220,31 @@ void Init_ext()
126
220
  weigh_imp_rows,
127
221
  impute_at_fit,
128
222
  random_seed,
223
+ handle_interrupt,
129
224
  nthreads
130
225
  );
131
226
 
132
227
  return iso;
133
228
  })
134
- .define_singleton_method(
229
+ .define_singleton_function(
135
230
  "predict_iforest",
136
- *[](ExtIsoForest& iso, Hash options) {
231
+ [](ExtIsoForest& iso, Hash options) {
137
232
  // data
138
233
  size_t nrows = options.get<size_t, Symbol>("nrows");
139
- double* numeric_data = (double*) options.get<String, Symbol>("numeric_data").c_str();
140
- int* categ_data = NULL;
234
+ size_t ncols_numeric = options.get<size_t, Symbol>("ncols_numeric");
235
+ size_t ncols_categ = options.get<size_t, Symbol>("ncols_categ");
236
+
237
+ double *restrict numeric_data = NULL;
238
+ if (ncols_numeric > 0) {
239
+ numeric_data = (double*) options.get<String, Symbol>("numeric_data").c_str();
240
+ }
241
+
242
+ int *restrict categorical_data = NULL;
243
+ if (ncols_categ > 0) {
244
+ categorical_data = (int*) options.get<String, Symbol>("categorical_data").c_str();
245
+ }
246
+
247
+ // not used (sparse matrices)
141
248
  double* Xc = NULL;
142
249
  sparse_ix* Xc_ind = NULL;
143
250
  sparse_ix* Xc_indptr = NULL;
@@ -147,13 +254,13 @@ void Init_ext()
147
254
 
148
255
  // options
149
256
  int nthreads = options.get<int, Symbol>("nthreads");
150
- bool standardize = true;
257
+ bool standardize = options.get<bool, Symbol>("standardize");
151
258
  std::vector<double> outlier_scores(nrows);
152
259
  sparse_ix* tree_num = NULL;
153
260
 
154
261
  predict_iforest(
155
262
  numeric_data,
156
- categ_data,
263
+ categorical_data,
157
264
  Xc,
158
265
  Xc_ind,
159
266
  Xc_indptr,
@@ -174,5 +281,29 @@ void Init_ext()
174
281
  ret.push(outlier_scores[i]);
175
282
  }
176
283
  return ret;
284
+ })
285
+ .define_singleton_function(
286
+ "serialize_ext_isoforest",
287
+ [](ExtIsoForest& iso, String path) {
288
+ #ifdef _MSC_VER
289
+ // TODO convert to wchar_t
290
+ throw std::runtime_error("Not supported on Windows yet");
291
+ #else
292
+ serialize_ext_isoforest(iso, path.c_str());
293
+ #endif
294
+ })
295
+ .define_singleton_function(
296
+ "deserialize_ext_isoforest",
297
+ [](String path) {
298
+ ExtIsoForest iso;
299
+
300
+ #ifdef _MSC_VER
301
+ // TODO convert to wchar_t
302
+ throw std::runtime_error("Not supported on Windows yet");
303
+ #else
304
+ deserialize_ext_isoforest(iso, path.c_str());
305
+ #endif
306
+
307
+ return iso;
177
308
  });
178
309
  }