isotree 0.1.0 → 0.1.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (119)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +25 -1
  3. data/LICENSE.txt +2 -1
  4. data/README.md +64 -6
  5. data/ext/isotree/ext.cpp +139 -30
  6. data/ext/isotree/extconf.rb +2 -1
  7. data/lib/isotree.rb +2 -0
  8. data/lib/isotree/dataset.rb +73 -0
  9. data/lib/isotree/isolation_forest.rb +182 -29
  10. data/lib/isotree/version.rb +1 -1
  11. data/vendor/cereal/LICENSE +24 -0
  12. data/vendor/cereal/README.md +85 -0
  13. data/vendor/cereal/include/cereal/access.hpp +351 -0
  14. data/vendor/cereal/include/cereal/archives/adapters.hpp +163 -0
  15. data/vendor/cereal/include/cereal/archives/binary.hpp +169 -0
  16. data/vendor/cereal/include/cereal/archives/json.hpp +1019 -0
  17. data/vendor/cereal/include/cereal/archives/portable_binary.hpp +334 -0
  18. data/vendor/cereal/include/cereal/archives/xml.hpp +956 -0
  19. data/vendor/cereal/include/cereal/cereal.hpp +1089 -0
  20. data/vendor/cereal/include/cereal/details/helpers.hpp +422 -0
  21. data/vendor/cereal/include/cereal/details/polymorphic_impl.hpp +796 -0
  22. data/vendor/cereal/include/cereal/details/polymorphic_impl_fwd.hpp +65 -0
  23. data/vendor/cereal/include/cereal/details/static_object.hpp +127 -0
  24. data/vendor/cereal/include/cereal/details/traits.hpp +1411 -0
  25. data/vendor/cereal/include/cereal/details/util.hpp +84 -0
  26. data/vendor/cereal/include/cereal/external/base64.hpp +134 -0
  27. data/vendor/cereal/include/cereal/external/rapidjson/allocators.h +284 -0
  28. data/vendor/cereal/include/cereal/external/rapidjson/cursorstreamwrapper.h +78 -0
  29. data/vendor/cereal/include/cereal/external/rapidjson/document.h +2652 -0
  30. data/vendor/cereal/include/cereal/external/rapidjson/encodedstream.h +299 -0
  31. data/vendor/cereal/include/cereal/external/rapidjson/encodings.h +716 -0
  32. data/vendor/cereal/include/cereal/external/rapidjson/error/en.h +74 -0
  33. data/vendor/cereal/include/cereal/external/rapidjson/error/error.h +161 -0
  34. data/vendor/cereal/include/cereal/external/rapidjson/filereadstream.h +99 -0
  35. data/vendor/cereal/include/cereal/external/rapidjson/filewritestream.h +104 -0
  36. data/vendor/cereal/include/cereal/external/rapidjson/fwd.h +151 -0
  37. data/vendor/cereal/include/cereal/external/rapidjson/internal/biginteger.h +290 -0
  38. data/vendor/cereal/include/cereal/external/rapidjson/internal/diyfp.h +271 -0
  39. data/vendor/cereal/include/cereal/external/rapidjson/internal/dtoa.h +245 -0
  40. data/vendor/cereal/include/cereal/external/rapidjson/internal/ieee754.h +78 -0
  41. data/vendor/cereal/include/cereal/external/rapidjson/internal/itoa.h +308 -0
  42. data/vendor/cereal/include/cereal/external/rapidjson/internal/meta.h +186 -0
  43. data/vendor/cereal/include/cereal/external/rapidjson/internal/pow10.h +55 -0
  44. data/vendor/cereal/include/cereal/external/rapidjson/internal/regex.h +740 -0
  45. data/vendor/cereal/include/cereal/external/rapidjson/internal/stack.h +232 -0
  46. data/vendor/cereal/include/cereal/external/rapidjson/internal/strfunc.h +69 -0
  47. data/vendor/cereal/include/cereal/external/rapidjson/internal/strtod.h +290 -0
  48. data/vendor/cereal/include/cereal/external/rapidjson/internal/swap.h +46 -0
  49. data/vendor/cereal/include/cereal/external/rapidjson/istreamwrapper.h +128 -0
  50. data/vendor/cereal/include/cereal/external/rapidjson/memorybuffer.h +70 -0
  51. data/vendor/cereal/include/cereal/external/rapidjson/memorystream.h +71 -0
  52. data/vendor/cereal/include/cereal/external/rapidjson/msinttypes/inttypes.h +316 -0
  53. data/vendor/cereal/include/cereal/external/rapidjson/msinttypes/stdint.h +300 -0
  54. data/vendor/cereal/include/cereal/external/rapidjson/ostreamwrapper.h +81 -0
  55. data/vendor/cereal/include/cereal/external/rapidjson/pointer.h +1414 -0
  56. data/vendor/cereal/include/cereal/external/rapidjson/prettywriter.h +277 -0
  57. data/vendor/cereal/include/cereal/external/rapidjson/rapidjson.h +656 -0
  58. data/vendor/cereal/include/cereal/external/rapidjson/reader.h +2230 -0
  59. data/vendor/cereal/include/cereal/external/rapidjson/schema.h +2497 -0
  60. data/vendor/cereal/include/cereal/external/rapidjson/stream.h +223 -0
  61. data/vendor/cereal/include/cereal/external/rapidjson/stringbuffer.h +121 -0
  62. data/vendor/cereal/include/cereal/external/rapidjson/writer.h +709 -0
  63. data/vendor/cereal/include/cereal/external/rapidxml/license.txt +52 -0
  64. data/vendor/cereal/include/cereal/external/rapidxml/manual.html +406 -0
  65. data/vendor/cereal/include/cereal/external/rapidxml/rapidxml.hpp +2624 -0
  66. data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_iterators.hpp +175 -0
  67. data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_print.hpp +428 -0
  68. data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_utils.hpp +123 -0
  69. data/vendor/cereal/include/cereal/macros.hpp +154 -0
  70. data/vendor/cereal/include/cereal/specialize.hpp +139 -0
  71. data/vendor/cereal/include/cereal/types/array.hpp +79 -0
  72. data/vendor/cereal/include/cereal/types/atomic.hpp +55 -0
  73. data/vendor/cereal/include/cereal/types/base_class.hpp +203 -0
  74. data/vendor/cereal/include/cereal/types/bitset.hpp +176 -0
  75. data/vendor/cereal/include/cereal/types/boost_variant.hpp +164 -0
  76. data/vendor/cereal/include/cereal/types/chrono.hpp +72 -0
  77. data/vendor/cereal/include/cereal/types/common.hpp +129 -0
  78. data/vendor/cereal/include/cereal/types/complex.hpp +56 -0
  79. data/vendor/cereal/include/cereal/types/concepts/pair_associative_container.hpp +73 -0
  80. data/vendor/cereal/include/cereal/types/deque.hpp +62 -0
  81. data/vendor/cereal/include/cereal/types/forward_list.hpp +68 -0
  82. data/vendor/cereal/include/cereal/types/functional.hpp +43 -0
  83. data/vendor/cereal/include/cereal/types/list.hpp +62 -0
  84. data/vendor/cereal/include/cereal/types/map.hpp +36 -0
  85. data/vendor/cereal/include/cereal/types/memory.hpp +425 -0
  86. data/vendor/cereal/include/cereal/types/optional.hpp +66 -0
  87. data/vendor/cereal/include/cereal/types/polymorphic.hpp +483 -0
  88. data/vendor/cereal/include/cereal/types/queue.hpp +132 -0
  89. data/vendor/cereal/include/cereal/types/set.hpp +103 -0
  90. data/vendor/cereal/include/cereal/types/stack.hpp +76 -0
  91. data/vendor/cereal/include/cereal/types/string.hpp +61 -0
  92. data/vendor/cereal/include/cereal/types/tuple.hpp +123 -0
  93. data/vendor/cereal/include/cereal/types/unordered_map.hpp +36 -0
  94. data/vendor/cereal/include/cereal/types/unordered_set.hpp +99 -0
  95. data/vendor/cereal/include/cereal/types/utility.hpp +47 -0
  96. data/vendor/cereal/include/cereal/types/valarray.hpp +89 -0
  97. data/vendor/cereal/include/cereal/types/variant.hpp +109 -0
  98. data/vendor/cereal/include/cereal/types/vector.hpp +112 -0
  99. data/vendor/cereal/include/cereal/version.hpp +52 -0
  100. data/vendor/isotree/LICENSE +1 -1
  101. data/vendor/isotree/README.md +7 -2
  102. data/vendor/isotree/src/RcppExports.cpp +44 -4
  103. data/vendor/isotree/src/Rwrapper.cpp +141 -51
  104. data/vendor/isotree/src/crit.cpp +1 -1
  105. data/vendor/isotree/src/dealloc.cpp +1 -1
  106. data/vendor/isotree/src/dist.cpp +6 -6
  107. data/vendor/isotree/src/extended.cpp +5 -5
  108. data/vendor/isotree/src/fit_model.cpp +27 -5
  109. data/vendor/isotree/src/helpers_iforest.cpp +26 -11
  110. data/vendor/isotree/src/impute.cpp +7 -7
  111. data/vendor/isotree/src/isoforest.cpp +7 -7
  112. data/vendor/isotree/src/isotree.hpp +27 -5
  113. data/vendor/isotree/src/merge_models.cpp +1 -1
  114. data/vendor/isotree/src/mult.cpp +1 -1
  115. data/vendor/isotree/src/predict.cpp +20 -16
  116. data/vendor/isotree/src/serialize.cpp +1 -1
  117. data/vendor/isotree/src/sql.cpp +545 -0
  118. data/vendor/isotree/src/utils.cpp +36 -44
  119. metadata +99 -78
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: fa5516ad971c1fc1def4766fdf39ec74121a20f6c95d9b7bda705707324a5571
4
- data.tar.gz: c7aed404ad88d2e0365f7698cbad0a2d198a85860b9863d6322b10e53852f8a3
3
+ metadata.gz: a5974c65c4adc4fd79ee7770f324c906a2788534f3fe6f381d61711e7cdce78c
4
+ data.tar.gz: d401e7c5aaabcd5dcffd5e4f54d93ea4d0966698854e70d3475fd79a92e1e241
5
5
  SHA512:
6
- metadata.gz: 92a74815ea52c38c0a4d0f27cb78524413cc32f966732c520590937f9523c253344e04673be280ff04659c7ed4a8db96560fba54fc425221b8542244e275ed33
7
- data.tar.gz: a5824999e81a4732742646e66cdbae55812340667122b262c799bf3a5b243e9185445855ef64c31c877941d6427cd4fdfb7d77b4f87c2160b489ebe90aa18af6
6
+ metadata.gz: d00de0c3902b4f7fd3e13e08f0738ec4daf8be173f60cf46f49318e76f5be3c4ef3edd925f83074e03b4fe181aa6384d43c749f5f72247cf08d44f6811a6fe80
7
+ data.tar.gz: 70195ae442c2e4762b2f900a82a891d8f991ace1e521d625f910a1b5e3482334a299674c255e1aebf855728ee169ab5886428baf6df9a5e436bc73f3aff6dda6
data/CHANGELOG.md CHANGED
@@ -1,3 +1,27 @@
1
- ## 0.1.0 (unreleased)
1
+ ## 0.1.5 (2021-03-14)
2
+
3
+ - Updated Isotree to 0.1.25
4
+ - Added support for exporting and importing models
5
+
6
+ ## 0.1.4 (2020-08-22)
7
+
8
+ - Added `missing_action`, `new_categ_action`, `categ_split_type`, `coefs`, `depth_imp`, and `weigh_imp_rows` options
9
+ - Fixed signal handling
10
+
11
+ ## 0.1.3 (2020-08-13)
12
+
13
+ - Added support for categorical data
14
+ - Added support for Rover data frames
15
+ - Added `output` option to `predict` method
16
+
17
+ ## 0.1.2 (2020-08-11)
18
+
19
+ - Fixed outlier scores
20
+
21
+ ## 0.1.1 (2020-08-10)
22
+
23
+ - Fixed installation error when cereal not installed
24
+
25
+ ## 0.1.0 (2020-08-10)
2
26
 
3
27
  - First release
data/LICENSE.txt CHANGED
@@ -1,6 +1,7 @@
1
1
  BSD 2-Clause License
2
2
 
3
- Copyright (c) 2020, Andrew Kane
3
+ Copyright (c) 2020, David Cortes
4
+ Copyright (c) 2020-2021, Andrew Kane
4
5
  All rights reserved.
5
6
 
6
7
  Redistribution and use in source and binary forms, with or without
data/README.md CHANGED
@@ -1,9 +1,13 @@
1
1
  # IsoTree
2
2
 
3
- :evergreen_tree: [IsoTree](https://github.com/david-cortes/isotree) - outlier/anomaly detection for using Isolation Forest - for Ruby
3
+ :evergreen_tree: [IsoTree](https://github.com/david-cortes/isotree) - outlier/anomaly detection using Isolation Forest - for Ruby
4
4
 
5
5
  Learn how [Isolation Forest](https://www.youtube.com/watch?v=RyFQXQf4w4w) works
6
6
 
7
+ :deciduous_tree: Check out [OutlierTree](https://github.com/ankane/outliertree) for human-readable explanations of outliers
8
+
9
+ [![Build Status](https://github.com/ankane/isotree/workflows/build/badge.svg?branch=master)](https://github.com/ankane/isotree/actions)
10
+
7
11
  ## Installation
8
12
 
9
13
  Add this line to your application’s Gemfile:
@@ -17,24 +21,40 @@ gem 'isotree'
17
21
  Prep your data
18
22
 
19
23
  ```ruby
20
- x = [[1, 2], [3, 4], [5, 6], [7, 8]]
24
+ data = [
25
+ {department: "Books", sale: false, price: 2.50},
26
+ {department: "Books", sale: true, price: 3.00},
27
+ {department: "Movies", sale: false, price: 5.00}
28
+ ]
21
29
  ```
22
30
 
23
31
  Train a model
24
32
 
25
33
  ```ruby
26
34
  model = IsoTree::IsolationForest.new
27
- model.fit(x)
35
+ model.fit(data)
28
36
  ```
29
37
 
30
38
  Get outlier scores
31
39
 
32
40
  ```ruby
33
- model.predict(x)
41
+ model.predict(data)
34
42
  ```
35
43
 
36
44
  Scores are between 0 and 1, with higher scores indicating outliers
37
45
 
46
+ Export the model
47
+
48
+ ```ruby
49
+ model.export_model("model.bin")
50
+ ```
51
+
52
+ Import a model
53
+
54
+ ```ruby
55
+ model = IsoTree::IsolationForest.import_model("model.bin")
56
+ ```
57
+
38
58
  ## Parameters
39
59
 
40
60
  Pass parameters - default values below
@@ -50,12 +70,18 @@ IsoTree::IsolationForest.new(
50
70
  prob_split_avg_gain: 0,
51
71
  prob_split_pooled_gain: 0,
52
72
  min_gain: 0,
73
+ missing_action: "impute",
74
+ new_categ_action: "smallest",
75
+ categ_split_type: "subset",
53
76
  all_perm: false,
54
77
  coef_by_prop: false,
55
78
  sample_with_replacement: false,
56
79
  penalize_range: true,
57
80
  weigh_by_kurtosis: false,
81
+ coefs: "normal",
58
82
  min_imp_obs: 3,
83
+ depth_imp: "higher",
84
+ weigh_imp_rows: "inverse",
59
85
  random_seed: 1,
60
86
  nthreads: -1
61
87
  )
@@ -65,10 +91,20 @@ See a [detailed explanation](https://isotree.readthedocs.io/en/latest/#isotree.I
65
91
 
66
92
  ## Data
67
93
 
68
- Data can be an array of arrays
94
+ Data can be an array of hashes
95
+
96
+ ```ruby
97
+ [
98
+ {department: "Books", sale: false, price: 2.50},
99
+ {department: "Books", sale: true, price: 3.00},
100
+ {department: "Movies", sale: false, price: 5.00}
101
+ ]
102
+ ```
103
+
104
+ Or a Rover data frame
69
105
 
70
106
  ```ruby
71
- [[1, 2, 3], [4, 5, 6]]
107
+ Rover.read_csv("data.csv")
72
108
  ```
73
109
 
74
110
  Or a Numo array
@@ -87,6 +123,28 @@ brew install libomp
87
123
 
88
124
  Then reinstall the gem.
89
125
 
126
+ ```sh
127
+ gem uninstall isotree --force
128
+ bundle install
129
+ ```
130
+
131
+ ## Deployment
132
+
133
+ Check out [Trove](https://github.com/ankane/trove) for deploying models.
134
+
135
+ ```sh
136
+ trove push model.bin
137
+ trove push model.bin.metadata
138
+ ```
139
+
140
+ ## Reference
141
+
142
+ Get the average isolation depth
143
+
144
+ ```ruby
145
+ model.predict(data, output: "avg_depth")
146
+ ```
147
+
90
148
  ## History
91
149
 
92
150
  View the [changelog](https://github.com/ankane/isotree/blob/master/CHANGELOG.md)
data/ext/isotree/ext.cpp CHANGED
@@ -5,17 +5,77 @@
5
5
  #include <rice/Array.hpp>
6
6
  #include <rice/Hash.hpp>
7
7
  #include <rice/Module.hpp>
8
+ #include <rice/Object.hpp>
8
9
  #include <rice/String.hpp>
9
10
  #include <rice/Symbol.hpp>
10
11
 
11
12
  using Rice::Array;
12
13
  using Rice::Hash;
13
14
  using Rice::Module;
15
+ using Rice::Object;
14
16
  using Rice::String;
15
17
  using Rice::Symbol;
16
18
  using Rice::define_class_under;
17
19
  using Rice::define_module;
18
20
 
21
+ template<>
22
+ NewCategAction from_ruby<NewCategAction>(Object x)
23
+ {
24
+ auto value = x.to_s().str();
25
+ if (value == "weighted") return Weighted;
26
+ if (value == "smallest") return Smallest;
27
+ if (value == "random") return Random;
28
+ throw std::runtime_error("Unknown new categ action: " + value);
29
+ }
30
+
31
+ template<>
32
+ MissingAction from_ruby<MissingAction>(Object x)
33
+ {
34
+ auto value = x.to_s().str();
35
+ if (value == "divide") return Divide;
36
+ if (value == "impute") return Impute;
37
+ if (value == "fail") return Fail;
38
+ throw std::runtime_error("Unknown missing action: " + value);
39
+ }
40
+
41
+ template<>
42
+ CategSplit from_ruby<CategSplit>(Object x)
43
+ {
44
+ auto value = x.to_s().str();
45
+ if (value == "subset") return SubSet;
46
+ if (value == "single_categ") return SingleCateg;
47
+ throw std::runtime_error("Unknown categ split: " + value);
48
+ }
49
+
50
+ template<>
51
+ CoefType from_ruby<CoefType>(Object x)
52
+ {
53
+ auto value = x.to_s().str();
54
+ if (value == "uniform") return Uniform;
55
+ if (value == "normal") return Normal;
56
+ throw std::runtime_error("Unknown coef type: " + value);
57
+ }
58
+
59
+ template<>
60
+ UseDepthImp from_ruby<UseDepthImp>(Object x)
61
+ {
62
+ auto value = x.to_s().str();
63
+ if (value == "lower") return Lower;
64
+ if (value == "higher") return Higher;
65
+ if (value == "same") return Same;
66
+ throw std::runtime_error("Unknown depth imp: " + value);
67
+ }
68
+
69
+ template<>
70
+ WeighImpRows from_ruby<WeighImpRows>(Object x)
71
+ {
72
+ auto value = x.to_s().str();
73
+ if (value == "inverse") return Inverse;
74
+ if (value == "prop") return Prop;
75
+ if (value == "flat") return Flat;
76
+ throw std::runtime_error("Unknown weight imp rows: " + value);
77
+ }
78
+
19
79
  extern "C"
20
80
  void Init_ext()
21
81
  {
@@ -33,36 +93,28 @@ void Init_ext()
33
93
 
34
94
  // data
35
95
  size_t nrows = options.get<size_t, Symbol>("nrows");
36
- size_t ncols = options.get<size_t, Symbol>("ncols");
37
- double* numeric_data = (double*) options.get<String, Symbol>("numeric_data").c_str();
38
- size_t ncols_numeric = ncols;
39
- int* categ_data = NULL;
40
- size_t ncols_categ = 0;
41
- int* ncat = NULL;
96
+ size_t ncols_numeric = options.get<size_t, Symbol>("ncols_numeric");
97
+ size_t ncols_categ = options.get<size_t, Symbol>("ncols_categ");
98
+
99
+ double *restrict numeric_data = NULL;
100
+ if (ncols_numeric > 0) {
101
+ numeric_data = (double*) options.get<String, Symbol>("numeric_data").c_str();
102
+ }
103
+
104
+ int *restrict categorical_data = NULL;
105
+ int *restrict ncat = NULL;
106
+ if (ncols_categ > 0) {
107
+ categorical_data = (int*) options.get<String, Symbol>("categorical_data").c_str();
108
+ ncat = (int*) options.get<String, Symbol>("ncat").c_str();
109
+ }
110
+
111
+ // not used (sparse matrices)
42
112
  double* Xc = NULL;
43
113
  sparse_ix* Xc_ind = NULL;
44
114
  sparse_ix* Xc_indptr = NULL;
45
115
 
46
116
  // options
47
- CoefType coef_type = Normal;
48
- double* sample_weights = NULL;
49
- bool weight_as_sample = false;
50
- size_t max_depth = 0;
51
- bool limit_depth = true;
52
- bool standardize_dist = false;
53
- double* tmat = NULL;
54
- double* output_depths = NULL;
55
- bool standardize_depth = false;
56
- double* col_weights = NULL;
57
- MissingAction missing_action = Impute;
58
- CategSplit cat_split_type = SubSet;
59
- NewCategAction new_cat_action = Smallest;
60
- Imputer *imputer = NULL;
61
- UseDepthImp depth_imp = Higher;
62
- WeighImpRows weigh_imp_rows = Inverse;
63
- bool impute_at_fit = false;
64
-
65
- // Rice has limit of 14 arguments, so use hash for options
117
+ // Rice has limit of 14 arguments, so use hash
66
118
  size_t sample_size = options.get<size_t, Symbol>("sample_size");
67
119
  size_t ndim = options.get<size_t, Symbol>("ndim");
68
120
  size_t ntrees = options.get<size_t, Symbol>("ntrees");
@@ -72,21 +124,41 @@ void Init_ext()
72
124
  double prob_pick_by_gain_pl = options.get<double, Symbol>("prob_pick_pooled_gain");
73
125
  double prob_split_by_gain_pl = options.get<double, Symbol>("prob_split_pooled_gain");
74
126
  double min_gain = options.get<double, Symbol>("min_gain");
127
+ MissingAction missing_action = options.get<MissingAction, Symbol>("missing_action");
128
+ CategSplit cat_split_type = options.get<CategSplit, Symbol>("categ_split_type");
129
+ NewCategAction new_cat_action = options.get<NewCategAction, Symbol>("new_categ_action");
75
130
  bool all_perm = options.get<bool, Symbol>("all_perm");
76
131
  bool coef_by_prop = options.get<bool, Symbol>("coef_by_prop");
77
132
  bool with_replacement = options.get<bool, Symbol>("sample_with_replacement");
78
133
  bool penalize_range = options.get<bool, Symbol>("penalize_range");
79
134
  bool weigh_by_kurt = options.get<bool, Symbol>("weigh_by_kurtosis");
135
+ CoefType coef_type = options.get<CoefType, Symbol>("coefs");
80
136
  size_t min_imp_obs = options.get<size_t, Symbol>("min_imp_obs");
137
+ UseDepthImp depth_imp = options.get<UseDepthImp, Symbol>("depth_imp");
138
+ WeighImpRows weigh_imp_rows = options.get<WeighImpRows, Symbol>("weigh_imp_rows");
81
139
  uint64_t random_seed = options.get<uint64_t, Symbol>("random_seed");
82
140
  int nthreads = options.get<int, Symbol>("nthreads");
83
141
 
142
+ // TODO options
143
+ double* sample_weights = NULL;
144
+ bool weight_as_sample = false;
145
+ size_t max_depth = 0;
146
+ bool limit_depth = true;
147
+ bool standardize_dist = false;
148
+ double* tmat = NULL;
149
+ double* output_depths = NULL;
150
+ bool standardize_depth = false;
151
+ double* col_weights = NULL;
152
+ Imputer *imputer = NULL;
153
+ bool impute_at_fit = false;
154
+ bool handle_interrupt = false;
155
+
84
156
  fit_iforest(
85
157
  NULL,
86
158
  &iso,
87
159
  numeric_data,
88
160
  ncols_numeric,
89
- categ_data,
161
+ categorical_data,
90
162
  ncols_categ,
91
163
  ncat,
92
164
  Xc,
@@ -126,6 +198,7 @@ void Init_ext()
126
198
  weigh_imp_rows,
127
199
  impute_at_fit,
128
200
  random_seed,
201
+ handle_interrupt,
129
202
  nthreads
130
203
  );
131
204
 
@@ -136,8 +209,20 @@ void Init_ext()
136
209
  *[](ExtIsoForest& iso, Hash options) {
137
210
  // data
138
211
  size_t nrows = options.get<size_t, Symbol>("nrows");
139
- double* numeric_data = (double*) options.get<String, Symbol>("numeric_data").c_str();
140
- int* categ_data = NULL;
212
+ size_t ncols_numeric = options.get<size_t, Symbol>("ncols_numeric");
213
+ size_t ncols_categ = options.get<size_t, Symbol>("ncols_categ");
214
+
215
+ double *restrict numeric_data = NULL;
216
+ if (ncols_numeric > 0) {
217
+ numeric_data = (double*) options.get<String, Symbol>("numeric_data").c_str();
218
+ }
219
+
220
+ int *restrict categorical_data = NULL;
221
+ if (ncols_categ > 0) {
222
+ categorical_data = (int*) options.get<String, Symbol>("categorical_data").c_str();
223
+ }
224
+
225
+ // not used (sparse matrices)
141
226
  double* Xc = NULL;
142
227
  sparse_ix* Xc_ind = NULL;
143
228
  sparse_ix* Xc_indptr = NULL;
@@ -147,13 +232,13 @@ void Init_ext()
147
232
 
148
233
  // options
149
234
  int nthreads = options.get<int, Symbol>("nthreads");
150
- bool standardize = true;
235
+ bool standardize = options.get<bool, Symbol>("standardize");
151
236
  std::vector<double> outlier_scores(nrows);
152
237
  sparse_ix* tree_num = NULL;
153
238
 
154
239
  predict_iforest(
155
240
  numeric_data,
156
- categ_data,
241
+ categorical_data,
157
242
  Xc,
158
243
  Xc_ind,
159
244
  Xc_indptr,
@@ -174,5 +259,29 @@ void Init_ext()
174
259
  ret.push(outlier_scores[i]);
175
260
  }
176
261
  return ret;
262
+ })
263
+ .define_singleton_method(
264
+ "serialize_ext_isoforest",
265
+ *[](ExtIsoForest& iso, String path) {
266
+ #ifdef _MSC_VER
267
+ // TODO convert to wchar_t
268
+ throw std::runtime_error("Not supported on Windows yet");
269
+ #else
270
+ serialize_ext_isoforest(iso, path.c_str());
271
+ #endif
272
+ })
273
+ .define_singleton_method(
274
+ "deserialize_ext_isoforest",
275
+ *[](String path) {
276
+ ExtIsoForest iso;
277
+
278
+ #ifdef _MSC_VER
279
+ // TODO convert to wchar_t
280
+ throw std::runtime_error("Not supported on Windows yet");
281
+ #else
282
+ deserialize_ext_isoforest(iso, path.c_str());
283
+ #endif
284
+
285
+ return iso;
177
286
  });
178
287
  }
data/ext/isotree/extconf.rb CHANGED
@@ -12,10 +12,11 @@ end
12
12
 
13
13
  ext = File.expand_path(".", __dir__)
14
14
  isotree = File.expand_path("../../vendor/isotree/src", __dir__)
15
+ cereal = File.expand_path("../../vendor/cereal/include", __dir__)
15
16
 
16
17
  exclude = %w(Rwrapper.cpp RcppExports.cpp)
17
18
  $srcs = Dir["{#{ext},#{isotree}}/*.{cc,cpp}"].reject { |f| exclude.include?(File.basename(f)) }
18
- $INCFLAGS << " -I#{isotree}"
19
+ $INCFLAGS << " -I#{isotree} -I#{cereal}"
19
20
  $VPATH << isotree
20
21
 
21
22
  create_makefile("isotree/ext")
data/lib/isotree.rb CHANGED
@@ -3,7 +3,9 @@ require "isotree/ext"
3
3
 
4
4
  # stdlib
5
5
  require "etc"
6
+ require "json"
6
7
 
7
8
  # modules
9
+ require "isotree/dataset"
8
10
  require "isotree/isolation_forest"
9
11
  require "isotree/version"