isotree 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/LICENSE.txt +1 -1
- data/README.md +0 -6
- data/ext/isotree/ext.cpp +112 -130
- data/lib/isotree/isolation_forest.rb +33 -13
- data/lib/isotree/version.rb +1 -1
- metadata +6 -10
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: fc614bbd4c6751f60bb22751881a739965950eb4bc8fe225521d6d0caa67e0c1
|
|
4
|
+
data.tar.gz: ef7305b765fc240173173f9926057bef019347a2769f3f91cafd048c9493957b
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 641b9f59f1ec699dfdcbc19f882173306f677156ddfd9f8f6b49754c4fc703ef1d873ec30db152a5a252937f01b9a233d849645406346bed780e2f87ba827763
|
|
7
|
+
data.tar.gz: 8be8a5643ecd5dfde93aef878d643e3e47cf84943062137c66dbb27c7fd1501a3df746520764b420568bc6e8d162cbd8f3b8d37a94aa8f2e64292a35d5a8e6fa
|
data/CHANGELOG.md
CHANGED
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
|
@@ -157,12 +157,6 @@ Get the average isolation depth
|
|
|
157
157
|
model.predict(data, output: "avg_depth")
|
|
158
158
|
```
|
|
159
159
|
|
|
160
|
-
## Upgrading
|
|
161
|
-
|
|
162
|
-
### 0.3.0
|
|
163
|
-
|
|
164
|
-
This version uses IsoTree’s new serialization format. Exported models must be recreated.
|
|
165
|
-
|
|
166
160
|
## History
|
|
167
161
|
|
|
168
162
|
View the [changelog](https://github.com/ankane/isotree-ruby/blob/master/CHANGELOG.md)
|
data/ext/isotree/ext.cpp
CHANGED
|
@@ -1,7 +1,13 @@
|
|
|
1
1
|
// stdlib
|
|
2
2
|
#include <cmath>
|
|
3
|
+
#include <cstddef>
|
|
4
|
+
#include <cstdint>
|
|
3
5
|
#include <fstream>
|
|
4
6
|
#include <iostream>
|
|
7
|
+
#include <stdexcept>
|
|
8
|
+
#include <string>
|
|
9
|
+
#include <string_view>
|
|
10
|
+
#include <vector>
|
|
5
11
|
|
|
6
12
|
// isotree
|
|
7
13
|
#include <isotree.hpp>
|
|
@@ -11,21 +17,15 @@
|
|
|
11
17
|
|
|
12
18
|
using Rice::Array;
|
|
13
19
|
using Rice::Hash;
|
|
14
|
-
using Rice::Module;
|
|
15
20
|
using Rice::Object;
|
|
16
21
|
using Rice::String;
|
|
17
22
|
using Rice::Symbol;
|
|
18
|
-
using Rice::define_class_under;
|
|
19
|
-
using Rice::define_module;
|
|
20
23
|
|
|
21
|
-
namespace Rice::detail
|
|
22
|
-
{
|
|
24
|
+
namespace Rice::detail {
|
|
23
25
|
template<>
|
|
24
|
-
class From_Ruby<NewCategAction>
|
|
25
|
-
{
|
|
26
|
+
class From_Ruby<NewCategAction> {
|
|
26
27
|
public:
|
|
27
|
-
NewCategAction convert(VALUE x)
|
|
28
|
-
{
|
|
28
|
+
NewCategAction convert(VALUE x) {
|
|
29
29
|
auto value = Object(x).to_s().str();
|
|
30
30
|
if (value == "weighted" || value == "impute") return Weighted;
|
|
31
31
|
if (value == "smallest") return Smallest;
|
|
@@ -35,11 +35,9 @@ namespace Rice::detail
|
|
|
35
35
|
};
|
|
36
36
|
|
|
37
37
|
template<>
|
|
38
|
-
class From_Ruby<MissingAction>
|
|
39
|
-
{
|
|
38
|
+
class From_Ruby<MissingAction> {
|
|
40
39
|
public:
|
|
41
|
-
MissingAction convert(VALUE x)
|
|
42
|
-
{
|
|
40
|
+
MissingAction convert(VALUE x) {
|
|
43
41
|
auto value = Object(x).to_s().str();
|
|
44
42
|
if (value == "divide") return Divide;
|
|
45
43
|
if (value == "impute") return Impute;
|
|
@@ -49,11 +47,9 @@ namespace Rice::detail
|
|
|
49
47
|
};
|
|
50
48
|
|
|
51
49
|
template<>
|
|
52
|
-
class From_Ruby<CategSplit>
|
|
53
|
-
{
|
|
50
|
+
class From_Ruby<CategSplit> {
|
|
54
51
|
public:
|
|
55
|
-
CategSplit convert(VALUE x)
|
|
56
|
-
{
|
|
52
|
+
CategSplit convert(VALUE x) {
|
|
57
53
|
auto value = Object(x).to_s().str();
|
|
58
54
|
if (value == "subset") return SubSet;
|
|
59
55
|
if (value == "single_categ") return SingleCateg;
|
|
@@ -62,11 +58,9 @@ namespace Rice::detail
|
|
|
62
58
|
};
|
|
63
59
|
|
|
64
60
|
template<>
|
|
65
|
-
class From_Ruby<CoefType>
|
|
66
|
-
{
|
|
61
|
+
class From_Ruby<CoefType> {
|
|
67
62
|
public:
|
|
68
|
-
CoefType convert(VALUE x)
|
|
69
|
-
{
|
|
63
|
+
CoefType convert(VALUE x) {
|
|
70
64
|
auto value = Object(x).to_s().str();
|
|
71
65
|
if (value == "uniform") return Uniform;
|
|
72
66
|
if (value == "normal") return Normal;
|
|
@@ -75,11 +69,9 @@ namespace Rice::detail
|
|
|
75
69
|
};
|
|
76
70
|
|
|
77
71
|
template<>
|
|
78
|
-
class From_Ruby<UseDepthImp>
|
|
79
|
-
{
|
|
72
|
+
class From_Ruby<UseDepthImp> {
|
|
80
73
|
public:
|
|
81
|
-
UseDepthImp convert(VALUE x)
|
|
82
|
-
{
|
|
74
|
+
UseDepthImp convert(VALUE x) {
|
|
83
75
|
auto value = Object(x).to_s().str();
|
|
84
76
|
if (value == "lower") return Lower;
|
|
85
77
|
if (value == "higher") return Higher;
|
|
@@ -89,11 +81,9 @@ namespace Rice::detail
|
|
|
89
81
|
};
|
|
90
82
|
|
|
91
83
|
template<>
|
|
92
|
-
class From_Ruby<WeighImpRows>
|
|
93
|
-
{
|
|
84
|
+
class From_Ruby<WeighImpRows> {
|
|
94
85
|
public:
|
|
95
|
-
WeighImpRows convert(VALUE x)
|
|
96
|
-
{
|
|
86
|
+
WeighImpRows convert(VALUE x) {
|
|
97
87
|
auto value = Object(x).to_s().str();
|
|
98
88
|
if (value == "inverse") return Inverse;
|
|
99
89
|
if (value == "prop") return Prop;
|
|
@@ -103,11 +93,9 @@ namespace Rice::detail
|
|
|
103
93
|
};
|
|
104
94
|
|
|
105
95
|
template<>
|
|
106
|
-
class From_Ruby<ScoringMetric>
|
|
107
|
-
{
|
|
96
|
+
class From_Ruby<ScoringMetric> {
|
|
108
97
|
public:
|
|
109
|
-
ScoringMetric convert(VALUE x)
|
|
110
|
-
{
|
|
98
|
+
ScoringMetric convert(VALUE x) {
|
|
111
99
|
auto value = Object(x).to_s().str();
|
|
112
100
|
if (value == "depth") return Depth;
|
|
113
101
|
if (value == "adj_depth") return AdjDepth;
|
|
@@ -119,15 +107,14 @@ namespace Rice::detail
|
|
|
119
107
|
throw std::runtime_error("Unknown scoring metric: " + value);
|
|
120
108
|
}
|
|
121
109
|
};
|
|
122
|
-
}
|
|
110
|
+
} // namespace Rice::detail
|
|
123
111
|
|
|
124
112
|
extern "C"
|
|
125
|
-
void Init_ext()
|
|
126
|
-
|
|
127
|
-
Module rb_mIsoTree = define_module("IsoTree");
|
|
113
|
+
void Init_ext() {
|
|
114
|
+
Rice::Module rb_mIsoTree = Rice::define_module("IsoTree");
|
|
128
115
|
|
|
129
|
-
Module rb_mExt = define_module_under(rb_mIsoTree, "Ext");
|
|
130
|
-
define_class_under<ExtIsoForest>(rb_mExt, "ExtIsoForest");
|
|
116
|
+
Rice::Module rb_mExt = Rice::define_module_under(rb_mIsoTree, "Ext");
|
|
117
|
+
Rice::define_class_under<ExtIsoForest>(rb_mExt, "ExtIsoForest");
|
|
131
118
|
|
|
132
119
|
rb_mExt
|
|
133
120
|
.define_singleton_function(
|
|
@@ -137,77 +124,77 @@ void Init_ext()
|
|
|
137
124
|
ExtIsoForest iso;
|
|
138
125
|
|
|
139
126
|
// data
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
127
|
+
auto nrows = options.get<size_t, Symbol>("nrows");
|
|
128
|
+
auto ncols_numeric = options.get<size_t, Symbol>("ncols_numeric");
|
|
129
|
+
auto ncols_categ = options.get<size_t, Symbol>("ncols_categ");
|
|
143
130
|
|
|
144
|
-
real_t* numeric_data =
|
|
131
|
+
real_t* numeric_data = nullptr;
|
|
145
132
|
if (ncols_numeric > 0) {
|
|
146
|
-
numeric_data = (
|
|
133
|
+
numeric_data = reinterpret_cast<real_t*>(const_cast<char*>(options.get<String, Symbol>("numeric_data").c_str()));
|
|
147
134
|
}
|
|
148
135
|
|
|
149
|
-
int* categorical_data =
|
|
150
|
-
int* ncat =
|
|
136
|
+
int* categorical_data = nullptr;
|
|
137
|
+
int* ncat = nullptr;
|
|
151
138
|
if (ncols_categ > 0) {
|
|
152
|
-
categorical_data = (
|
|
153
|
-
ncat = (
|
|
139
|
+
categorical_data = reinterpret_cast<int*>(const_cast<char*>(options.get<String, Symbol>("categorical_data").c_str()));
|
|
140
|
+
ncat = reinterpret_cast<int*>(const_cast<char*>(options.get<String, Symbol>("ncat").c_str()));
|
|
154
141
|
}
|
|
155
142
|
|
|
156
143
|
// not used (sparse matrices)
|
|
157
|
-
real_t* Xc =
|
|
158
|
-
sparse_ix* Xc_ind =
|
|
159
|
-
sparse_ix* Xc_indptr =
|
|
144
|
+
real_t* Xc = nullptr;
|
|
145
|
+
sparse_ix* Xc_ind = nullptr;
|
|
146
|
+
sparse_ix* Xc_indptr = nullptr;
|
|
160
147
|
|
|
161
148
|
// options
|
|
162
149
|
// Rice has limit of 14 arguments, so use hash
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
150
|
+
auto sample_size = options.get<size_t, Symbol>("sample_size");
|
|
151
|
+
auto ndim = options.get<size_t, Symbol>("ndim");
|
|
152
|
+
auto ntrees = options.get<size_t, Symbol>("ntrees");
|
|
153
|
+
auto ntry = options.get<size_t, Symbol>("ntry");
|
|
154
|
+
auto prob_pick_by_gain_avg = options.get<double, Symbol>("prob_pick_avg_gain");
|
|
155
|
+
auto prob_pick_by_gain_pl = options.get<double, Symbol>("prob_pick_pooled_gain");
|
|
156
|
+
auto min_gain = options.get<double, Symbol>("min_gain");
|
|
157
|
+
auto missing_action = options.get<MissingAction, Symbol>("missing_action");
|
|
158
|
+
auto cat_split_type = options.get<CategSplit, Symbol>("categ_split_type");
|
|
159
|
+
auto new_cat_action = options.get<NewCategAction, Symbol>("new_categ_action");
|
|
160
|
+
auto all_perm = options.get<bool, Symbol>("all_perm");
|
|
161
|
+
auto coef_by_prop = options.get<bool, Symbol>("coef_by_prop");
|
|
162
|
+
auto with_replacement = options.get<bool, Symbol>("sample_with_replacement");
|
|
163
|
+
auto penalize_range = options.get<bool, Symbol>("penalize_range");
|
|
164
|
+
auto weigh_by_kurt = options.get<bool, Symbol>("weigh_by_kurtosis");
|
|
165
|
+
auto coef_type = options.get<CoefType, Symbol>("coefs");
|
|
166
|
+
auto min_imp_obs = options.get<size_t, Symbol>("min_imp_obs");
|
|
167
|
+
auto depth_imp = options.get<UseDepthImp, Symbol>("depth_imp");
|
|
168
|
+
auto weigh_imp_rows = options.get<WeighImpRows, Symbol>("weigh_imp_rows");
|
|
169
|
+
auto random_seed = options.get<uint64_t, Symbol>("random_seed");
|
|
170
|
+
auto use_long_double = options.get<bool, Symbol>("use_long_double");
|
|
171
|
+
auto nthreads = options.get<int, Symbol>("nthreads");
|
|
185
172
|
|
|
186
173
|
// TODO options
|
|
187
|
-
double* sample_weights =
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
174
|
+
double* sample_weights = nullptr;
|
|
175
|
+
auto weight_as_sample = options.get<bool, Symbol>("weights_as_sample_prob");
|
|
176
|
+
auto max_depth = options.get<size_t, Symbol>("max_depth");
|
|
177
|
+
auto limit_depth = options.get<bool, Symbol>("limit_depth");
|
|
191
178
|
bool standardize_dist = false;
|
|
192
|
-
double* tmat =
|
|
193
|
-
double* output_depths =
|
|
179
|
+
double* tmat = nullptr;
|
|
180
|
+
double* output_depths = nullptr;
|
|
194
181
|
bool standardize_depth = false;
|
|
195
|
-
real_t* col_weights =
|
|
196
|
-
Imputer* imputer =
|
|
182
|
+
real_t* col_weights = nullptr;
|
|
183
|
+
Imputer* imputer = nullptr;
|
|
197
184
|
bool impute_at_fit = false;
|
|
198
185
|
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
186
|
+
auto ncols_per_tree = options.get<int, Symbol>("ncols_per_tree");
|
|
187
|
+
auto standardize_data = options.get<bool, Symbol>("standardize_data");
|
|
188
|
+
auto scoring_metric = options.get<ScoringMetric, Symbol>("scoring_metric");
|
|
189
|
+
auto fast_bratio = options.get<bool, Symbol>("fast_bratio");
|
|
190
|
+
auto prob_pick_by_full_gain = options.get<double, Symbol>("prob_pick_full_gain");
|
|
191
|
+
auto prob_pick_by_dens = options.get<double, Symbol>("prob_pick_dens");
|
|
192
|
+
auto prob_pick_col_by_range = options.get<double, Symbol>("prob_pick_col_by_range");
|
|
193
|
+
auto prob_pick_col_by_var = options.get<double, Symbol>("prob_pick_col_by_var");
|
|
194
|
+
auto prob_pick_col_by_kurt = options.get<double, Symbol>("prob_pick_col_by_kurt");
|
|
208
195
|
|
|
209
196
|
fit_iforest(
|
|
210
|
-
|
|
197
|
+
nullptr,
|
|
211
198
|
&iso,
|
|
212
199
|
numeric_data,
|
|
213
200
|
ncols_numeric,
|
|
@@ -268,37 +255,37 @@ void Init_ext()
|
|
|
268
255
|
"predict_iforest",
|
|
269
256
|
[](ExtIsoForest& iso, Hash options) {
|
|
270
257
|
// data
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
258
|
+
auto nrows = options.get<size_t, Symbol>("nrows");
|
|
259
|
+
auto ncols_numeric = options.get<size_t, Symbol>("ncols_numeric");
|
|
260
|
+
auto ncols_categ = options.get<size_t, Symbol>("ncols_categ");
|
|
274
261
|
|
|
275
|
-
real_t* numeric_data =
|
|
262
|
+
real_t* numeric_data = nullptr;
|
|
276
263
|
if (ncols_numeric > 0) {
|
|
277
|
-
numeric_data = (
|
|
264
|
+
numeric_data = reinterpret_cast<real_t*>(const_cast<char*>(options.get<String, Symbol>("numeric_data").c_str()));
|
|
278
265
|
}
|
|
279
266
|
|
|
280
|
-
int* categorical_data =
|
|
267
|
+
int* categorical_data = nullptr;
|
|
281
268
|
if (ncols_categ > 0) {
|
|
282
|
-
categorical_data = (
|
|
269
|
+
categorical_data = reinterpret_cast<int*>(const_cast<char*>(options.get<String, Symbol>("categorical_data").c_str()));
|
|
283
270
|
}
|
|
284
271
|
|
|
285
272
|
// not used (sparse matrices)
|
|
286
|
-
real_t* Xc =
|
|
287
|
-
sparse_ix* Xc_ind =
|
|
288
|
-
sparse_ix* Xc_indptr =
|
|
289
|
-
real_t* Xr =
|
|
290
|
-
sparse_ix* Xr_ind =
|
|
291
|
-
sparse_ix* Xr_indptr =
|
|
273
|
+
real_t* Xc = nullptr;
|
|
274
|
+
sparse_ix* Xc_ind = nullptr;
|
|
275
|
+
sparse_ix* Xc_indptr = nullptr;
|
|
276
|
+
real_t* Xr = nullptr;
|
|
277
|
+
sparse_ix* Xr_ind = nullptr;
|
|
278
|
+
sparse_ix* Xr_indptr = nullptr;
|
|
292
279
|
|
|
293
280
|
// options
|
|
294
|
-
|
|
295
|
-
|
|
281
|
+
auto nthreads = options.get<int, Symbol>("nthreads");
|
|
282
|
+
auto standardize = options.get<bool, Symbol>("standardize");
|
|
296
283
|
std::vector<double> outlier_scores(nrows);
|
|
297
|
-
sparse_ix* tree_num =
|
|
284
|
+
sparse_ix* tree_num = nullptr;
|
|
298
285
|
bool is_col_major = true;
|
|
299
286
|
size_t ld_numeric = 0;
|
|
300
287
|
size_t ld_categ = 0;
|
|
301
|
-
double* per_tree_depths =
|
|
288
|
+
double* per_tree_depths = nullptr;
|
|
302
289
|
|
|
303
290
|
predict_iforest(
|
|
304
291
|
numeric_data,
|
|
@@ -315,49 +302,49 @@ void Init_ext()
|
|
|
315
302
|
nrows,
|
|
316
303
|
nthreads,
|
|
317
304
|
standardize,
|
|
318
|
-
|
|
305
|
+
nullptr,
|
|
319
306
|
&iso,
|
|
320
307
|
outlier_scores.data(),
|
|
321
308
|
tree_num,
|
|
322
309
|
per_tree_depths,
|
|
323
|
-
|
|
310
|
+
nullptr
|
|
324
311
|
);
|
|
325
312
|
|
|
326
313
|
Array ret;
|
|
327
|
-
for (
|
|
328
|
-
ret.push(
|
|
314
|
+
for (auto v : outlier_scores) {
|
|
315
|
+
ret.push(v, false);
|
|
329
316
|
}
|
|
330
317
|
return ret;
|
|
331
318
|
})
|
|
332
319
|
.define_singleton_function(
|
|
333
320
|
"serialize_combined",
|
|
334
321
|
[](ExtIsoForest& iso, String path, String metadata) {
|
|
335
|
-
|
|
322
|
+
#ifdef _MSC_VER
|
|
336
323
|
// TODO convert to wchar_t
|
|
337
324
|
throw std::runtime_error("Not supported on Windows yet");
|
|
338
|
-
|
|
325
|
+
#else
|
|
339
326
|
std::ofstream file;
|
|
340
327
|
file.open(path.c_str());
|
|
341
328
|
serialize_combined(
|
|
342
|
-
|
|
329
|
+
nullptr,
|
|
343
330
|
&iso,
|
|
344
|
-
|
|
345
|
-
|
|
331
|
+
nullptr,
|
|
332
|
+
nullptr,
|
|
346
333
|
metadata.c_str(),
|
|
347
334
|
// returns bytesize (RSTRING_LEN)
|
|
348
335
|
metadata.length(),
|
|
349
336
|
file
|
|
350
337
|
);
|
|
351
338
|
file.close();
|
|
352
|
-
|
|
339
|
+
#endif
|
|
353
340
|
})
|
|
354
341
|
.define_singleton_function(
|
|
355
342
|
"deserialize_combined",
|
|
356
343
|
[](String path) {
|
|
357
|
-
|
|
344
|
+
#ifdef _MSC_VER
|
|
358
345
|
// TODO convert to wchar_t
|
|
359
346
|
throw std::runtime_error("Not supported on Windows yet");
|
|
360
|
-
|
|
347
|
+
#else
|
|
361
348
|
Array ret;
|
|
362
349
|
|
|
363
350
|
std::ifstream file;
|
|
@@ -403,20 +390,15 @@ void Init_ext()
|
|
|
403
390
|
ExtIsoForest model_ext = ExtIsoForest();
|
|
404
391
|
Imputer imputer = Imputer();
|
|
405
392
|
TreesIndexer indexer = TreesIndexer();
|
|
406
|
-
char
|
|
407
|
-
if (optional_metadata == NULL) {
|
|
408
|
-
throw std::runtime_error("Cannot allocate memory");
|
|
409
|
-
}
|
|
393
|
+
std::vector<char> optional_metadata(size_metadata, 0);
|
|
410
394
|
|
|
411
|
-
deserialize_combined(file, &model, &model_ext, &imputer, &indexer, optional_metadata);
|
|
395
|
+
deserialize_combined(file, &model, &model_ext, &imputer, &indexer, optional_metadata.data());
|
|
412
396
|
file.close();
|
|
413
397
|
|
|
414
|
-
ret.push(Object(Rice::detail::To_Ruby<ExtIsoForest>().convert(model_ext)));
|
|
415
|
-
ret.push(String(std::
|
|
416
|
-
|
|
417
|
-
free(optional_metadata);
|
|
398
|
+
ret.push(Object(Rice::detail::To_Ruby<ExtIsoForest>().convert(model_ext)), false);
|
|
399
|
+
ret.push(String(std::string_view{optional_metadata.data(), optional_metadata.size()}), false);
|
|
418
400
|
|
|
419
401
|
return ret;
|
|
420
|
-
|
|
402
|
+
#endif
|
|
421
403
|
});
|
|
422
404
|
}
|
|
@@ -1,24 +1,44 @@
|
|
|
1
1
|
module IsoTree
|
|
2
2
|
class IsolationForest
|
|
3
3
|
def initialize(
|
|
4
|
-
sample_size: "auto",
|
|
4
|
+
sample_size: "auto",
|
|
5
|
+
ntrees: 500,
|
|
6
|
+
ndim: 3,
|
|
7
|
+
ntry: 1,
|
|
5
8
|
# categ_cols: nil,
|
|
6
|
-
max_depth: "auto",
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
9
|
+
max_depth: "auto",
|
|
10
|
+
ncols_per_tree: nil,
|
|
11
|
+
prob_pick_pooled_gain: 0.0,
|
|
12
|
+
prob_pick_avg_gain: 0.0,
|
|
13
|
+
prob_pick_full_gain: 0.0,
|
|
14
|
+
prob_pick_dens: 0.0,
|
|
15
|
+
prob_pick_col_by_range: 0.0,
|
|
16
|
+
prob_pick_col_by_var: 0.0,
|
|
17
|
+
prob_pick_col_by_kurt: 0.0,
|
|
18
|
+
min_gain: 0.0,
|
|
19
|
+
missing_action: "auto",
|
|
20
|
+
new_categ_action: "auto",
|
|
21
|
+
categ_split_type: "auto",
|
|
22
|
+
all_perm: false,
|
|
23
|
+
coef_by_prop: false,
|
|
12
24
|
# recode_categ: false,
|
|
13
25
|
weights_as_sample_prob: true,
|
|
14
|
-
sample_with_replacement: false,
|
|
15
|
-
|
|
16
|
-
|
|
26
|
+
sample_with_replacement: false,
|
|
27
|
+
penalize_range: false,
|
|
28
|
+
standardize_data: true,
|
|
29
|
+
scoring_metric: "depth",
|
|
30
|
+
fast_bratio: true,
|
|
31
|
+
weigh_by_kurtosis: false,
|
|
32
|
+
coefs: "uniform",
|
|
33
|
+
assume_full_distr: true,
|
|
17
34
|
# build_imputer: false,
|
|
18
|
-
min_imp_obs: 3,
|
|
19
|
-
|
|
35
|
+
min_imp_obs: 3,
|
|
36
|
+
depth_imp: "higher",
|
|
37
|
+
weigh_imp_rows: "inverse",
|
|
38
|
+
random_seed: 1,
|
|
39
|
+
use_long_double: false,
|
|
40
|
+
nthreads: -1
|
|
20
41
|
)
|
|
21
|
-
|
|
22
42
|
@sample_size = sample_size
|
|
23
43
|
@ntrees = ntrees
|
|
24
44
|
@ndim = ndim
|
data/lib/isotree/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: isotree
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.5.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Andrew Kane
|
|
8
|
-
autorequire:
|
|
9
8
|
bindir: bin
|
|
10
9
|
cert_chain: []
|
|
11
|
-
date:
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
12
11
|
dependencies:
|
|
13
12
|
- !ruby/object:Gem::Dependency
|
|
14
13
|
name: rice
|
|
@@ -16,15 +15,14 @@ dependencies:
|
|
|
16
15
|
requirements:
|
|
17
16
|
- - ">="
|
|
18
17
|
- !ruby/object:Gem::Version
|
|
19
|
-
version: '4.
|
|
18
|
+
version: '4.7'
|
|
20
19
|
type: :runtime
|
|
21
20
|
prerelease: false
|
|
22
21
|
version_requirements: !ruby/object:Gem::Requirement
|
|
23
22
|
requirements:
|
|
24
23
|
- - ">="
|
|
25
24
|
- !ruby/object:Gem::Version
|
|
26
|
-
version: '4.
|
|
27
|
-
description:
|
|
25
|
+
version: '4.7'
|
|
28
26
|
email: andrew@ankane.org
|
|
29
27
|
executables: []
|
|
30
28
|
extensions:
|
|
@@ -88,7 +86,6 @@ homepage: https://github.com/ankane/isotree-ruby
|
|
|
88
86
|
licenses:
|
|
89
87
|
- BSD-2-Clause
|
|
90
88
|
metadata: {}
|
|
91
|
-
post_install_message:
|
|
92
89
|
rdoc_options: []
|
|
93
90
|
require_paths:
|
|
94
91
|
- lib
|
|
@@ -96,15 +93,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
96
93
|
requirements:
|
|
97
94
|
- - ">="
|
|
98
95
|
- !ruby/object:Gem::Version
|
|
99
|
-
version: '3.
|
|
96
|
+
version: '3.3'
|
|
100
97
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
101
98
|
requirements:
|
|
102
99
|
- - ">="
|
|
103
100
|
- !ruby/object:Gem::Version
|
|
104
101
|
version: '0'
|
|
105
102
|
requirements: []
|
|
106
|
-
rubygems_version:
|
|
107
|
-
signing_key:
|
|
103
|
+
rubygems_version: 4.0.6
|
|
108
104
|
specification_version: 4
|
|
109
105
|
summary: Outlier/anomaly detection for Ruby using Isolation Forest
|
|
110
106
|
test_files: []
|