outliertree 0.4.1 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/ext/outliertree/ext.cpp +56 -41
- data/lib/outliertree/model.rb +13 -4
- data/lib/outliertree/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: fd25a8154076c68420d1908a6dcb29cf0b9d1fec972cbc24063c3c4cf6de63e4
|
|
4
|
+
data.tar.gz: a834dcb5791ee8083d7ae8721cb22abfb468e123ae78940143a33df0d2ef0f98
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 0ab3d03adf97e689d188a91ceb86b77fbe69bbc45ca64426f84ae93360df2cdd395f5c1b66d7d8f371d1212c4a615c2466bdb8d041ef60d200d092967bbd3341
|
|
7
|
+
data.tar.gz: eb60aee1c0bb479db304491c804209e9c3ea51a5d089c5c05b90f1490c0190339b7a8f7c8b64c20b82ffafa7853f8f3cce2493647d95f0d86cbc890df4226987
|
data/CHANGELOG.md
CHANGED
data/ext/outliertree/ext.cpp
CHANGED
|
@@ -1,73 +1,83 @@
|
|
|
1
|
+
#include <complex>
|
|
2
|
+
#include <vector>
|
|
3
|
+
|
|
1
4
|
// outliertree
|
|
2
5
|
#include <outlier_tree.hpp>
|
|
3
6
|
|
|
7
|
+
// fix warning
|
|
8
|
+
#undef restrict
|
|
9
|
+
|
|
4
10
|
// rice
|
|
5
11
|
#include <rice/rice.hpp>
|
|
6
12
|
#include <rice/stl.hpp>
|
|
7
13
|
|
|
8
14
|
using Rice::Array;
|
|
9
15
|
using Rice::Hash;
|
|
10
|
-
using Rice::Module;
|
|
11
16
|
using Rice::Object;
|
|
12
17
|
using Rice::String;
|
|
13
18
|
using Rice::Symbol;
|
|
14
|
-
using Rice::define_class_under;
|
|
15
|
-
using Rice::define_module;
|
|
16
19
|
|
|
17
|
-
namespace Rice::detail
|
|
18
|
-
{
|
|
20
|
+
namespace Rice::detail {
|
|
19
21
|
template<typename T>
|
|
20
|
-
class To_Ruby<std::vector<T>>
|
|
21
|
-
{
|
|
22
|
+
class To_Ruby<std::vector<T>> {
|
|
22
23
|
public:
|
|
23
|
-
|
|
24
|
-
|
|
24
|
+
To_Ruby() = default;
|
|
25
|
+
|
|
26
|
+
explicit To_Ruby(Arg* arg) : arg_(arg) { }
|
|
27
|
+
|
|
28
|
+
VALUE convert(std::vector<T> const & x) {
|
|
25
29
|
auto a = rb_ary_new2(x.size());
|
|
26
30
|
for (const auto& v : x) {
|
|
27
31
|
rb_ary_push(a, To_Ruby<T>().convert(v));
|
|
28
32
|
}
|
|
29
33
|
return a;
|
|
30
34
|
}
|
|
35
|
+
|
|
36
|
+
private:
|
|
37
|
+
Arg* arg_ = nullptr;
|
|
31
38
|
};
|
|
32
39
|
|
|
33
40
|
template<>
|
|
34
|
-
class To_Ruby<std::vector<signed char>>
|
|
35
|
-
{
|
|
41
|
+
class To_Ruby<std::vector<signed char>> {
|
|
36
42
|
public:
|
|
37
|
-
|
|
38
|
-
|
|
43
|
+
To_Ruby() = default;
|
|
44
|
+
|
|
45
|
+
explicit To_Ruby(Arg* arg) : arg_(arg) { }
|
|
46
|
+
|
|
47
|
+
VALUE convert(std::vector<signed char> const & x) {
|
|
39
48
|
auto a = rb_ary_new2(x.size());
|
|
40
49
|
for (const auto& v : x) {
|
|
41
50
|
rb_ary_push(a, To_Ruby<signed char>().convert(v));
|
|
42
51
|
}
|
|
43
52
|
return a;
|
|
44
53
|
}
|
|
54
|
+
|
|
55
|
+
private:
|
|
56
|
+
Arg* arg_ = nullptr;
|
|
45
57
|
};
|
|
46
58
|
|
|
47
59
|
template<>
|
|
48
|
-
struct Type<std::vector<signed char>>
|
|
49
|
-
|
|
50
|
-
static bool verify()
|
|
51
|
-
{
|
|
60
|
+
struct Type<std::vector<signed char>> {
|
|
61
|
+
static bool verify() {
|
|
52
62
|
return true;
|
|
53
63
|
}
|
|
54
64
|
};
|
|
55
65
|
|
|
56
66
|
template<>
|
|
57
|
-
struct Type<ColType>
|
|
58
|
-
|
|
59
|
-
static bool verify()
|
|
60
|
-
{
|
|
67
|
+
struct Type<ColType> {
|
|
68
|
+
static bool verify() {
|
|
61
69
|
return true;
|
|
62
70
|
}
|
|
63
71
|
};
|
|
64
72
|
|
|
65
73
|
template<>
|
|
66
|
-
class To_Ruby<ColType>
|
|
67
|
-
{
|
|
74
|
+
class To_Ruby<ColType> {
|
|
68
75
|
public:
|
|
69
|
-
|
|
70
|
-
|
|
76
|
+
To_Ruby() = default;
|
|
77
|
+
|
|
78
|
+
explicit To_Ruby(Arg* arg) : arg_(arg) { }
|
|
79
|
+
|
|
80
|
+
VALUE convert(ColType const & x) {
|
|
71
81
|
switch (x) {
|
|
72
82
|
case Numeric: return Symbol("numeric");
|
|
73
83
|
case Categorical: return Symbol("categorical");
|
|
@@ -76,23 +86,26 @@ namespace Rice::detail
|
|
|
76
86
|
}
|
|
77
87
|
throw std::runtime_error("Unknown column type");
|
|
78
88
|
}
|
|
89
|
+
|
|
90
|
+
private:
|
|
91
|
+
Arg* arg_ = nullptr;
|
|
79
92
|
};
|
|
80
93
|
|
|
81
94
|
template<>
|
|
82
|
-
struct Type<SplitType>
|
|
83
|
-
|
|
84
|
-
static bool verify()
|
|
85
|
-
{
|
|
95
|
+
struct Type<SplitType> {
|
|
96
|
+
static bool verify() {
|
|
86
97
|
return true;
|
|
87
98
|
}
|
|
88
99
|
};
|
|
89
100
|
|
|
90
101
|
template<>
|
|
91
|
-
class To_Ruby<SplitType>
|
|
92
|
-
{
|
|
102
|
+
class To_Ruby<SplitType> {
|
|
93
103
|
public:
|
|
94
|
-
|
|
95
|
-
|
|
104
|
+
To_Ruby() = default;
|
|
105
|
+
|
|
106
|
+
explicit To_Ruby(Arg* arg) : arg_(arg) { }
|
|
107
|
+
|
|
108
|
+
VALUE convert(SplitType const & x) {
|
|
96
109
|
switch (x) {
|
|
97
110
|
case LessOrEqual: return Symbol("less_or_equal");
|
|
98
111
|
case Greater: return Symbol("greater");
|
|
@@ -107,16 +120,18 @@ namespace Rice::detail
|
|
|
107
120
|
}
|
|
108
121
|
throw std::runtime_error("Unknown split type");
|
|
109
122
|
}
|
|
123
|
+
|
|
124
|
+
private:
|
|
125
|
+
Arg* arg_ = nullptr;
|
|
110
126
|
};
|
|
111
|
-
}
|
|
127
|
+
} // namespace Rice::detail
|
|
112
128
|
|
|
113
129
|
extern "C"
|
|
114
|
-
void Init_ext()
|
|
115
|
-
|
|
116
|
-
Module rb_mOutlierTree = define_module("OutlierTree");
|
|
117
|
-
Module rb_mExt = define_module_under(rb_mOutlierTree, "Ext");
|
|
130
|
+
void Init_ext() {
|
|
131
|
+
Rice::Module rb_mOutlierTree = Rice::define_module("OutlierTree");
|
|
132
|
+
Rice::Module rb_mExt = Rice::define_module_under(rb_mOutlierTree, "Ext");
|
|
118
133
|
|
|
119
|
-
define_class_under<Cluster>(rb_mExt, "Cluster")
|
|
134
|
+
Rice::define_class_under<Cluster>(rb_mExt, "Cluster")
|
|
120
135
|
.define_method("upper_lim", [](Cluster& self) { return self.upper_lim; })
|
|
121
136
|
.define_method("display_lim_high", [](Cluster& self) { return self.display_lim_high; })
|
|
122
137
|
.define_method("perc_below", [](Cluster& self) { return self.perc_below; })
|
|
@@ -133,7 +148,7 @@ void Init_ext()
|
|
|
133
148
|
.define_method("has_na_branch", [](Cluster& self) { return self.has_NA_branch; })
|
|
134
149
|
.define_method("col_num", [](Cluster& self) { return self.col_num; });
|
|
135
150
|
|
|
136
|
-
define_class_under<ClusterTree>(rb_mExt, "ClusterTree")
|
|
151
|
+
Rice::define_class_under<ClusterTree>(rb_mExt, "ClusterTree")
|
|
137
152
|
.define_method("parent_branch", [](ClusterTree& self) { return self.parent_branch; })
|
|
138
153
|
.define_method("parent", [](ClusterTree& self) { return self.parent; })
|
|
139
154
|
.define_method("all_branches", [](ClusterTree& self) { return self.all_branches; })
|
|
@@ -143,7 +158,7 @@ void Init_ext()
|
|
|
143
158
|
.define_method("split_subset", [](ClusterTree& self) { return self.split_subset; })
|
|
144
159
|
.define_method("split_lev", [](ClusterTree& self) { return self.split_lev; });
|
|
145
160
|
|
|
146
|
-
define_class_under<ModelOutputs>(rb_mExt, "ModelOutputs")
|
|
161
|
+
Rice::define_class_under<ModelOutputs>(rb_mExt, "ModelOutputs")
|
|
147
162
|
.define_method("outlier_scores_final", [](ModelOutputs& self) { return self.outlier_scores_final; })
|
|
148
163
|
.define_method("outlier_columns_final", [](ModelOutputs& self) { return self.outlier_columns_final; })
|
|
149
164
|
.define_method("outlier_clusters_final", [](ModelOutputs& self) { return self.outlier_clusters_final; })
|
data/lib/outliertree/model.rb
CHANGED
|
@@ -1,11 +1,20 @@
|
|
|
1
1
|
module OutlierTree
|
|
2
2
|
class Model
|
|
3
3
|
def initialize(
|
|
4
|
-
max_depth: 4,
|
|
5
|
-
|
|
6
|
-
|
|
4
|
+
max_depth: 4,
|
|
5
|
+
min_gain: 0.01,
|
|
6
|
+
z_norm: 2.67,
|
|
7
|
+
z_outlier: 8.0,
|
|
8
|
+
pct_outliers: 0.01,
|
|
9
|
+
min_size_numeric: 25,
|
|
10
|
+
min_size_categ: 50,
|
|
11
|
+
categ_split: "binarize",
|
|
12
|
+
categ_outliers: "tail",
|
|
13
|
+
numeric_split: "raw",
|
|
14
|
+
follow_all: false,
|
|
15
|
+
gain_as_pct: true,
|
|
16
|
+
nthreads: -1
|
|
7
17
|
)
|
|
8
|
-
|
|
9
18
|
# TODO validate values
|
|
10
19
|
@max_depth = max_depth
|
|
11
20
|
@min_gain = min_gain
|
data/lib/outliertree/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: outliertree
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.4.1
|
|
4
|
+
version: 0.4.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Andrew Kane
|
|
@@ -72,7 +72,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
72
72
|
- !ruby/object:Gem::Version
|
|
73
73
|
version: '0'
|
|
74
74
|
requirements: []
|
|
75
|
-
rubygems_version: 3.6.
|
|
75
|
+
rubygems_version: 3.6.9
|
|
76
76
|
specification_version: 4
|
|
77
77
|
summary: Explainable outlier/anomaly detection for Ruby
|
|
78
78
|
test_files: []
|