outliertree 0.1.2 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/ext/outliertree/ext.cpp +104 -105
- data/ext/outliertree/extconf.rb +1 -1
- data/lib/outliertree/version.rb +1 -1
- metadata +5 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 57de739284b2cb4fd83c9f5edc81bf15e95f659b2bd85459fd2e55a0b3964dd0
|
4
|
+
data.tar.gz: 4f2799ead4bbe1e6db6a5676c8709c6b518afb7321e5917f3858add97aed543b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8bc84c17be1778abb020066848325beeff068dd55c95701928cbcec1869a4c4123fc76c33871782734e0fceef37ddc6af9a4ef44da45d70310cce70610ac2d13
|
7
|
+
data.tar.gz: 6611cc3958469710b626429d1c6cc7703f0ef3d4e0e1feb52120134f9954819e3e87fc52ed3449e9be4c89a4974f8b3f6825aead9ec9739f92d538097e078117
|
data/CHANGELOG.md
CHANGED
data/ext/outliertree/ext.cpp
CHANGED
@@ -2,12 +2,8 @@
|
|
2
2
|
#include <outlier_tree.hpp>
|
3
3
|
|
4
4
|
// rice
|
5
|
-
#include <rice/Array.hpp>
|
6
|
-
#include <rice/Hash.hpp>
|
7
|
-
#include <rice/Module.hpp>
|
8
|
-
#include <rice/Object.hpp>
|
9
|
-
#include <rice/String.hpp>
|
10
|
-
#include <rice/Symbol.hpp>
|
5
|
+
#include <rice/rice.hpp>
|
6
|
+
#include <rice/stl.hpp>
|
11
7
|
|
12
8
|
using Rice::Array;
|
13
9
|
using Rice::Hash;
|
@@ -18,74 +14,77 @@ using Rice::Symbol;
|
|
18
14
|
using Rice::define_class_under;
|
19
15
|
using Rice::define_module;
|
20
16
|
|
21
|
-
|
22
|
-
Object to_ruby<std::vector<char>>(std::vector<char> const & x)
|
17
|
+
namespace Rice::detail
|
23
18
|
{
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
19
|
+
template<typename T>
|
20
|
+
class To_Ruby<std::vector<T>>
|
21
|
+
{
|
22
|
+
public:
|
23
|
+
VALUE convert(std::vector<T> const & x)
|
24
|
+
{
|
25
|
+
auto a = rb_ary_new2(x.size());
|
26
|
+
for (const auto& v : x) {
|
27
|
+
rb_ary_push(a, To_Ruby<T>().convert(v));
|
28
|
+
}
|
29
|
+
return a;
|
30
|
+
}
|
31
|
+
};
|
30
32
|
|
31
|
-
template<>
|
32
|
-
|
33
|
-
{
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
}
|
33
|
+
template<>
|
34
|
+
struct Type<ColType>
|
35
|
+
{
|
36
|
+
static bool verify()
|
37
|
+
{
|
38
|
+
return true;
|
39
|
+
}
|
40
|
+
};
|
40
41
|
|
41
|
-
template<>
|
42
|
-
|
43
|
-
{
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
42
|
+
template<>
|
43
|
+
class To_Ruby<ColType>
|
44
|
+
{
|
45
|
+
public:
|
46
|
+
VALUE convert(ColType const & x)
|
47
|
+
{
|
48
|
+
switch (x) {
|
49
|
+
case Numeric: return Symbol("numeric");
|
50
|
+
case Categorical: return Symbol("categorical");
|
51
|
+
case Ordinal: return Symbol("ordinal");
|
52
|
+
case NoType: return Symbol("no_type");
|
53
|
+
}
|
54
|
+
throw std::runtime_error("Unknown column type");
|
55
|
+
}
|
56
|
+
};
|
50
57
|
|
51
|
-
template<>
|
52
|
-
|
53
|
-
{
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
}
|
58
|
+
template<>
|
59
|
+
struct Type<SplitType>
|
60
|
+
{
|
61
|
+
static bool verify()
|
62
|
+
{
|
63
|
+
return true;
|
64
|
+
}
|
65
|
+
};
|
60
66
|
|
61
|
-
template<>
|
62
|
-
|
63
|
-
{
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
case NotInSubset: return Symbol("not_in_subset");
|
83
|
-
case SingleCateg: return Symbol("single_categ");
|
84
|
-
case SubTrees: return Symbol("sub_trees");
|
85
|
-
case IsNa: return Symbol("is_na");
|
86
|
-
case Root: return Symbol("root");
|
87
|
-
}
|
88
|
-
throw std::runtime_error("Unknown split type");
|
67
|
+
template<>
|
68
|
+
class To_Ruby<SplitType>
|
69
|
+
{
|
70
|
+
public:
|
71
|
+
VALUE convert(SplitType const & x)
|
72
|
+
{
|
73
|
+
switch (x) {
|
74
|
+
case LessOrEqual: return Symbol("less_or_equal");
|
75
|
+
case Greater: return Symbol("greater");
|
76
|
+
case Equal: return Symbol("equal");
|
77
|
+
case NotEqual: return Symbol("not_equal");
|
78
|
+
case InSubset: return Symbol("in_subset");
|
79
|
+
case NotInSubset: return Symbol("not_in_subset");
|
80
|
+
case SingleCateg: return Symbol("single_categ");
|
81
|
+
case SubTrees: return Symbol("sub_trees");
|
82
|
+
case IsNa: return Symbol("is_na");
|
83
|
+
case Root: return Symbol("root");
|
84
|
+
}
|
85
|
+
throw std::runtime_error("Unknown split type");
|
86
|
+
}
|
87
|
+
};
|
89
88
|
}
|
90
89
|
|
91
90
|
extern "C"
|
@@ -95,55 +94,55 @@ void Init_ext()
|
|
95
94
|
Module rb_mExt = define_module_under(rb_mOutlierTree, "Ext");
|
96
95
|
|
97
96
|
define_class_under<Cluster>(rb_mExt, "Cluster")
|
98
|
-
.define_method("upper_lim",
|
99
|
-
.define_method("display_lim_high",
|
100
|
-
.define_method("perc_below",
|
101
|
-
.define_method("display_lim_low",
|
102
|
-
.define_method("perc_above",
|
103
|
-
.define_method("display_mean",
|
104
|
-
.define_method("display_sd",
|
105
|
-
.define_method("cluster_size",
|
106
|
-
.define_method("split_point",
|
107
|
-
.define_method("split_subset",
|
108
|
-
.define_method("split_lev",
|
109
|
-
.define_method("split_type",
|
110
|
-
.define_method("column_type",
|
111
|
-
.define_method("has_na_branch",
|
112
|
-
.define_method("col_num",
|
97
|
+
.define_method("upper_lim", [](Cluster& self) { return self.upper_lim; })
|
98
|
+
.define_method("display_lim_high", [](Cluster& self) { return self.display_lim_high; })
|
99
|
+
.define_method("perc_below", [](Cluster& self) { return self.perc_below; })
|
100
|
+
.define_method("display_lim_low", [](Cluster& self) { return self.display_lim_low; })
|
101
|
+
.define_method("perc_above", [](Cluster& self) { return self.perc_above; })
|
102
|
+
.define_method("display_mean", [](Cluster& self) { return self.display_mean; })
|
103
|
+
.define_method("display_sd", [](Cluster& self) { return self.display_sd; })
|
104
|
+
.define_method("cluster_size", [](Cluster& self) { return self.cluster_size; })
|
105
|
+
.define_method("split_point", [](Cluster& self) { return self.split_point; })
|
106
|
+
.define_method("split_subset", [](Cluster& self) { return self.split_subset; })
|
107
|
+
.define_method("split_lev", [](Cluster& self) { return self.split_lev; })
|
108
|
+
.define_method("split_type", [](Cluster& self) { return self.split_type; })
|
109
|
+
.define_method("column_type", [](Cluster& self) { return self.column_type; })
|
110
|
+
.define_method("has_na_branch", [](Cluster& self) { return self.has_NA_branch; })
|
111
|
+
.define_method("col_num", [](Cluster& self) { return self.col_num; });
|
113
112
|
|
114
113
|
define_class_under<ClusterTree>(rb_mExt, "ClusterTree")
|
115
|
-
.define_method("parent_branch",
|
116
|
-
.define_method("parent",
|
117
|
-
.define_method("all_branches",
|
118
|
-
.define_method("column_type",
|
119
|
-
.define_method("col_num",
|
120
|
-
.define_method("split_point",
|
121
|
-
.define_method("split_subset",
|
122
|
-
.define_method("split_lev",
|
114
|
+
.define_method("parent_branch", [](ClusterTree& self) { return self.parent_branch; })
|
115
|
+
.define_method("parent", [](ClusterTree& self) { return self.parent; })
|
116
|
+
.define_method("all_branches", [](ClusterTree& self) { return self.all_branches; })
|
117
|
+
.define_method("column_type", [](ClusterTree& self) { return self.column_type; })
|
118
|
+
.define_method("col_num", [](ClusterTree& self) { return self.col_num; })
|
119
|
+
.define_method("split_point", [](ClusterTree& self) { return self.split_point; })
|
120
|
+
.define_method("split_subset", [](ClusterTree& self) { return self.split_subset; })
|
121
|
+
.define_method("split_lev", [](ClusterTree& self) { return self.split_lev; });
|
123
122
|
|
124
123
|
define_class_under<ModelOutputs>(rb_mExt, "ModelOutputs")
|
125
|
-
.define_method("outlier_scores_final",
|
126
|
-
.define_method("outlier_columns_final",
|
127
|
-
.define_method("outlier_clusters_final",
|
128
|
-
.define_method("outlier_trees_final",
|
129
|
-
.define_method("outlier_depth_final",
|
130
|
-
.define_method("outlier_decimals_distr",
|
131
|
-
.define_method("min_decimals_col",
|
124
|
+
.define_method("outlier_scores_final", [](ModelOutputs& self) { return self.outlier_scores_final; })
|
125
|
+
.define_method("outlier_columns_final", [](ModelOutputs& self) { return self.outlier_columns_final; })
|
126
|
+
.define_method("outlier_clusters_final", [](ModelOutputs& self) { return self.outlier_clusters_final; })
|
127
|
+
.define_method("outlier_trees_final", [](ModelOutputs& self) { return self.outlier_trees_final; })
|
128
|
+
.define_method("outlier_depth_final", [](ModelOutputs& self) { return self.outlier_depth_final; })
|
129
|
+
.define_method("outlier_decimals_distr", [](ModelOutputs& self) { return self.outlier_decimals_distr; })
|
130
|
+
.define_method("min_decimals_col", [](ModelOutputs& self) { return self.min_decimals_col; })
|
132
131
|
.define_method(
|
133
132
|
"all_clusters",
|
134
|
-
|
133
|
+
[](ModelOutputs& self, size_t i, size_t j) {
|
135
134
|
return self.all_clusters[i][j];
|
136
135
|
})
|
137
136
|
.define_method(
|
138
137
|
"all_trees",
|
139
|
-
|
138
|
+
[](ModelOutputs& self, size_t i, size_t j) {
|
140
139
|
return self.all_trees[i][j];
|
141
140
|
});
|
142
141
|
|
143
142
|
rb_mExt
|
144
|
-
.
|
143
|
+
.define_singleton_function(
|
145
144
|
"fit_outliers_models",
|
146
|
-
|
145
|
+
[](Hash options) {
|
147
146
|
ModelOutputs model_outputs;
|
148
147
|
|
149
148
|
// data
|
@@ -219,9 +218,9 @@ void Init_ext()
|
|
219
218
|
);
|
220
219
|
return model_outputs;
|
221
220
|
})
|
222
|
-
.
|
221
|
+
.define_singleton_function(
|
223
222
|
"find_new_outliers",
|
224
|
-
|
223
|
+
[](ModelOutputs& model_outputs, Hash options) {
|
225
224
|
// data
|
226
225
|
size_t nrows = options.get<size_t, Symbol>("nrows");
|
227
226
|
size_t ncols_numeric = options.get<size_t, Symbol>("ncols_numeric");
|
data/ext/outliertree/extconf.rb
CHANGED
data/lib/outliertree/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: outliertree
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-05-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rice
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version:
|
19
|
+
version: 4.0.2
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version:
|
26
|
+
version: 4.0.2
|
27
27
|
description:
|
28
28
|
email: andrew@ankane.org
|
29
29
|
executables: []
|
@@ -66,7 +66,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
66
66
|
requirements:
|
67
67
|
- - ">="
|
68
68
|
- !ruby/object:Gem::Version
|
69
|
-
version: '2.
|
69
|
+
version: '2.6'
|
70
70
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
71
71
|
requirements:
|
72
72
|
- - ">="
|