outliertree 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/ext/outliertree/ext.cpp +104 -105
- data/ext/outliertree/extconf.rb +1 -1
- data/lib/outliertree/version.rb +1 -1
- metadata +5 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 57de739284b2cb4fd83c9f5edc81bf15e95f659b2bd85459fd2e55a0b3964dd0
|
4
|
+
data.tar.gz: 4f2799ead4bbe1e6db6a5676c8709c6b518afb7321e5917f3858add97aed543b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8bc84c17be1778abb020066848325beeff068dd55c95701928cbcec1869a4c4123fc76c33871782734e0fceef37ddc6af9a4ef44da45d70310cce70610ac2d13
|
7
|
+
data.tar.gz: 6611cc3958469710b626429d1c6cc7703f0ef3d4e0e1feb52120134f9954819e3e87fc52ed3449e9be4c89a4974f8b3f6825aead9ec9739f92d538097e078117
|
data/CHANGELOG.md
CHANGED
data/ext/outliertree/ext.cpp
CHANGED
@@ -2,12 +2,8 @@
|
|
2
2
|
#include <outlier_tree.hpp>
|
3
3
|
|
4
4
|
// rice
|
5
|
-
#include <rice/
|
6
|
-
#include <rice/
|
7
|
-
#include <rice/Module.hpp>
|
8
|
-
#include <rice/Object.hpp>
|
9
|
-
#include <rice/String.hpp>
|
10
|
-
#include <rice/Symbol.hpp>
|
5
|
+
#include <rice/rice.hpp>
|
6
|
+
#include <rice/stl.hpp>
|
11
7
|
|
12
8
|
using Rice::Array;
|
13
9
|
using Rice::Hash;
|
@@ -18,74 +14,77 @@ using Rice::Symbol;
|
|
18
14
|
using Rice::define_class_under;
|
19
15
|
using Rice::define_module;
|
20
16
|
|
21
|
-
|
22
|
-
Object to_ruby<std::vector<char>>(std::vector<char> const & x)
|
17
|
+
namespace Rice::detail
|
23
18
|
{
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
19
|
+
template<typename T>
|
20
|
+
class To_Ruby<std::vector<T>>
|
21
|
+
{
|
22
|
+
public:
|
23
|
+
VALUE convert(std::vector<T> const & x)
|
24
|
+
{
|
25
|
+
auto a = rb_ary_new2(x.size());
|
26
|
+
for (const auto& v : x) {
|
27
|
+
rb_ary_push(a, To_Ruby<T>().convert(v));
|
28
|
+
}
|
29
|
+
return a;
|
30
|
+
}
|
31
|
+
};
|
30
32
|
|
31
|
-
template<>
|
32
|
-
|
33
|
-
{
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
}
|
33
|
+
template<>
|
34
|
+
struct Type<ColType>
|
35
|
+
{
|
36
|
+
static bool verify()
|
37
|
+
{
|
38
|
+
return true;
|
39
|
+
}
|
40
|
+
};
|
40
41
|
|
41
|
-
template<>
|
42
|
-
|
43
|
-
{
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
42
|
+
template<>
|
43
|
+
class To_Ruby<ColType>
|
44
|
+
{
|
45
|
+
public:
|
46
|
+
VALUE convert(ColType const & x)
|
47
|
+
{
|
48
|
+
switch (x) {
|
49
|
+
case Numeric: return Symbol("numeric");
|
50
|
+
case Categorical: return Symbol("categorical");
|
51
|
+
case Ordinal: return Symbol("ordinal");
|
52
|
+
case NoType: return Symbol("no_type");
|
53
|
+
}
|
54
|
+
throw std::runtime_error("Unknown column type");
|
55
|
+
}
|
56
|
+
};
|
50
57
|
|
51
|
-
template<>
|
52
|
-
|
53
|
-
{
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
}
|
58
|
+
template<>
|
59
|
+
struct Type<SplitType>
|
60
|
+
{
|
61
|
+
static bool verify()
|
62
|
+
{
|
63
|
+
return true;
|
64
|
+
}
|
65
|
+
};
|
60
66
|
|
61
|
-
template<>
|
62
|
-
|
63
|
-
{
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
case NotInSubset: return Symbol("not_in_subset");
|
83
|
-
case SingleCateg: return Symbol("single_categ");
|
84
|
-
case SubTrees: return Symbol("sub_trees");
|
85
|
-
case IsNa: return Symbol("is_na");
|
86
|
-
case Root: return Symbol("root");
|
87
|
-
}
|
88
|
-
throw std::runtime_error("Unknown split type");
|
67
|
+
template<>
|
68
|
+
class To_Ruby<SplitType>
|
69
|
+
{
|
70
|
+
public:
|
71
|
+
VALUE convert(SplitType const & x)
|
72
|
+
{
|
73
|
+
switch (x) {
|
74
|
+
case LessOrEqual: return Symbol("less_or_equal");
|
75
|
+
case Greater: return Symbol("greater");
|
76
|
+
case Equal: return Symbol("equal");
|
77
|
+
case NotEqual: return Symbol("not_equal");
|
78
|
+
case InSubset: return Symbol("in_subset");
|
79
|
+
case NotInSubset: return Symbol("not_in_subset");
|
80
|
+
case SingleCateg: return Symbol("single_categ");
|
81
|
+
case SubTrees: return Symbol("sub_trees");
|
82
|
+
case IsNa: return Symbol("is_na");
|
83
|
+
case Root: return Symbol("root");
|
84
|
+
}
|
85
|
+
throw std::runtime_error("Unknown split type");
|
86
|
+
}
|
87
|
+
};
|
89
88
|
}
|
90
89
|
|
91
90
|
extern "C"
|
@@ -95,55 +94,55 @@ void Init_ext()
|
|
95
94
|
Module rb_mExt = define_module_under(rb_mOutlierTree, "Ext");
|
96
95
|
|
97
96
|
define_class_under<Cluster>(rb_mExt, "Cluster")
|
98
|
-
.define_method("upper_lim",
|
99
|
-
.define_method("display_lim_high",
|
100
|
-
.define_method("perc_below",
|
101
|
-
.define_method("display_lim_low",
|
102
|
-
.define_method("perc_above",
|
103
|
-
.define_method("display_mean",
|
104
|
-
.define_method("display_sd",
|
105
|
-
.define_method("cluster_size",
|
106
|
-
.define_method("split_point",
|
107
|
-
.define_method("split_subset",
|
108
|
-
.define_method("split_lev",
|
109
|
-
.define_method("split_type",
|
110
|
-
.define_method("column_type",
|
111
|
-
.define_method("has_na_branch",
|
112
|
-
.define_method("col_num",
|
97
|
+
.define_method("upper_lim", [](Cluster& self) { return self.upper_lim; })
|
98
|
+
.define_method("display_lim_high", [](Cluster& self) { return self.display_lim_high; })
|
99
|
+
.define_method("perc_below", [](Cluster& self) { return self.perc_below; })
|
100
|
+
.define_method("display_lim_low", [](Cluster& self) { return self.display_lim_low; })
|
101
|
+
.define_method("perc_above", [](Cluster& self) { return self.perc_above; })
|
102
|
+
.define_method("display_mean", [](Cluster& self) { return self.display_mean; })
|
103
|
+
.define_method("display_sd", [](Cluster& self) { return self.display_sd; })
|
104
|
+
.define_method("cluster_size", [](Cluster& self) { return self.cluster_size; })
|
105
|
+
.define_method("split_point", [](Cluster& self) { return self.split_point; })
|
106
|
+
.define_method("split_subset", [](Cluster& self) { return self.split_subset; })
|
107
|
+
.define_method("split_lev", [](Cluster& self) { return self.split_lev; })
|
108
|
+
.define_method("split_type", [](Cluster& self) { return self.split_type; })
|
109
|
+
.define_method("column_type", [](Cluster& self) { return self.column_type; })
|
110
|
+
.define_method("has_na_branch", [](Cluster& self) { return self.has_NA_branch; })
|
111
|
+
.define_method("col_num", [](Cluster& self) { return self.col_num; });
|
113
112
|
|
114
113
|
define_class_under<ClusterTree>(rb_mExt, "ClusterTree")
|
115
|
-
.define_method("parent_branch",
|
116
|
-
.define_method("parent",
|
117
|
-
.define_method("all_branches",
|
118
|
-
.define_method("column_type",
|
119
|
-
.define_method("col_num",
|
120
|
-
.define_method("split_point",
|
121
|
-
.define_method("split_subset",
|
122
|
-
.define_method("split_lev",
|
114
|
+
.define_method("parent_branch", [](ClusterTree& self) { return self.parent_branch; })
|
115
|
+
.define_method("parent", [](ClusterTree& self) { return self.parent; })
|
116
|
+
.define_method("all_branches", [](ClusterTree& self) { return self.all_branches; })
|
117
|
+
.define_method("column_type", [](ClusterTree& self) { return self.column_type; })
|
118
|
+
.define_method("col_num", [](ClusterTree& self) { return self.col_num; })
|
119
|
+
.define_method("split_point", [](ClusterTree& self) { return self.split_point; })
|
120
|
+
.define_method("split_subset", [](ClusterTree& self) { return self.split_subset; })
|
121
|
+
.define_method("split_lev", [](ClusterTree& self) { return self.split_lev; });
|
123
122
|
|
124
123
|
define_class_under<ModelOutputs>(rb_mExt, "ModelOutputs")
|
125
|
-
.define_method("outlier_scores_final",
|
126
|
-
.define_method("outlier_columns_final",
|
127
|
-
.define_method("outlier_clusters_final",
|
128
|
-
.define_method("outlier_trees_final",
|
129
|
-
.define_method("outlier_depth_final",
|
130
|
-
.define_method("outlier_decimals_distr",
|
131
|
-
.define_method("min_decimals_col",
|
124
|
+
.define_method("outlier_scores_final", [](ModelOutputs& self) { return self.outlier_scores_final; })
|
125
|
+
.define_method("outlier_columns_final", [](ModelOutputs& self) { return self.outlier_columns_final; })
|
126
|
+
.define_method("outlier_clusters_final", [](ModelOutputs& self) { return self.outlier_clusters_final; })
|
127
|
+
.define_method("outlier_trees_final", [](ModelOutputs& self) { return self.outlier_trees_final; })
|
128
|
+
.define_method("outlier_depth_final", [](ModelOutputs& self) { return self.outlier_depth_final; })
|
129
|
+
.define_method("outlier_decimals_distr", [](ModelOutputs& self) { return self.outlier_decimals_distr; })
|
130
|
+
.define_method("min_decimals_col", [](ModelOutputs& self) { return self.min_decimals_col; })
|
132
131
|
.define_method(
|
133
132
|
"all_clusters",
|
134
|
-
|
133
|
+
[](ModelOutputs& self, size_t i, size_t j) {
|
135
134
|
return self.all_clusters[i][j];
|
136
135
|
})
|
137
136
|
.define_method(
|
138
137
|
"all_trees",
|
139
|
-
|
138
|
+
[](ModelOutputs& self, size_t i, size_t j) {
|
140
139
|
return self.all_trees[i][j];
|
141
140
|
});
|
142
141
|
|
143
142
|
rb_mExt
|
144
|
-
.
|
143
|
+
.define_singleton_function(
|
145
144
|
"fit_outliers_models",
|
146
|
-
|
145
|
+
[](Hash options) {
|
147
146
|
ModelOutputs model_outputs;
|
148
147
|
|
149
148
|
// data
|
@@ -219,9 +218,9 @@ void Init_ext()
|
|
219
218
|
);
|
220
219
|
return model_outputs;
|
221
220
|
})
|
222
|
-
.
|
221
|
+
.define_singleton_function(
|
223
222
|
"find_new_outliers",
|
224
|
-
|
223
|
+
[](ModelOutputs& model_outputs, Hash options) {
|
225
224
|
// data
|
226
225
|
size_t nrows = options.get<size_t, Symbol>("nrows");
|
227
226
|
size_t ncols_numeric = options.get<size_t, Symbol>("ncols_numeric");
|
data/ext/outliertree/extconf.rb
CHANGED
data/lib/outliertree/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: outliertree
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-05-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rice
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version:
|
19
|
+
version: 4.0.2
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version:
|
26
|
+
version: 4.0.2
|
27
27
|
description:
|
28
28
|
email: andrew@ankane.org
|
29
29
|
executables: []
|
@@ -66,7 +66,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
66
66
|
requirements:
|
67
67
|
- - ">="
|
68
68
|
- !ruby/object:Gem::Version
|
69
|
-
version: '2.
|
69
|
+
version: '2.6'
|
70
70
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
71
71
|
requirements:
|
72
72
|
- - ">="
|