outliertree 0.4.2 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/NOTICE.txt +1 -1
- data/ext/outliertree/ext.cpp +48 -76
- data/lib/outliertree/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: ef5a7a99c49ab6a263bfac8293d85a914fb21794566245737640001315d34cc8
|
|
4
|
+
data.tar.gz: 29a5aec79b8e24912b422c279fed5ca58de62775296ee1aaad6af2b0fdbbad7a
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 4b67ff777a77cbd3b01617583deed6c2a033e28b9d7f57721d201e1adc5d72a3ec5ccaa6e4f41337dd607f594dd3a9a1de8ace4ffd55726bcf92b18631bc70cd
|
|
7
|
+
data.tar.gz: 020d5831ea8abedbcc25c130ea7ed2b009d9126ca64e61ecc27cf051cbe451ec5bd6ec01e0b606c7ff834d252119a19da80a199ecab1fab549caa5958f00ae88
|
data/CHANGELOG.md
CHANGED
data/NOTICE.txt
CHANGED
data/ext/outliertree/ext.cpp
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
#include <
|
|
1
|
+
#include <cstddef>
|
|
2
|
+
#include <stdexcept>
|
|
2
3
|
#include <vector>
|
|
3
4
|
|
|
4
5
|
// outliertree
|
|
@@ -9,11 +10,8 @@
|
|
|
9
10
|
|
|
10
11
|
// rice
|
|
11
12
|
#include <rice/rice.hpp>
|
|
12
|
-
#include <rice/stl.hpp>
|
|
13
13
|
|
|
14
|
-
using Rice::Array;
|
|
15
14
|
using Rice::Hash;
|
|
16
|
-
using Rice::Object;
|
|
17
15
|
using Rice::String;
|
|
18
16
|
using Rice::Symbol;
|
|
19
17
|
|
|
@@ -25,10 +23,10 @@ namespace Rice::detail {
|
|
|
25
23
|
|
|
26
24
|
explicit To_Ruby(Arg* arg) : arg_(arg) { }
|
|
27
25
|
|
|
28
|
-
VALUE convert(std::vector<T
|
|
29
|
-
auto a = rb_ary_new2
|
|
26
|
+
VALUE convert(const std::vector<T>& x) {
|
|
27
|
+
auto a = detail::protect(rb_ary_new2, x.size());
|
|
30
28
|
for (const auto& v : x) {
|
|
31
|
-
rb_ary_push
|
|
29
|
+
detail::protect(rb_ary_push, a, To_Ruby<T>().convert(v));
|
|
32
30
|
}
|
|
33
31
|
return a;
|
|
34
32
|
}
|
|
@@ -37,32 +35,6 @@ namespace Rice::detail {
|
|
|
37
35
|
Arg* arg_ = nullptr;
|
|
38
36
|
};
|
|
39
37
|
|
|
40
|
-
template<>
|
|
41
|
-
class To_Ruby<std::vector<signed char>> {
|
|
42
|
-
public:
|
|
43
|
-
To_Ruby() = default;
|
|
44
|
-
|
|
45
|
-
explicit To_Ruby(Arg* arg) : arg_(arg) { }
|
|
46
|
-
|
|
47
|
-
VALUE convert(std::vector<signed char> const & x) {
|
|
48
|
-
auto a = rb_ary_new2(x.size());
|
|
49
|
-
for (const auto& v : x) {
|
|
50
|
-
rb_ary_push(a, To_Ruby<signed char>().convert(v));
|
|
51
|
-
}
|
|
52
|
-
return a;
|
|
53
|
-
}
|
|
54
|
-
|
|
55
|
-
private:
|
|
56
|
-
Arg* arg_ = nullptr;
|
|
57
|
-
};
|
|
58
|
-
|
|
59
|
-
template<>
|
|
60
|
-
struct Type<std::vector<signed char>> {
|
|
61
|
-
static bool verify() {
|
|
62
|
-
return true;
|
|
63
|
-
}
|
|
64
|
-
};
|
|
65
|
-
|
|
66
38
|
template<>
|
|
67
39
|
struct Type<ColType> {
|
|
68
40
|
static bool verify() {
|
|
@@ -169,12 +141,12 @@ void Init_ext() {
|
|
|
169
141
|
.define_method(
|
|
170
142
|
"all_clusters",
|
|
171
143
|
[](ModelOutputs& self, size_t i, size_t j) {
|
|
172
|
-
return self.all_clusters
|
|
144
|
+
return self.all_clusters.at(i).at(j);
|
|
173
145
|
})
|
|
174
146
|
.define_method(
|
|
175
147
|
"all_trees",
|
|
176
148
|
[](ModelOutputs& self, size_t i, size_t j) {
|
|
177
|
-
return self.all_trees
|
|
149
|
+
return self.all_trees.at(i).at(j);
|
|
178
150
|
});
|
|
179
151
|
|
|
180
152
|
rb_mExt
|
|
@@ -184,47 +156,47 @@ void Init_ext() {
|
|
|
184
156
|
ModelOutputs model_outputs;
|
|
185
157
|
|
|
186
158
|
// data
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
159
|
+
auto nrows = options.get<size_t, Symbol>("nrows");
|
|
160
|
+
auto ncols_numeric = options.get<size_t, Symbol>("ncols_numeric");
|
|
161
|
+
auto ncols_categ = options.get<size_t, Symbol>("ncols_categ");
|
|
162
|
+
auto ncols_ord = options.get<size_t, Symbol>("ncols_ord");
|
|
191
163
|
|
|
192
|
-
double
|
|
164
|
+
double* numeric_data = nullptr;
|
|
193
165
|
if (ncols_numeric > 0) {
|
|
194
|
-
numeric_data = (
|
|
166
|
+
numeric_data = reinterpret_cast<double*>(const_cast<char*>(options.get<String, Symbol>("numeric_data").c_str()));
|
|
195
167
|
}
|
|
196
168
|
|
|
197
|
-
int
|
|
198
|
-
int
|
|
169
|
+
int* categorical_data = nullptr;
|
|
170
|
+
int* ncat = nullptr;
|
|
199
171
|
if (ncols_categ > 0) {
|
|
200
|
-
categorical_data = (
|
|
201
|
-
ncat = (
|
|
172
|
+
categorical_data = reinterpret_cast<int*>(const_cast<char*>(options.get<String, Symbol>("categorical_data").c_str()));
|
|
173
|
+
ncat = reinterpret_cast<int*>(const_cast<char*>(options.get<String, Symbol>("ncat").c_str()));
|
|
202
174
|
}
|
|
203
175
|
|
|
204
|
-
int
|
|
205
|
-
int
|
|
176
|
+
int* ordinal_data = nullptr;
|
|
177
|
+
int* ncat_ord = nullptr;
|
|
206
178
|
if (ncols_ord > 0) {
|
|
207
|
-
ordinal_data = (
|
|
208
|
-
ncat_ord = (
|
|
179
|
+
ordinal_data = reinterpret_cast<int*>(const_cast<char*>(options.get<String, Symbol>("ordinal_data").c_str()));
|
|
180
|
+
ncat_ord = reinterpret_cast<int*>(const_cast<char*>(options.get<String, Symbol>("ncat_ord").c_str()));
|
|
209
181
|
}
|
|
210
182
|
|
|
211
183
|
// options
|
|
212
|
-
char
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
184
|
+
char* cols_ignore = nullptr;
|
|
185
|
+
auto nthreads = options.get<int, Symbol>("nthreads");
|
|
186
|
+
auto categ_as_bin = options.get<bool, Symbol>("categ_as_bin");
|
|
187
|
+
auto ord_as_bin = options.get<bool, Symbol>("ord_as_bin");
|
|
188
|
+
auto cat_bruteforce_subset = options.get<bool, Symbol>("cat_bruteforce_subset");
|
|
189
|
+
auto categ_from_maj = options.get<bool, Symbol>("categ_from_maj");
|
|
190
|
+
auto take_mid = options.get<bool, Symbol>("take_mid");
|
|
191
|
+
auto max_depth = options.get<size_t, Symbol>("max_depth");
|
|
192
|
+
auto max_perc_outliers = options.get<double, Symbol>("pct_outliers");
|
|
193
|
+
auto min_size_numeric = options.get<size_t, Symbol>("min_size_numeric");
|
|
194
|
+
auto min_size_categ = options.get<size_t, Symbol>("min_size_categ");
|
|
195
|
+
auto min_gain = options.get<double, Symbol>("min_gain");
|
|
196
|
+
auto gain_as_pct = options.get<bool, Symbol>("gain_as_pct");
|
|
197
|
+
auto follow_all = options.get<bool, Symbol>("follow_all");
|
|
198
|
+
auto z_norm = options.get<double, Symbol>("z_norm");
|
|
199
|
+
auto z_outlier = options.get<double, Symbol>("z_outlier");
|
|
228
200
|
|
|
229
201
|
fit_outliers_models(
|
|
230
202
|
model_outputs,
|
|
@@ -260,28 +232,28 @@ void Init_ext() {
|
|
|
260
232
|
"find_new_outliers",
|
|
261
233
|
[](ModelOutputs& model_outputs, Hash options) {
|
|
262
234
|
// data
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
235
|
+
auto nrows = options.get<size_t, Symbol>("nrows");
|
|
236
|
+
auto ncols_numeric = options.get<size_t, Symbol>("ncols_numeric");
|
|
237
|
+
auto ncols_categ = options.get<size_t, Symbol>("ncols_categ");
|
|
238
|
+
auto ncols_ord = options.get<size_t, Symbol>("ncols_ord");
|
|
267
239
|
|
|
268
|
-
double
|
|
240
|
+
double* numeric_data = nullptr;
|
|
269
241
|
if (ncols_numeric > 0) {
|
|
270
|
-
numeric_data = (
|
|
242
|
+
numeric_data = reinterpret_cast<double*>(const_cast<char*>(options.get<String, Symbol>("numeric_data").c_str()));
|
|
271
243
|
}
|
|
272
244
|
|
|
273
|
-
int
|
|
245
|
+
int* categorical_data = nullptr;
|
|
274
246
|
if (ncols_categ > 0) {
|
|
275
|
-
categorical_data = (
|
|
247
|
+
categorical_data = reinterpret_cast<int*>(const_cast<char*>(options.get<String, Symbol>("categorical_data").c_str()));
|
|
276
248
|
}
|
|
277
249
|
|
|
278
|
-
int
|
|
250
|
+
int* ordinal_data = nullptr;
|
|
279
251
|
if (ncols_ord > 0) {
|
|
280
|
-
ordinal_data = (
|
|
252
|
+
ordinal_data = reinterpret_cast<int*>(const_cast<char*>(options.get<String, Symbol>("ordinal_data").c_str()));
|
|
281
253
|
}
|
|
282
254
|
|
|
283
255
|
// options
|
|
284
|
-
|
|
256
|
+
auto nthreads = options.get<int, Symbol>("nthreads");
|
|
285
257
|
|
|
286
258
|
find_new_outliers(
|
|
287
259
|
numeric_data,
|
data/lib/outliertree/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: outliertree
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.5.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Andrew Kane
|
|
@@ -65,14 +65,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
65
65
|
requirements:
|
|
66
66
|
- - ">="
|
|
67
67
|
- !ruby/object:Gem::Version
|
|
68
|
-
version: '3.
|
|
68
|
+
version: '3.3'
|
|
69
69
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
70
70
|
requirements:
|
|
71
71
|
- - ">="
|
|
72
72
|
- !ruby/object:Gem::Version
|
|
73
73
|
version: '0'
|
|
74
74
|
requirements: []
|
|
75
|
-
rubygems_version:
|
|
75
|
+
rubygems_version: 4.0.6
|
|
76
76
|
specification_version: 4
|
|
77
77
|
summary: Explainable outlier/anomaly detection for Ruby
|
|
78
78
|
test_files: []
|