outliertree 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/NOTICE.txt +1 -0
- data/README.md +24 -1
- data/lib/outliertree/model.rb +2 -2
- data/lib/outliertree/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9f26ec3795a8e6194fa6b4485255b7fcf69867ded0ec0f203ed275f30227f662
|
4
|
+
data.tar.gz: ca9185c4c4fb6ccd52155c637445c4b19998062e4e394cd03f8ebbcf308f8cb6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5ab838e58023073d234a7f91c1d959c608609144d77a8421d7e3f011ee05f22dcb56b83e89186356d16c51bfb7f08f1ebb85de35ca6752234e4d212b87437f49
|
7
|
+
data.tar.gz: 5ebebdde922e690ad090b55eb9cf2826faca1491bb8948412e105eec5542ce6710834e6290fdcb8476bbb87cc73c58ed5dac704f4b219955b0008781cd9db256
|
data/CHANGELOG.md
CHANGED
data/NOTICE.txt
CHANGED
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# OutlierTree
|
2
2
|
|
3
|
-
:
|
3
|
+
:deciduous_tree: [OutlierTree](https://github.com/david-cortes/outliertree) - explainable outlier/anomaly detection - for Ruby
|
4
4
|
|
5
5
|
Produces human-readable explanations for why values are detected as outliers
|
6
6
|
|
@@ -8,6 +8,10 @@ Produces human-readable explanations for why values are detected as outliers
|
|
8
8
|
Price (2.50) looks low given Department is Books and Sale is false
|
9
9
|
```
|
10
10
|
|
11
|
+
:evergreen_tree: Check out [IsoTree](https://github.com/ankane/isotree) for an alternative approach that uses Isolation Forest
|
12
|
+
|
13
|
+
[![Build Status](https://travis-ci.org/ankane/outliertree.svg?branch=master)](https://travis-ci.org/ankane/outliertree)
|
14
|
+
|
11
15
|
## Installation
|
12
16
|
|
13
17
|
Add this line to your application’s Gemfile:
|
@@ -83,6 +87,25 @@ Or a Rover data frame
|
|
83
87
|
Rover.read_csv("data.csv")
|
84
88
|
```
|
85
89
|
|
90
|
+
## Performance
|
91
|
+
|
92
|
+
OutlierTree uses OpenMP when possible for best performance. To enable OpenMP on Mac, run:
|
93
|
+
|
94
|
+
```sh
|
95
|
+
brew install libomp
|
96
|
+
```
|
97
|
+
|
98
|
+
Then reinstall the gem.
|
99
|
+
|
100
|
+
```sh
|
101
|
+
gem uninstall outliertree --force
|
102
|
+
bundle install
|
103
|
+
```
|
104
|
+
|
105
|
+
## Resources
|
106
|
+
|
107
|
+
- [Explainable outlier detection through decision tree conditioning](https://arxiv.org/pdf/2001.00636.pdf)
|
108
|
+
|
86
109
|
## History
|
87
110
|
|
88
111
|
View the [changelog](https://github.com/ankane/outliertree/blob/master/CHANGELOG.md)
|
data/lib/outliertree/model.rb
CHANGED
@@ -74,7 +74,7 @@ module OutlierTree
|
|
74
74
|
numeric_data = String.new
|
75
75
|
@numeric_columns.each do |k|
|
76
76
|
# more efficient for Rover
|
77
|
-
numeric_data << (df[k].respond_to?(:
|
77
|
+
numeric_data << (df[k].respond_to?(:to_numo) ? df[k].to_numo.cast_to(Numo::DFloat).to_binary : df[k].pack("d*"))
|
78
78
|
end
|
79
79
|
options[:numeric_data] = numeric_data
|
80
80
|
options[:ncols_numeric] = @numeric_columns.size
|
@@ -92,7 +92,7 @@ module OutlierTree
|
|
92
92
|
warn "[outliertree] Unseen values in column: #{k}"
|
93
93
|
end
|
94
94
|
# more efficient for Rover
|
95
|
-
categorical_data << (values.respond_to?(:
|
95
|
+
categorical_data << (values.respond_to?(:to_numo) ? values.to_numo.cast_to(Numo::Int32).to_binary : values.pack("i*"))
|
96
96
|
ncat << [categories.size].pack("i")
|
97
97
|
end
|
98
98
|
options[:categorical_data] = categorical_data
|
data/lib/outliertree/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: outliertree
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-08-
|
11
|
+
date: 2020-08-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rice
|