midas-edge 0.1.1 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/README.md +6 -4
- data/ext/midas/ext.cpp +32 -16
- data/ext/midas/extconf.rb +1 -5
- data/lib/midas/detector.rb +7 -4
- data/lib/midas/version.rb +1 -1
- data/vendor/MIDAS/LICENSE +0 -25
- data/vendor/MIDAS/README.md +110 -40
- metadata +2 -10
- data/vendor/MIDAS/anom.cpp +0 -88
- data/vendor/MIDAS/anom.hpp +0 -10
- data/vendor/MIDAS/argparse.hpp +0 -539
- data/vendor/MIDAS/edgehash.cpp +0 -63
- data/vendor/MIDAS/edgehash.hpp +0 -25
- data/vendor/MIDAS/main.cpp +0 -127
- data/vendor/MIDAS/nodehash.cpp +0 -63
- data/vendor/MIDAS/nodehash.hpp +0 -25
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9e4c7b4cac2e7a9dac3a6085b40d79a48b9f14dad32834e0d267f7bb667d86b9
|
4
|
+
data.tar.gz: abd0836d284e7a9c34c7733f2f195e38faca2fb1ac6ab01687c3790bc96d3cfa
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 48c92dbdbff039a514ca99207a88276fada4a1391497a1fd4595176f90bec0ac46f0fc89126d36c7ccfc3a1afe212aa11f2142fb7d3d836734f64ab912a88ffb
|
7
|
+
data.tar.gz: 9a2a140ed5d5a120969ee5fc0a599cccc6d590755c9eddf949c2e0a5897aaf39b6f8fe8137c9dde3fd418c78f04cdd8801d1353ca61009d4fbae5a610ad2edc5
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -43,9 +43,11 @@ Pass parameters - default values below
|
|
43
43
|
Midas.new(
|
44
44
|
rows: 2, # number of hash functions
|
45
45
|
buckets: 769, # number of buckets
|
46
|
-
alpha: 0.
|
46
|
+
alpha: 0.5, # temporal decay factor
|
47
|
+
threshold: nil, # when set, scores are computed with the filtering core using this threshold
|
47
48
|
relations: true, # whether to use MIDAS-R or MIDAS
|
48
|
-
directed: true
|
49
|
+
directed: true, # treat the graph as directed or undirected
|
50
|
+
seed: 0 # random seed
|
49
51
|
)
|
50
52
|
```
|
51
53
|
|
@@ -57,10 +59,10 @@ Data can be an array of arrays
|
|
57
59
|
[[1, 2, 3], [4, 5, 6]]
|
58
60
|
```
|
59
61
|
|
60
|
-
Or a Numo
|
62
|
+
Or a Numo array
|
61
63
|
|
62
64
|
```ruby
|
63
|
-
Numo::
|
65
|
+
Numo::NArray.cast([[1, 2, 3], [4, 5, 6]])
|
64
66
|
```
|
65
67
|
|
66
68
|
## Performance
|
data/ext/midas/ext.cpp
CHANGED
@@ -3,7 +3,9 @@
|
|
3
3
|
#include <vector>
|
4
4
|
|
5
5
|
// midas
|
6
|
-
#include <
|
6
|
+
#include <FilteringCore.hpp>
|
7
|
+
#include <NormalCore.hpp>
|
8
|
+
#include <RelationalCore.hpp>
|
7
9
|
|
8
10
|
// rice
|
9
11
|
#include <rice/Module.hpp>
|
@@ -14,7 +16,7 @@ using Rice::String;
|
|
14
16
|
using Rice::define_module;
|
15
17
|
using Rice::define_class_under;
|
16
18
|
|
17
|
-
void load_str(vector<int>& src, vector<int>& dst, vector<int>& times, std::string input, bool directed) {
|
19
|
+
void load_str(std::vector<int>& src, std::vector<int>& dst, std::vector<int>& times, std::string input, bool directed) {
|
18
20
|
int* input_ptr = (int*) input.data();
|
19
21
|
size_t n = input.size() / sizeof(int);
|
20
22
|
|
@@ -39,7 +41,7 @@ void load_str(vector
|
|
39
41
|
// load_data from main.cpp
|
40
42
|
// modified to throw std::runtime_error when cannot find file
|
41
43
|
// instead of exiting
|
42
|
-
void load_file(vector<int>& src, vector<int>& dst, vector<int>& times, std::string input_file, bool undirected)
|
44
|
+
void load_file(std::vector<int>& src, std::vector<int>& dst, std::vector<int>& times, std::string input_file, bool undirected)
|
43
45
|
{
|
44
46
|
FILE* infile = fopen(input_file.c_str(), "r");
|
45
47
|
if (infile == NULL) {
|
@@ -56,7 +58,7 @@ void load_file(vector
|
|
56
58
|
}
|
57
59
|
}
|
58
60
|
else {
|
59
|
-
while (fscanf(infile, "%d
|
61
|
+
while (fscanf(infile, "%d,%d,%d", &s, &d, &t) == 3) {
|
60
62
|
src.push_back(s);
|
61
63
|
dst.push_back(d);
|
62
64
|
times.push_back(t);
|
@@ -67,14 +69,28 @@ void load_file(vector
|
|
67
69
|
}
|
68
70
|
}
|
69
71
|
|
70
|
-
std::string fit_predict(vector<int>& src, vector<int>& dst, vector<int>& times, int num_rows, int num_buckets,
|
71
|
-
|
72
|
-
|
73
|
-
|
72
|
+
std::string fit_predict(std::vector<int>& src, std::vector<int>& dst, std::vector<int>& times, int num_rows, int num_buckets, float factor, float threshold, bool relations, int seed) {
|
73
|
+
srand(seed);
|
74
|
+
size_t n = src.size();
|
75
|
+
const auto result = new float[n];
|
76
|
+
|
77
|
+
if (!std::isnan(threshold)) {
|
78
|
+
MIDAS::FilteringCore midas(num_rows, num_buckets, threshold, factor);
|
79
|
+
for (size_t i = 0; i < n; i++) {
|
80
|
+
result[i] = midas(src[i], dst[i], times[i]);
|
81
|
+
}
|
82
|
+
} else if (relations) {
|
83
|
+
MIDAS::RelationalCore midas(num_rows, num_buckets, factor);
|
84
|
+
for (size_t i = 0; i < n; i++) {
|
85
|
+
result[i] = midas(src[i], dst[i], times[i]);
|
86
|
+
}
|
74
87
|
} else {
|
75
|
-
|
88
|
+
MIDAS::NormalCore midas(num_rows, num_buckets);
|
89
|
+
for (size_t i = 0; i < n; i++) {
|
90
|
+
result[i] = midas(src[i], dst[i], times[i]);
|
91
|
+
}
|
76
92
|
}
|
77
|
-
return std::string((char*) result
|
93
|
+
return std::string((char*) result, sizeof(float) / sizeof(char) * n);
|
78
94
|
}
|
79
95
|
|
80
96
|
extern "C"
|
@@ -85,16 +101,16 @@ void Init_ext()
|
|
85
101
|
define_class_under(rb_mMidas, "Detector")
|
86
102
|
.define_method(
|
87
103
|
"_fit_predict_str",
|
88
|
-
*[](std::string input, int num_rows, int num_buckets,
|
89
|
-
vector<int> src, dst, times;
|
104
|
+
*[](std::string input, int num_rows, int num_buckets, float factor, float threshold, bool relations, bool directed, int seed) {
|
105
|
+
std::vector<int> src, dst, times;
|
90
106
|
load_str(src, dst, times, input, directed);
|
91
|
-
return fit_predict(src, dst, times, num_rows, num_buckets, factor, relations);
|
107
|
+
return fit_predict(src, dst, times, num_rows, num_buckets, factor, threshold, relations, seed);
|
92
108
|
})
|
93
109
|
.define_method(
|
94
110
|
"_fit_predict_file",
|
95
|
-
*[](std::string input, int num_rows, int num_buckets,
|
96
|
-
vector<int> src, dst, times;
|
111
|
+
*[](std::string input, int num_rows, int num_buckets, float factor, float threshold, bool relations, bool directed, int seed) {
|
112
|
+
std::vector<int> src, dst, times;
|
97
113
|
load_file(src, dst, times, input, !directed);
|
98
|
-
return fit_predict(src, dst, times, num_rows, num_buckets, factor, relations);
|
114
|
+
return fit_predict(src, dst, times, num_rows, num_buckets, factor, threshold, relations, seed);
|
99
115
|
});
|
100
116
|
}
|
data/ext/midas/extconf.rb
CHANGED
@@ -2,11 +2,7 @@ require "mkmf-rice"
|
|
2
2
|
|
3
3
|
$CXXFLAGS << " -std=c++17"
|
4
4
|
|
5
|
-
|
6
|
-
midas = File.expand_path("../../vendor/MIDAS", __dir__)
|
7
|
-
|
8
|
-
$srcs = Dir["{#{ext},#{midas}}/*.{cc,cpp}"]
|
5
|
+
midas = File.expand_path("../../vendor/MIDAS/src", __dir__)
|
9
6
|
$INCFLAGS << " -I#{midas}"
|
10
|
-
$VPATH << midas
|
11
7
|
|
12
8
|
create_makefile("midas/ext")
|
data/lib/midas/detector.rb
CHANGED
@@ -1,25 +1,28 @@
|
|
1
1
|
module Midas
|
2
2
|
class Detector
|
3
|
-
def initialize(rows: 2, buckets: 769, alpha: 0.
|
3
|
+
def initialize(rows: 2, buckets: 769, alpha: 0.5, threshold: nil, relations: true, directed: true, seed: 0)
|
4
4
|
@rows = rows
|
5
5
|
@buckets = buckets
|
6
6
|
@alpha = alpha
|
7
|
+
@threshold = threshold
|
7
8
|
@relations = relations
|
8
9
|
@directed = directed
|
10
|
+
@seed = seed
|
9
11
|
end
|
10
12
|
|
11
13
|
def fit_predict(x)
|
14
|
+
threshold = @threshold || Float::NAN
|
12
15
|
result =
|
13
16
|
if x.is_a?(String)
|
14
|
-
_fit_predict_file(x, @rows, @buckets, @alpha, @relations, @directed)
|
17
|
+
_fit_predict_file(x, @rows, @buckets, @alpha, threshold, @relations, @directed, @seed)
|
15
18
|
else
|
16
19
|
x = Numo::Int32.cast(x) unless x.is_a?(Numo::NArray)
|
17
20
|
x = x.cast_to(Numo::Int32) unless x.is_a?(Numo::Int32)
|
18
21
|
raise ArgumentError, "Bad shape: #{x.shape}" unless x.rank == 2 && x.shape[1] == 3
|
19
|
-
_fit_predict_str(x.to_binary, @rows, @buckets, @alpha, @relations, @directed)
|
22
|
+
_fit_predict_str(x.to_binary, @rows, @buckets, @alpha, threshold, @relations, @directed, @seed)
|
20
23
|
end
|
21
24
|
|
22
|
-
Numo::
|
25
|
+
Numo::SFloat.from_binary(result)
|
23
26
|
end
|
24
27
|
end
|
25
28
|
end
|
data/lib/midas/version.rb
CHANGED
data/vendor/MIDAS/LICENSE
CHANGED
@@ -174,28 +174,3 @@
|
|
174
174
|
of your accepting any such warranty or additional liability.
|
175
175
|
|
176
176
|
END OF TERMS AND CONDITIONS
|
177
|
-
|
178
|
-
APPENDIX: How to apply the Apache License to your work.
|
179
|
-
|
180
|
-
To apply the Apache License to your work, attach the following
|
181
|
-
boilerplate notice, with the fields enclosed by brackets "[]"
|
182
|
-
replaced with your own identifying information. (Don't include
|
183
|
-
the brackets!) The text should be enclosed in the appropriate
|
184
|
-
comment syntax for the file format. We also recommend that a
|
185
|
-
file or class name and description of purpose be included on the
|
186
|
-
same "printed page" as the copyright notice for easier
|
187
|
-
identification within third-party archives.
|
188
|
-
|
189
|
-
Copyright [yyyy] [name of copyright owner]
|
190
|
-
|
191
|
-
Licensed under the Apache License, Version 2.0 (the "License");
|
192
|
-
you may not use this file except in compliance with the License.
|
193
|
-
You may obtain a copy of the License at
|
194
|
-
|
195
|
-
http://www.apache.org/licenses/LICENSE-2.0
|
196
|
-
|
197
|
-
Unless required by applicable law or agreed to in writing, software
|
198
|
-
distributed under the License is distributed on an "AS IS" BASIS,
|
199
|
-
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
200
|
-
See the License for the specific language governing permissions and
|
201
|
-
limitations under the License.
|
data/vendor/MIDAS/README.md
CHANGED
@@ -1,66 +1,136 @@
|
|
1
1
|
# MIDAS
|
2
|
-
[![Conference](http://img.shields.io/badge/AAAI-2020-red.svg)](https://aaai.org/Conferences/AAAI-20/)
|
3
|
-
[![Paper](http://img.shields.io/badge/Paper-pdf-brightgreen.svg)](https://www.comp.nus.edu.sg/~sbhatia/assets/pdf/midas.pdf)
|
4
|
-
[![Poster](http://img.shields.io/badge/Poster-pdf-blueviolet.svg)](https://www.comp.nus.edu.sg/~sbhatia/assets/pdf/midasposter.pdf)
|
5
|
-
[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://github.com/bhatiasiddharth/MIDAS/blob/master/LICENSE)
|
6
2
|
|
7
|
-
<p
|
8
|
-
<
|
3
|
+
<p>
|
4
|
+
<a href="https://aaai.org/Conferences/AAAI-20/">
|
5
|
+
<img src="http://img.shields.io/badge/AAAI-2020-red.svg">
|
6
|
+
</a>
|
7
|
+
<a href="https://www.comp.nus.edu.sg/~sbhatia/assets/pdf/midas.pdf"><img src="http://img.shields.io/badge/Paper-PDF-brightgreen.svg"></a>
|
8
|
+
<a href="https://www.comp.nus.edu.sg/~sbhatia/assets/pdf/midasslides.pdf">
|
9
|
+
<img src="http://img.shields.io/badge/Slides-PDF-ff9e18.svg">
|
10
|
+
</a>
|
11
|
+
<a href="https://youtu.be/Bd4PyLCHrto">
|
12
|
+
<img src="http://img.shields.io/badge/Talk-Youtube-ff69b4.svg">
|
13
|
+
</a>
|
14
|
+
<a href="https://www.kdnuggets.com/2020/04/midas-new-baseline-anomaly-detection-graphs.html">
|
15
|
+
<img src="https://img.shields.io/badge/Press-KDnuggets-orange.svg">
|
16
|
+
</a>
|
17
|
+
<a href="https://github.com/bhatiasiddharth/MIDAS/blob/master/LICENSE">
|
18
|
+
<img src="https://img.shields.io/badge/License-Apache%202.0-blue.svg">
|
19
|
+
</a>
|
9
20
|
</p>
|
10
21
|
|
22
|
+
C++ implementation of
|
11
23
|
|
12
|
-
|
24
|
+
- Real-time Streaming Anomaly Detection in Dynamic Graphs. *Siddharth Bhatia, Rui Liu, Bryan Hooi, Minji Yoon, Kijung Shin, Christos Faloutsos*. (Under Review)
|
25
|
+
- [MIDAS: Microcluster-Based Detector of Anomalies in Edge Streams](asset/Conference.pdf). *Siddharth Bhatia, Bryan Hooi, Minji Yoon, Kijung Shin, Christos Faloutsos*. AAAI 2020.
|
13
26
|
|
14
|
-
|
27
|
+
The old implementation lives in the `OldImplementation` branch; it should be considered archived and is unlikely to receive feature updates.
|
15
28
|
|
16
|
-
|
29
|
+
![](asset/Intro.png)
|
17
30
|
|
18
|
-
|
31
|
+
## Table of Contents
|
19
32
|
|
33
|
+
- [Features](#features)
|
34
|
+
- [Demo](#demo)
|
35
|
+
- [Customization](#customization)
|
36
|
+
- [Online Articles](#online-articles)
|
37
|
+
- [MIDAS in Other Languages](#midas-in-other-languages)
|
38
|
+
- [Citation](#citation)
|
20
39
|
|
21
|
-
##
|
22
|
-
1. Run `make` to compile code and create the binary.
|
23
|
-
2. Run `./midas -i ` followed by the input file path and name.
|
24
|
-
3. Run `make clean` to clean binaries.
|
40
|
+
## Features
|
25
41
|
|
42
|
+
- Finds Anomalies in Dynamic/Time-Evolving Graph: (Intrusion Detection, Fake Ratings, Financial Fraud)
|
43
|
+
- Detects Microcluster Anomalies (suddenly arriving groups of suspiciously similar edges e.g. DoS attack)
|
44
|
+
- Theoretical Guarantees on False Positive Probability
|
45
|
+
- Constant Memory (independent of graph size)
|
46
|
+
- Constant Update Time (real-time anomaly detection to minimize harm)
|
47
|
+
- Up to 55% more accurate and 929 times faster than state-of-the-art approaches
|
48
|
+
- Some experiments are performed on the following datasets:
|
49
|
+
- [DARPA](https://www.ll.mit.edu/r-d/datasets/1998-darpa-intrusion-detection-evaluation-dataset)
|
50
|
+
- [TwitterWorldCup2014](http://odds.cs.stonybrook.edu/twitterworldcup2014-dataset)
|
51
|
+
- [TwitterSecurity](http://odds.cs.stonybrook.edu/twittersecurity-dataset)
|
26
52
|
|
27
53
|
## Demo
|
28
|
-
1. Run `./demo.sh` to compile the code and run it on example dataset.
|
29
54
|
|
55
|
+
If you use Windows:
|
30
56
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
* `-a --alpha`: Temporal Decay Factor (default: 0.6)
|
38
|
-
* `--norelations` : Run MIDAS instead of MIDAS-R
|
39
|
-
* `--undirected` : Treat graph as undirected instead of directed
|
57
|
+
1. Open a Visual Studio developer command prompt (we need its toolchain)
|
58
|
+
1. `cd` to the project root `MIDAS/`
|
59
|
+
1. `cmake -DCMAKE_BUILD_TYPE=Release -G "NMake Makefiles" -S . -B build/release`
|
60
|
+
1. `cmake --build build/release --target Demo`
|
61
|
+
1. `cd` to `MIDAS/build/release/src`
|
62
|
+
1. `.\Demo.exe`
|
40
63
|
|
64
|
+
If you use Linux/macOS systems:
|
41
65
|
|
42
|
-
|
43
|
-
|
44
|
-
1. `
|
45
|
-
|
46
|
-
|
66
|
+
1. Open a terminal
|
67
|
+
1. `cd` to the project root `MIDAS/`
|
68
|
+
1. `cmake -DCMAKE_BUILD_TYPE=Release -S . -B build/release`
|
69
|
+
1. `cmake --build build/release --target Demo`
|
70
|
+
1. `cd` to `MIDAS/build/release/src`
|
71
|
+
1. `./Demo`
|
47
72
|
|
48
|
-
|
73
|
+
The demo runs on `MIDAS/data/DARPA/darpa_processed.csv`, which has 4.5M records, with the filtering core.
|
49
74
|
|
75
|
+
The scores will be exported to `MIDAS/temp/Score.txt`, higher means more anomalous.
|
50
76
|
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
77
|
+
All file paths are absolute and "hardcoded" by CMake, but running the executable by double-clicking it is NOT recommended.
|
78
|
+
|
79
|
+
## Customization
|
80
|
+
|
81
|
+
### Switch Cores
|
82
|
+
|
83
|
+
Cores are instantiated at `MIDAS/example/Demo.cpp:64-66`; uncomment the one you want to use.
|
84
|
+
|
85
|
+
### Custom Dataset + `Demo.cpp`
|
86
|
+
|
87
|
+
You need to prepare three files:
|
88
|
+
|
89
|
+
- Meta file
|
90
|
+
- Only includes an integer `N`, the number of records in the dataset
|
91
|
+
- Use its path for `pathMeta`
|
92
|
+
- Data file
|
93
|
+
- A header-less csv format file of shape `[N,3]`
|
94
|
+
- Columns are sources, destinations, timestamps
|
95
|
+
- Use its path for `pathData`
|
96
|
+
- Label file
|
97
|
+
- A header-less csv format file of shape `[N,1]`
|
98
|
+
- The corresponding label for data records
|
99
|
+
- 0 means normal record
|
100
|
+
- 1 means anomalous record
|
101
|
+
- Use its path for `pathGroundTruth`
|
102
|
+
|
103
|
+
### Custom Dataset + Custom Runner
|
104
|
+
|
105
|
+
1. Include the header `MIDAS/CPU/NormalCore.hpp`, `MIDAS/CPU/RelationalCore.hpp` or `MIDAS/CPU/FilteringCore.hpp`
|
106
|
+
1. Instantiate cores with required parameters
|
107
|
+
1. Call `operator()` on individual data records; it returns the anomaly score for the input record.
|
108
|
+
|
109
|
+
## Online Articles
|
110
|
+
|
111
|
+
1. KDnuggets: [Introducing MIDAS: A New Baseline for Anomaly Detection in Graphs](https://www.kdnuggets.com/2020/04/midas-new-baseline-anomaly-detection-graphs.html)
|
112
|
+
2. Towards Data Science: [Controlling Fake News using Graphs and Statistics](https://towardsdatascience.com/controlling-fake-news-using-graphs-and-statistics-31ed116a986f)
|
113
|
+
3. Towards Data Science: [Anomaly detection in dynamic graphs using MIDAS](https://towardsdatascience.com/anomaly-detection-in-dynamic-graphs-using-midas-e4f8d0b1db45)
|
114
|
+
4. Towards AI: [Anomaly Detection with MIDAS](https://medium.com/towards-artificial-intelligence/anomaly-detection-with-midas-2735a2e6dce8)
|
115
|
+
5. [AIhub Interview](https://aihub.org/2020/05/01/interview-with-siddharth-bhatia-a-new-approach-for-anomaly-detection/)
|
116
|
+
|
117
|
+
## MIDAS in Other Languages
|
118
|
+
|
119
|
+
1. [Golang](https://github.com/steve0hh/midas) by [Steve Tan](https://github.com/steve0hh)
|
120
|
+
2. [Ruby](https://github.com/ankane/midas) by [Andrew Kane](https://github.com/ankane)
|
121
|
+
3. [Rust](https://github.com/scooter-dangle/midas_rs) by [Scott Steele](https://github.com/scooter-dangle)
|
122
|
+
4. [R](https://github.com/pteridin/MIDASwrappeR) by [Tobias Heidler](https://github.com/pteridin)
|
123
|
+
5. [Python](https://github.com/ritesh99rakesh/pyMIDAS) by [Ritesh Kumar](https://github.com/ritesh99rakesh)
|
55
124
|
|
56
125
|
## Citation
|
126
|
+
|
57
127
|
If you use this code for your research, please consider citing our paper.
|
58
128
|
|
59
129
|
```
|
60
|
-
@
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
130
|
+
@inproceedings{bhatia2020midas,
|
131
|
+
title="MIDAS: Microcluster-Based Detector of Anomalies in Edge Streams",
|
132
|
+
author="Siddharth {Bhatia} and Bryan {Hooi} and Minji {Yoon} and Kijung {Shin} and Christos {Faloutsos}",
|
133
|
+
booktitle="AAAI 2020 : The Thirty-Fourth AAAI Conference on Artificial Intelligence",
|
134
|
+
year="2020"
|
65
135
|
}
|
66
|
-
```
|
136
|
+
```
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: midas-edge
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-06-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rice
|
@@ -112,14 +112,6 @@ files:
|
|
112
112
|
- lib/midas/version.rb
|
113
113
|
- vendor/MIDAS/LICENSE
|
114
114
|
- vendor/MIDAS/README.md
|
115
|
-
- vendor/MIDAS/anom.cpp
|
116
|
-
- vendor/MIDAS/anom.hpp
|
117
|
-
- vendor/MIDAS/argparse.hpp
|
118
|
-
- vendor/MIDAS/edgehash.cpp
|
119
|
-
- vendor/MIDAS/edgehash.hpp
|
120
|
-
- vendor/MIDAS/main.cpp
|
121
|
-
- vendor/MIDAS/nodehash.cpp
|
122
|
-
- vendor/MIDAS/nodehash.hpp
|
123
115
|
homepage: https://github.com/ankane/midas
|
124
116
|
licenses:
|
125
117
|
- MIT
|
data/vendor/MIDAS/anom.cpp
DELETED
@@ -1,88 +0,0 @@
|
|
1
|
-
#define MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
|
2
|
-
#define MAX(X, Y) (((X) > (Y)) ? (X) : (Y))
|
3
|
-
|
4
|
-
#include <iostream>
|
5
|
-
#include <math.h>
|
6
|
-
#include <algorithm>
|
7
|
-
#include <vector>
|
8
|
-
#include "anom.hpp"
|
9
|
-
#include "edgehash.hpp"
|
10
|
-
#include "nodehash.hpp"
|
11
|
-
|
12
|
-
vector<double>* midas(vector<int>& src, vector<int>& dst, vector<int>& times, int num_rows, int num_buckets)
|
13
|
-
{
|
14
|
-
int m = *max_element(src.begin(), src.end());
|
15
|
-
Edgehash cur_count(num_rows, num_buckets, m);
|
16
|
-
Edgehash total_count(num_rows, num_buckets, m);
|
17
|
-
vector<double>* anom_score = new vector<double>(src.size());
|
18
|
-
int cur_t = 1, size = src.size(), cur_src, cur_dst;
|
19
|
-
double cur_mean, sqerr, cur_score;
|
20
|
-
for (int i = 0; i < size; i++) {
|
21
|
-
|
22
|
-
if (i == 0 || times[i] > cur_t) {
|
23
|
-
cur_count.clear();
|
24
|
-
cur_t = times[i];
|
25
|
-
}
|
26
|
-
|
27
|
-
cur_src = src[i];
|
28
|
-
cur_dst = dst[i];
|
29
|
-
cur_count.insert(cur_src, cur_dst, 1);
|
30
|
-
total_count.insert(cur_src, cur_dst, 1);
|
31
|
-
cur_mean = total_count.get_count(cur_src, cur_dst) / cur_t;
|
32
|
-
sqerr = pow(cur_count.get_count(cur_src, cur_dst) - cur_mean, 2);
|
33
|
-
if (cur_t == 1) cur_score = 0;
|
34
|
-
else cur_score = sqerr / cur_mean + sqerr / (cur_mean * (cur_t - 1));
|
35
|
-
(*anom_score)[i] = cur_score;
|
36
|
-
}
|
37
|
-
|
38
|
-
return anom_score;
|
39
|
-
}
|
40
|
-
|
41
|
-
double counts_to_anom(double tot, double cur, int cur_t)
|
42
|
-
{
|
43
|
-
double cur_mean = tot / cur_t;
|
44
|
-
double sqerr = pow(MAX(0, cur - cur_mean), 2);
|
45
|
-
return sqerr / cur_mean + sqerr / (cur_mean * MAX(1, cur_t - 1));
|
46
|
-
}
|
47
|
-
|
48
|
-
vector<double>* midasR(vector<int>& src, vector<int>& dst, vector<int>& times, int num_rows, int num_buckets, double factor)
|
49
|
-
{
|
50
|
-
int m = *max_element(src.begin(), src.end());
|
51
|
-
Edgehash cur_count(num_rows, num_buckets, m);
|
52
|
-
Edgehash total_count(num_rows, num_buckets, m);
|
53
|
-
Nodehash src_score(num_rows, num_buckets);
|
54
|
-
Nodehash dst_score(num_rows, num_buckets);
|
55
|
-
Nodehash src_total(num_rows, num_buckets);
|
56
|
-
Nodehash dst_total(num_rows, num_buckets);
|
57
|
-
vector<double>* anom_score = new vector<double>(src.size());
|
58
|
-
int cur_t = 1, size = src.size(), cur_src, cur_dst;
|
59
|
-
double cur_score, cur_score_src, cur_score_dst, combined_score;
|
60
|
-
|
61
|
-
for (int i = 0; i < size; i++) {
|
62
|
-
|
63
|
-
if (i == 0 || times[i] > cur_t) {
|
64
|
-
cur_count.lower(factor);
|
65
|
-
src_score.lower(factor);
|
66
|
-
dst_score.lower(factor);
|
67
|
-
cur_t = times[i];
|
68
|
-
}
|
69
|
-
|
70
|
-
cur_src = src[i];
|
71
|
-
cur_dst = dst[i];
|
72
|
-
cur_count.insert(cur_src, cur_dst, 1);
|
73
|
-
total_count.insert(cur_src, cur_dst, 1);
|
74
|
-
src_score.insert(cur_src, 1);
|
75
|
-
dst_score.insert(cur_dst, 1);
|
76
|
-
src_total.insert(cur_src, 1);
|
77
|
-
dst_total.insert(cur_dst, 1);
|
78
|
-
cur_score = counts_to_anom(total_count.get_count(cur_src, cur_dst), cur_count.get_count(cur_src, cur_dst), cur_t);
|
79
|
-
cur_score_src = counts_to_anom(src_total.get_count(cur_src), src_score.get_count(cur_src), cur_t);
|
80
|
-
cur_score_dst = counts_to_anom(dst_total.get_count(cur_dst), dst_score.get_count(cur_dst), cur_t);
|
81
|
-
//combined_score = MAX(cur_score_src, cur_score_dst) + cur_score;
|
82
|
-
//combined_score = cur_score_src + cur_score_dst + cur_score;
|
83
|
-
combined_score = MAX(MAX(cur_score_src, cur_score_dst), cur_score);
|
84
|
-
(*anom_score)[i] = log(1 + combined_score);
|
85
|
-
}
|
86
|
-
|
87
|
-
return anom_score;
|
88
|
-
}
|