midas-edge 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/README.md +6 -4
- data/ext/midas/ext.cpp +32 -16
- data/ext/midas/extconf.rb +1 -5
- data/lib/midas/detector.rb +7 -4
- data/lib/midas/version.rb +1 -1
- data/vendor/MIDAS/LICENSE +0 -25
- data/vendor/MIDAS/README.md +110 -40
- metadata +2 -10
- data/vendor/MIDAS/anom.cpp +0 -88
- data/vendor/MIDAS/anom.hpp +0 -10
- data/vendor/MIDAS/argparse.hpp +0 -539
- data/vendor/MIDAS/edgehash.cpp +0 -63
- data/vendor/MIDAS/edgehash.hpp +0 -25
- data/vendor/MIDAS/main.cpp +0 -127
- data/vendor/MIDAS/nodehash.cpp +0 -63
- data/vendor/MIDAS/nodehash.hpp +0 -25
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9e4c7b4cac2e7a9dac3a6085b40d79a48b9f14dad32834e0d267f7bb667d86b9
|
4
|
+
data.tar.gz: abd0836d284e7a9c34c7733f2f195e38faca2fb1ac6ab01687c3790bc96d3cfa
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 48c92dbdbff039a514ca99207a88276fada4a1391497a1fd4595176f90bec0ac46f0fc89126d36c7ccfc3a1afe212aa11f2142fb7d3d836734f64ab912a88ffb
|
7
|
+
data.tar.gz: 9a2a140ed5d5a120969ee5fc0a599cccc6d590755c9eddf949c2e0a5897aaf39b6f8fe8137c9dde3fd418c78f04cdd8801d1353ca61009d4fbae5a610ad2edc5
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -43,9 +43,11 @@ Pass parameters - default values below
|
|
43
43
|
Midas.new(
|
44
44
|
rows: 2, # number of hash functions
|
45
45
|
buckets: 769, # number of buckets
|
46
|
-
alpha: 0.
|
46
|
+
alpha: 0.5, # temporal decay factor
|
47
|
+
threshold: nil, # optional threshold; when set, the filtering core is used
|
47
48
|
relations: true, # whether to use MIDAS-R or MIDAS
|
48
|
-
directed: true
|
49
|
+
directed: true, # treat the graph as directed or undirected
|
50
|
+
seed: 0 # random seed
|
49
51
|
)
|
50
52
|
```
|
51
53
|
|
@@ -57,10 +59,10 @@ Data can be an array of arrays
|
|
57
59
|
[[1, 2, 3], [4, 5, 6]]
|
58
60
|
```
|
59
61
|
|
60
|
-
Or a Numo
|
62
|
+
Or a Numo array
|
61
63
|
|
62
64
|
```ruby
|
63
|
-
Numo::
|
65
|
+
Numo::NArray.cast([[1, 2, 3], [4, 5, 6]])
|
64
66
|
```
|
65
67
|
|
66
68
|
## Performance
|
data/ext/midas/ext.cpp
CHANGED
@@ -3,7 +3,9 @@
|
|
3
3
|
#include <vector>
|
4
4
|
|
5
5
|
// midas
|
6
|
-
#include <
|
6
|
+
#include <FilteringCore.hpp>
|
7
|
+
#include <NormalCore.hpp>
|
8
|
+
#include <RelationalCore.hpp>
|
7
9
|
|
8
10
|
// rice
|
9
11
|
#include <rice/Module.hpp>
|
@@ -14,7 +16,7 @@ using Rice::String;
|
|
14
16
|
using Rice::define_module;
|
15
17
|
using Rice::define_class_under;
|
16
18
|
|
17
|
-
void load_str(vector<int>& src, vector<int>& dst, vector<int>& times, std::string input, bool directed) {
|
19
|
+
void load_str(std::vector<int>& src, std::vector<int>& dst, std::vector<int>& times, std::string input, bool directed) {
|
18
20
|
int* input_ptr = (int*) input.data();
|
19
21
|
size_t n = input.size() / sizeof(int);
|
20
22
|
|
@@ -39,7 +41,7 @@ void load_str(vector
|
|
39
41
|
// load_data from main.cpp
|
40
42
|
// modified to throw std::runtime_error when cannot find file
|
41
43
|
// instead of exiting
|
42
|
-
void load_file(vector<int>& src, vector<int>& dst, vector<int>& times, std::string input_file, bool undirected)
|
44
|
+
void load_file(std::vector<int>& src, std::vector<int>& dst, std::vector<int>& times, std::string input_file, bool undirected)
|
43
45
|
{
|
44
46
|
FILE* infile = fopen(input_file.c_str(), "r");
|
45
47
|
if (infile == NULL) {
|
@@ -56,7 +58,7 @@ void load_file(vector
|
|
56
58
|
}
|
57
59
|
}
|
58
60
|
else {
|
59
|
-
while (fscanf(infile, "%d
|
61
|
+
while (fscanf(infile, "%d,%d,%d", &s, &d, &t) == 3) {
|
60
62
|
src.push_back(s);
|
61
63
|
dst.push_back(d);
|
62
64
|
times.push_back(t);
|
@@ -67,14 +69,28 @@ void load_file(vector
|
|
67
69
|
}
|
68
70
|
}
|
69
71
|
|
70
|
-
std::string fit_predict(vector<int>& src, vector<int>& dst, vector<int>& times, int num_rows, int num_buckets,
|
71
|
-
|
72
|
-
|
73
|
-
|
72
|
+
std::string fit_predict(std::vector<int>& src, std::vector<int>& dst, std::vector<int>& times, int num_rows, int num_buckets, float factor, float threshold, bool relations, int seed) {
|
73
|
+
srand(seed);
|
74
|
+
size_t n = src.size();
|
75
|
+
const auto result = new float[n];
|
76
|
+
|
77
|
+
if (!std::isnan(threshold)) {
|
78
|
+
MIDAS::FilteringCore midas(num_rows, num_buckets, threshold, factor);
|
79
|
+
for (size_t i = 0; i < n; i++) {
|
80
|
+
result[i] = midas(src[i], dst[i], times[i]);
|
81
|
+
}
|
82
|
+
} else if (relations) {
|
83
|
+
MIDAS::RelationalCore midas(num_rows, num_buckets, factor);
|
84
|
+
for (size_t i = 0; i < n; i++) {
|
85
|
+
result[i] = midas(src[i], dst[i], times[i]);
|
86
|
+
}
|
74
87
|
} else {
|
75
|
-
|
88
|
+
MIDAS::NormalCore midas(num_rows, num_buckets);
|
89
|
+
for (size_t i = 0; i < n; i++) {
|
90
|
+
result[i] = midas(src[i], dst[i], times[i]);
|
91
|
+
}
|
76
92
|
}
|
77
|
-
return std::string((char*) result
|
93
|
+
return std::string((char*) result, sizeof(float) / sizeof(char) * n);
|
78
94
|
}
|
79
95
|
|
80
96
|
extern "C"
|
@@ -85,16 +101,16 @@ void Init_ext()
|
|
85
101
|
define_class_under(rb_mMidas, "Detector")
|
86
102
|
.define_method(
|
87
103
|
"_fit_predict_str",
|
88
|
-
*[](std::string input, int num_rows, int num_buckets,
|
89
|
-
vector<int> src, dst, times;
|
104
|
+
*[](std::string input, int num_rows, int num_buckets, float factor, float threshold, bool relations, bool directed, int seed) {
|
105
|
+
std::vector<int> src, dst, times;
|
90
106
|
load_str(src, dst, times, input, directed);
|
91
|
-
return fit_predict(src, dst, times, num_rows, num_buckets, factor, relations);
|
107
|
+
return fit_predict(src, dst, times, num_rows, num_buckets, factor, threshold, relations, seed);
|
92
108
|
})
|
93
109
|
.define_method(
|
94
110
|
"_fit_predict_file",
|
95
|
-
*[](std::string input, int num_rows, int num_buckets,
|
96
|
-
vector<int> src, dst, times;
|
111
|
+
*[](std::string input, int num_rows, int num_buckets, float factor, float threshold, bool relations, bool directed, int seed) {
|
112
|
+
std::vector<int> src, dst, times;
|
97
113
|
load_file(src, dst, times, input, !directed);
|
98
|
-
return fit_predict(src, dst, times, num_rows, num_buckets, factor, relations);
|
114
|
+
return fit_predict(src, dst, times, num_rows, num_buckets, factor, threshold, relations, seed);
|
99
115
|
});
|
100
116
|
}
|
data/ext/midas/extconf.rb
CHANGED
@@ -2,11 +2,7 @@ require "mkmf-rice"
|
|
2
2
|
|
3
3
|
$CXXFLAGS << " -std=c++17"
|
4
4
|
|
5
|
-
|
6
|
-
midas = File.expand_path("../../vendor/MIDAS", __dir__)
|
7
|
-
|
8
|
-
$srcs = Dir["{#{ext},#{midas}}/*.{cc,cpp}"]
|
5
|
+
midas = File.expand_path("../../vendor/MIDAS/src", __dir__)
|
9
6
|
$INCFLAGS << " -I#{midas}"
|
10
|
-
$VPATH << midas
|
11
7
|
|
12
8
|
create_makefile("midas/ext")
|
data/lib/midas/detector.rb
CHANGED
@@ -1,25 +1,28 @@
|
|
1
1
|
module Midas
|
2
2
|
class Detector
|
3
|
-
def initialize(rows: 2, buckets: 769, alpha: 0.
|
3
|
+
def initialize(rows: 2, buckets: 769, alpha: 0.5, threshold: nil, relations: true, directed: true, seed: 0)
|
4
4
|
@rows = rows
|
5
5
|
@buckets = buckets
|
6
6
|
@alpha = alpha
|
7
|
+
@threshold = threshold
|
7
8
|
@relations = relations
|
8
9
|
@directed = directed
|
10
|
+
@seed = seed
|
9
11
|
end
|
10
12
|
|
11
13
|
def fit_predict(x)
|
14
|
+
threshold = @threshold || Float::NAN
|
12
15
|
result =
|
13
16
|
if x.is_a?(String)
|
14
|
-
_fit_predict_file(x, @rows, @buckets, @alpha, @relations, @directed)
|
17
|
+
_fit_predict_file(x, @rows, @buckets, @alpha, threshold, @relations, @directed, @seed)
|
15
18
|
else
|
16
19
|
x = Numo::Int32.cast(x) unless x.is_a?(Numo::NArray)
|
17
20
|
x = x.cast_to(Numo::Int32) unless x.is_a?(Numo::Int32)
|
18
21
|
raise ArgumentError, "Bad shape: #{x.shape}" unless x.rank == 2 && x.shape[1] == 3
|
19
|
-
_fit_predict_str(x.to_binary, @rows, @buckets, @alpha, @relations, @directed)
|
22
|
+
_fit_predict_str(x.to_binary, @rows, @buckets, @alpha, threshold, @relations, @directed, @seed)
|
20
23
|
end
|
21
24
|
|
22
|
-
Numo::
|
25
|
+
Numo::SFloat.from_binary(result)
|
23
26
|
end
|
24
27
|
end
|
25
28
|
end
|
data/lib/midas/version.rb
CHANGED
data/vendor/MIDAS/LICENSE
CHANGED
@@ -174,28 +174,3 @@
|
|
174
174
|
of your accepting any such warranty or additional liability.
|
175
175
|
|
176
176
|
END OF TERMS AND CONDITIONS
|
177
|
-
|
178
|
-
APPENDIX: How to apply the Apache License to your work.
|
179
|
-
|
180
|
-
To apply the Apache License to your work, attach the following
|
181
|
-
boilerplate notice, with the fields enclosed by brackets "[]"
|
182
|
-
replaced with your own identifying information. (Don't include
|
183
|
-
the brackets!) The text should be enclosed in the appropriate
|
184
|
-
comment syntax for the file format. We also recommend that a
|
185
|
-
file or class name and description of purpose be included on the
|
186
|
-
same "printed page" as the copyright notice for easier
|
187
|
-
identification within third-party archives.
|
188
|
-
|
189
|
-
Copyright [yyyy] [name of copyright owner]
|
190
|
-
|
191
|
-
Licensed under the Apache License, Version 2.0 (the "License");
|
192
|
-
you may not use this file except in compliance with the License.
|
193
|
-
You may obtain a copy of the License at
|
194
|
-
|
195
|
-
http://www.apache.org/licenses/LICENSE-2.0
|
196
|
-
|
197
|
-
Unless required by applicable law or agreed to in writing, software
|
198
|
-
distributed under the License is distributed on an "AS IS" BASIS,
|
199
|
-
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
200
|
-
See the License for the specific language governing permissions and
|
201
|
-
limitations under the License.
|
data/vendor/MIDAS/README.md
CHANGED
@@ -1,66 +1,136 @@
|
|
1
1
|
# MIDAS
|
2
|
-
[](https://aaai.org/Conferences/AAAI-20/)
|
3
|
-
[](https://www.comp.nus.edu.sg/~sbhatia/assets/pdf/midas.pdf)
|
4
|
-
[](https://www.comp.nus.edu.sg/~sbhatia/assets/pdf/midasposter.pdf)
|
5
|
-
[](https://github.com/bhatiasiddharth/MIDAS/blob/master/LICENSE)
|
6
2
|
|
7
|
-
<p
|
8
|
-
<
|
3
|
+
<p>
|
4
|
+
<a href="https://aaai.org/Conferences/AAAI-20/">
|
5
|
+
<img src="http://img.shields.io/badge/AAAI-2020-red.svg">
|
6
|
+
</a>
|
7
|
+
<a href="https://www.comp.nus.edu.sg/~sbhatia/assets/pdf/midas.pdf"><img src="http://img.shields.io/badge/Paper-PDF-brightgreen.svg"></a>
|
8
|
+
<a href="https://www.comp.nus.edu.sg/~sbhatia/assets/pdf/midasslides.pdf">
|
9
|
+
<img src="http://img.shields.io/badge/Slides-PDF-ff9e18.svg">
|
10
|
+
</a>
|
11
|
+
<a href="https://youtu.be/Bd4PyLCHrto">
|
12
|
+
<img src="http://img.shields.io/badge/Talk-Youtube-ff69b4.svg">
|
13
|
+
</a>
|
14
|
+
<a href="https://www.kdnuggets.com/2020/04/midas-new-baseline-anomaly-detection-graphs.html">
|
15
|
+
<img src="https://img.shields.io/badge/Press-KDnuggets-orange.svg">
|
16
|
+
</a>
|
17
|
+
<a href="https://github.com/bhatiasiddharth/MIDAS/blob/master/LICENSE">
|
18
|
+
<img src="https://img.shields.io/badge/License-Apache%202.0-blue.svg">
|
19
|
+
</a>
|
9
20
|
</p>
|
10
21
|
|
22
|
+
C++ implementation of
|
11
23
|
|
12
|
-
|
24
|
+
- Real-time Streaming Anomaly Detection in Dynamic Graphs. *Siddharth Bhatia, Rui Liu, Bryan Hooi, Minji Yoon, Kijung Shin, Christos Faloutsos*. (Under Review)
|
25
|
+
- [MIDAS: Microcluster-Based Detector of Anomalies in Edge Streams](asset/Conference.pdf). *Siddharth Bhatia, Bryan Hooi, Minji Yoon, Kijung Shin, Christos Faloutsos*. AAAI 2020.
|
13
26
|
|
14
|
-
|
27
|
+
The old implementation is in another branch, `OldImplementation`; it should be considered archived and is unlikely to receive feature updates.
|
15
28
|
|
16
|
-
|
29
|
+

|
17
30
|
|
18
|
-
|
31
|
+
## Table of Contents
|
19
32
|
|
33
|
+
- [Features](#features)
|
34
|
+
- [Demo](#demo)
|
35
|
+
- [Customization](#customization)
|
36
|
+
- [Online Articles](#online-articles)
|
37
|
+
- [MIDAS in Other Languages](#midas-in-other-languages)
|
38
|
+
- [Citation](#citation)
|
20
39
|
|
21
|
-
##
|
22
|
-
1. Run `make` to compile code and create the binary.
|
23
|
-
2. Run `./midas -i ` followed by the input file path and name.
|
24
|
-
3. Run `make clean` to clean binaries.
|
40
|
+
## Features
|
25
41
|
|
42
|
+
- Finds Anomalies in Dynamic/Time-Evolving Graphs: (Intrusion Detection, Fake Ratings, Financial Fraud)
|
43
|
+
- Detects Microcluster Anomalies (suddenly arriving groups of suspiciously similar edges e.g. DoS attack)
|
44
|
+
- Theoretical Guarantees on False Positive Probability
|
45
|
+
- Constant Memory (independent of graph size)
|
46
|
+
- Constant Update Time (real-time anomaly detection to minimize harm)
|
47
|
+
- Up to 55% more accurate and 929 times faster than the state of the art approaches
|
48
|
+
- Some experiments are performed on the following datasets:
|
49
|
+
- [DARPA](https://www.ll.mit.edu/r-d/datasets/1998-darpa-intrusion-detection-evaluation-dataset)
|
50
|
+
- [TwitterWorldCup2014](http://odds.cs.stonybrook.edu/twitterworldcup2014-dataset)
|
51
|
+
- [TwitterSecurity](http://odds.cs.stonybrook.edu/twittersecurity-dataset)
|
26
52
|
|
27
53
|
## Demo
|
28
|
-
1. Run `./demo.sh` to compile the code and run it on example dataset.
|
29
54
|
|
55
|
+
If you use Windows:
|
30
56
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
* `-a --alpha`: Temporal Decay Factor (default: 0.6)
|
38
|
-
* `--norelations` : Run MIDAS instead of MIDAS-R
|
39
|
-
* `--undirected` : Treat graph as undirected instead of directed
|
57
|
+
1. Open a Visual Studio developer command prompt (its toolchain is required)
|
58
|
+
1. `cd` to the project root `MIDAS/`
|
59
|
+
1. `cmake -DCMAKE_BUILD_TYPE=Release -G "NMake Makefiles" -S . -B build/release`
|
60
|
+
1. `cmake --build build/release --target Demo`
|
61
|
+
1. `cd` to `MIDAS/build/release/src`
|
62
|
+
1. `.\Demo.exe`
|
40
63
|
|
64
|
+
If you use Linux/macOS systems:
|
41
65
|
|
42
|
-
|
43
|
-
|
44
|
-
1. `
|
45
|
-
|
46
|
-
|
66
|
+
1. Open a terminal
|
67
|
+
1. `cd` to the project root `MIDAS/`
|
68
|
+
1. `cmake -DCMAKE_BUILD_TYPE=Release -S . -B build/release`
|
69
|
+
1. `cmake --build build/release --target Demo`
|
70
|
+
1. `cd` to `MIDAS/build/release/src`
|
71
|
+
1. `./Demo`
|
47
72
|
|
48
|
-
|
73
|
+
The demo runs on `MIDAS/data/DARPA/darpa_processed.csv`, which has 4.5M records, with the filtering core.
|
49
74
|
|
75
|
+
The scores will be exported to `MIDAS/temp/Score.txt`, higher means more anomalous.
|
50
76
|
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
77
|
+
All file paths are absolute and "hardcoded" by CMake, but running the executable by double-clicking it is NOT recommended.
|
78
|
+
|
79
|
+
## Customization
|
80
|
+
|
81
|
+
### Switch Cores
|
82
|
+
|
83
|
+
Cores are instantiated at `MIDAS/example/Demo.cpp:64-66`; uncomment the one you want to use.
|
84
|
+
|
85
|
+
### Custom Dataset + `Demo.cpp`
|
86
|
+
|
87
|
+
You need to prepare three files:
|
88
|
+
|
89
|
+
- Meta file
|
90
|
+
- Only includes an integer `N`, the number of records in the dataset
|
91
|
+
- Use its path for `pathMeta`
|
92
|
+
- Data file
|
93
|
+
- A header-less csv format file of shape `[N,3]`
|
94
|
+
- Columns are sources, destinations, timestamps
|
95
|
+
- Use its path for `pathData`
|
96
|
+
- Label file
|
97
|
+
- A header-less csv format file of shape `[N,1]`
|
98
|
+
- The corresponding label for data records
|
99
|
+
- 0 means normal record
|
100
|
+
- 1 means anomalous record
|
101
|
+
- Use its path for `pathGroundTruth`
|
102
|
+
|
103
|
+
### Custom Dataset + Custom Runner
|
104
|
+
|
105
|
+
1. Include the header `MIDAS/CPU/NormalCore.hpp`, `MIDAS/CPU/RelationalCore.hpp` or `MIDAS/CPU/FilteringCore.hpp`
|
106
|
+
1. Instantiate cores with required parameters
|
107
|
+
1. Call `operator()` on individual data records; it returns the anomaly score for the input record.
|
108
|
+
|
109
|
+
## Online Articles
|
110
|
+
|
111
|
+
1. KDnuggets: [Introducing MIDAS: A New Baseline for Anomaly Detection in Graphs](https://www.kdnuggets.com/2020/04/midas-new-baseline-anomaly-detection-graphs.html)
|
112
|
+
2. Towards Data Science: [Controlling Fake News using Graphs and Statistics](https://towardsdatascience.com/controlling-fake-news-using-graphs-and-statistics-31ed116a986f)
|
113
|
+
3. Towards Data Science: [Anomaly detection in dynamic graphs using MIDAS](https://towardsdatascience.com/anomaly-detection-in-dynamic-graphs-using-midas-e4f8d0b1db45)
|
114
|
+
4. Towards AI: [Anomaly Detection with MIDAS](https://medium.com/towards-artificial-intelligence/anomaly-detection-with-midas-2735a2e6dce8)
|
115
|
+
5. [AIhub Interview](https://aihub.org/2020/05/01/interview-with-siddharth-bhatia-a-new-approach-for-anomaly-detection/)
|
116
|
+
|
117
|
+
## MIDAS in Other Languages
|
118
|
+
|
119
|
+
1. [Golang](https://github.com/steve0hh/midas) by [Steve Tan](https://github.com/steve0hh)
|
120
|
+
2. [Ruby](https://github.com/ankane/midas) by [Andrew Kane](https://github.com/ankane)
|
121
|
+
3. [Rust](https://github.com/scooter-dangle/midas_rs) by [Scott Steele](https://github.com/scooter-dangle)
|
122
|
+
4. [R](https://github.com/pteridin/MIDASwrappeR) by [Tobias Heidler](https://github.com/pteridin)
|
123
|
+
5. [Python](https://github.com/ritesh99rakesh/pyMIDAS) by [Ritesh Kumar](https://github.com/ritesh99rakesh)
|
55
124
|
|
56
125
|
## Citation
|
126
|
+
|
57
127
|
If you use this code for your research, please consider citing our paper.
|
58
128
|
|
59
129
|
```
|
60
|
-
@
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
130
|
+
@inproceedings{bhatia2020midas,
|
131
|
+
title="MIDAS: Microcluster-Based Detector of Anomalies in Edge Streams",
|
132
|
+
author="Siddharth {Bhatia} and Bryan {Hooi} and Minji {Yoon} and Kijung {Shin} and Christos {Faloutsos}",
|
133
|
+
booktitle="AAAI 2020 : The Thirty-Fourth AAAI Conference on Artificial Intelligence",
|
134
|
+
year="2020"
|
65
135
|
}
|
66
|
-
```
|
136
|
+
```
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: midas-edge
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-06-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rice
|
@@ -112,14 +112,6 @@ files:
|
|
112
112
|
- lib/midas/version.rb
|
113
113
|
- vendor/MIDAS/LICENSE
|
114
114
|
- vendor/MIDAS/README.md
|
115
|
-
- vendor/MIDAS/anom.cpp
|
116
|
-
- vendor/MIDAS/anom.hpp
|
117
|
-
- vendor/MIDAS/argparse.hpp
|
118
|
-
- vendor/MIDAS/edgehash.cpp
|
119
|
-
- vendor/MIDAS/edgehash.hpp
|
120
|
-
- vendor/MIDAS/main.cpp
|
121
|
-
- vendor/MIDAS/nodehash.cpp
|
122
|
-
- vendor/MIDAS/nodehash.hpp
|
123
115
|
homepage: https://github.com/ankane/midas
|
124
116
|
licenses:
|
125
117
|
- MIT
|
data/vendor/MIDAS/anom.cpp
DELETED
@@ -1,88 +0,0 @@
|
|
1
|
-
#define MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
|
2
|
-
#define MAX(X, Y) (((X) > (Y)) ? (X) : (Y))
|
3
|
-
|
4
|
-
#include <iostream>
|
5
|
-
#include <math.h>
|
6
|
-
#include <algorithm>
|
7
|
-
#include <vector>
|
8
|
-
#include "anom.hpp"
|
9
|
-
#include "edgehash.hpp"
|
10
|
-
#include "nodehash.hpp"
|
11
|
-
|
12
|
-
vector<double>* midas(vector<int>& src, vector<int>& dst, vector<int>& times, int num_rows, int num_buckets)
|
13
|
-
{
|
14
|
-
int m = *max_element(src.begin(), src.end());
|
15
|
-
Edgehash cur_count(num_rows, num_buckets, m);
|
16
|
-
Edgehash total_count(num_rows, num_buckets, m);
|
17
|
-
vector<double>* anom_score = new vector<double>(src.size());
|
18
|
-
int cur_t = 1, size = src.size(), cur_src, cur_dst;
|
19
|
-
double cur_mean, sqerr, cur_score;
|
20
|
-
for (int i = 0; i < size; i++) {
|
21
|
-
|
22
|
-
if (i == 0 || times[i] > cur_t) {
|
23
|
-
cur_count.clear();
|
24
|
-
cur_t = times[i];
|
25
|
-
}
|
26
|
-
|
27
|
-
cur_src = src[i];
|
28
|
-
cur_dst = dst[i];
|
29
|
-
cur_count.insert(cur_src, cur_dst, 1);
|
30
|
-
total_count.insert(cur_src, cur_dst, 1);
|
31
|
-
cur_mean = total_count.get_count(cur_src, cur_dst) / cur_t;
|
32
|
-
sqerr = pow(cur_count.get_count(cur_src, cur_dst) - cur_mean, 2);
|
33
|
-
if (cur_t == 1) cur_score = 0;
|
34
|
-
else cur_score = sqerr / cur_mean + sqerr / (cur_mean * (cur_t - 1));
|
35
|
-
(*anom_score)[i] = cur_score;
|
36
|
-
}
|
37
|
-
|
38
|
-
return anom_score;
|
39
|
-
}
|
40
|
-
|
41
|
-
double counts_to_anom(double tot, double cur, int cur_t)
|
42
|
-
{
|
43
|
-
double cur_mean = tot / cur_t;
|
44
|
-
double sqerr = pow(MAX(0, cur - cur_mean), 2);
|
45
|
-
return sqerr / cur_mean + sqerr / (cur_mean * MAX(1, cur_t - 1));
|
46
|
-
}
|
47
|
-
|
48
|
-
vector<double>* midasR(vector<int>& src, vector<int>& dst, vector<int>& times, int num_rows, int num_buckets, double factor)
|
49
|
-
{
|
50
|
-
int m = *max_element(src.begin(), src.end());
|
51
|
-
Edgehash cur_count(num_rows, num_buckets, m);
|
52
|
-
Edgehash total_count(num_rows, num_buckets, m);
|
53
|
-
Nodehash src_score(num_rows, num_buckets);
|
54
|
-
Nodehash dst_score(num_rows, num_buckets);
|
55
|
-
Nodehash src_total(num_rows, num_buckets);
|
56
|
-
Nodehash dst_total(num_rows, num_buckets);
|
57
|
-
vector<double>* anom_score = new vector<double>(src.size());
|
58
|
-
int cur_t = 1, size = src.size(), cur_src, cur_dst;
|
59
|
-
double cur_score, cur_score_src, cur_score_dst, combined_score;
|
60
|
-
|
61
|
-
for (int i = 0; i < size; i++) {
|
62
|
-
|
63
|
-
if (i == 0 || times[i] > cur_t) {
|
64
|
-
cur_count.lower(factor);
|
65
|
-
src_score.lower(factor);
|
66
|
-
dst_score.lower(factor);
|
67
|
-
cur_t = times[i];
|
68
|
-
}
|
69
|
-
|
70
|
-
cur_src = src[i];
|
71
|
-
cur_dst = dst[i];
|
72
|
-
cur_count.insert(cur_src, cur_dst, 1);
|
73
|
-
total_count.insert(cur_src, cur_dst, 1);
|
74
|
-
src_score.insert(cur_src, 1);
|
75
|
-
dst_score.insert(cur_dst, 1);
|
76
|
-
src_total.insert(cur_src, 1);
|
77
|
-
dst_total.insert(cur_dst, 1);
|
78
|
-
cur_score = counts_to_anom(total_count.get_count(cur_src, cur_dst), cur_count.get_count(cur_src, cur_dst), cur_t);
|
79
|
-
cur_score_src = counts_to_anom(src_total.get_count(cur_src), src_score.get_count(cur_src), cur_t);
|
80
|
-
cur_score_dst = counts_to_anom(dst_total.get_count(cur_dst), dst_score.get_count(cur_dst), cur_t);
|
81
|
-
//combined_score = MAX(cur_score_src, cur_score_dst) + cur_score;
|
82
|
-
//combined_score = cur_score_src + cur_score_dst + cur_score;
|
83
|
-
combined_score = MAX(MAX(cur_score_src, cur_score_dst), cur_score);
|
84
|
-
(*anom_score)[i] = log(1 + combined_score);
|
85
|
-
}
|
86
|
-
|
87
|
-
return anom_score;
|
88
|
-
}
|