midas-edge 0.1.0 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +24 -0
- data/NOTICE.txt +1 -0
- data/README.md +10 -7
- data/ext/midas/ext.cpp +32 -16
- data/ext/midas/extconf.rb +2 -8
- data/lib/midas/detector.rb +7 -4
- data/lib/midas/version.rb +1 -1
- data/vendor/MIDAS/LICENSE +0 -25
- data/vendor/MIDAS/README.md +185 -40
- data/vendor/MIDAS/src/CountMinSketch.hpp +105 -0
- data/vendor/MIDAS/src/FilteringCore.hpp +98 -0
- data/vendor/MIDAS/src/NormalCore.hpp +53 -0
- data/vendor/MIDAS/src/RelationalCore.hpp +79 -0
- metadata +11 -16
- data/lib/midas/ext.bundle +0 -0
- data/vendor/MIDAS/anom.cpp +0 -88
- data/vendor/MIDAS/anom.hpp +0 -10
- data/vendor/MIDAS/argparse.hpp +0 -539
- data/vendor/MIDAS/edgehash.cpp +0 -63
- data/vendor/MIDAS/edgehash.hpp +0 -25
- data/vendor/MIDAS/main.cpp +0 -127
- data/vendor/MIDAS/nodehash.cpp +0 -63
- data/vendor/MIDAS/nodehash.hpp +0 -25
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 71b32a1851488ba989ccc792af8922d1148f0c0f4585574821ea1759c908ad41
|
4
|
+
data.tar.gz: a4b9105e96a3407708d8e64a31c33e93dbbb8c9127c1f99f802044cdd4aa6818
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 28149251ec92a2b24e07c7745ad0b759c23ff4d957fa56298d6c6a92384f798e0651086a524b3b95e7911ed5500405929ef6627059429d41f3126381b73e479e
|
7
|
+
data.tar.gz: 2146d83339cd7c466d053f82f937a44ceb16d609d31034cd740b90425601ea12861e9bd1377a8696c5655af929a8dc1201fae6e79bed3addfd96125931e2f674
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,27 @@
|
|
1
|
+
## 0.2.3 (2020-11-17)
|
2
|
+
|
3
|
+
- Updated MIDAS to 1.1.2
|
4
|
+
|
5
|
+
## 0.2.2 (2020-09-23)
|
6
|
+
|
7
|
+
- Updated MIDAS to 1.1.0
|
8
|
+
|
9
|
+
## 0.2.1 (2020-06-17)
|
10
|
+
|
11
|
+
- Fixed installation (missing header files)
|
12
|
+
|
13
|
+
## 0.2.0 (2020-06-17)
|
14
|
+
|
15
|
+
- Updated MIDAS to 1.0.0
|
16
|
+
- Added `threshold` option
|
17
|
+
- Added `seed` option
|
18
|
+
- Changed default `alpha` to 0.5
|
19
|
+
- Fixed reading data from files with `directed: false`
|
20
|
+
|
21
|
+
## 0.1.1 (2020-02-19)
|
22
|
+
|
23
|
+
- Fixed installation on Linux
|
24
|
+
|
1
25
|
## 0.1.0 (2020-02-17)
|
2
26
|
|
3
27
|
- First release
|
data/NOTICE.txt
CHANGED
data/README.md
CHANGED
@@ -2,13 +2,14 @@
|
|
2
2
|
|
3
3
|
[MIDAS](https://github.com/bhatiasiddharth/MIDAS) - edge stream anomaly detection - for Ruby
|
4
4
|
|
5
|
+
[![Build Status](https://travis-ci.org/ankane/midas.svg?branch=master)](https://travis-ci.org/ankane/midas)
|
6
|
+
|
5
7
|
## Installation
|
6
8
|
|
7
|
-
Add
|
9
|
+
Add these lines to your application’s Gemfile:
|
8
10
|
|
9
11
|
```ruby
|
10
12
|
gem 'midas-edge'
|
11
|
-
gem 'rice', github: 'jasonroelofs/rice' # for now for c++17
|
12
13
|
```
|
13
14
|
|
14
15
|
## Getting Started
|
@@ -41,9 +42,11 @@ Pass parameters - default values below
|
|
41
42
|
Midas.new(
|
42
43
|
rows: 2, # number of hash functions
|
43
44
|
buckets: 769, # number of buckets
|
44
|
-
alpha: 0.
|
45
|
+
alpha: 0.5, # temporal decay factor
|
46
|
+
threshold: nil, # todo
|
45
47
|
relations: true, # whether to use MIDAS-R or MIDAS
|
46
|
-
directed: true
|
48
|
+
directed: true, # treat the graph as directed or undirected
|
49
|
+
seed: 0 # random seed
|
47
50
|
)
|
48
51
|
```
|
49
52
|
|
@@ -55,10 +58,10 @@ Data can be an array of arrays
|
|
55
58
|
[[1, 2, 3], [4, 5, 6]]
|
56
59
|
```
|
57
60
|
|
58
|
-
Or a Numo
|
61
|
+
Or a Numo array
|
59
62
|
|
60
63
|
```ruby
|
61
|
-
Numo::
|
64
|
+
Numo::NArray.cast([[1, 2, 3], [4, 5, 6]])
|
62
65
|
```
|
63
66
|
|
64
67
|
## Performance
|
@@ -89,7 +92,7 @@ Everyone is encouraged to help improve this project. Here are a few ways you can
|
|
89
92
|
To get started with development:
|
90
93
|
|
91
94
|
```sh
|
92
|
-
git clone https://github.com/ankane/midas.git
|
95
|
+
git clone --recursive https://github.com/ankane/midas.git
|
93
96
|
cd midas
|
94
97
|
bundle install
|
95
98
|
bundle exec rake compile
|
data/ext/midas/ext.cpp
CHANGED
@@ -3,7 +3,9 @@
|
|
3
3
|
#include <vector>
|
4
4
|
|
5
5
|
// midas
|
6
|
-
#include <
|
6
|
+
#include <FilteringCore.hpp>
|
7
|
+
#include <NormalCore.hpp>
|
8
|
+
#include <RelationalCore.hpp>
|
7
9
|
|
8
10
|
// rice
|
9
11
|
#include <rice/Module.hpp>
|
@@ -14,7 +16,7 @@ using Rice::String;
|
|
14
16
|
using Rice::define_module;
|
15
17
|
using Rice::define_class_under;
|
16
18
|
|
17
|
-
void load_str(vector<int>& src, vector<int>& dst, vector<int>& times, std::string input, bool directed) {
|
19
|
+
void load_str(std::vector<int>& src, std::vector<int>& dst, std::vector<int>& times, std::string input, bool directed) {
|
18
20
|
int* input_ptr = (int*) input.data();
|
19
21
|
size_t n = input.size() / sizeof(int);
|
20
22
|
|
@@ -39,7 +41,7 @@ void load_str(vector<int>& src, vector<int>& dst, vector<int>& times, std::strin
|
|
39
41
|
// load_data from main.cpp
|
40
42
|
// modified to throw std::runtime_error when cannot find file
|
41
43
|
// instead of exiting
|
42
|
-
void load_file(vector<int>& src, vector<int>& dst, vector<int>& times, std::string input_file, bool undirected)
|
44
|
+
void load_file(std::vector<int>& src, std::vector<int>& dst, std::vector<int>& times, std::string input_file, bool undirected)
|
43
45
|
{
|
44
46
|
FILE* infile = fopen(input_file.c_str(), "r");
|
45
47
|
if (infile == NULL) {
|
@@ -56,7 +58,7 @@ void load_file(vector<int>& src, vector<int>& dst, vector<int>& times, std::stri
|
|
56
58
|
}
|
57
59
|
}
|
58
60
|
else {
|
59
|
-
while (fscanf(infile, "%d
|
61
|
+
while (fscanf(infile, "%d,%d,%d", &s, &d, &t) == 3) {
|
60
62
|
src.push_back(s);
|
61
63
|
dst.push_back(d);
|
62
64
|
times.push_back(t);
|
@@ -67,14 +69,28 @@ void load_file(vector<int>& src, vector<int>& dst, vector<int>& times, std::stri
|
|
67
69
|
}
|
68
70
|
}
|
69
71
|
|
70
|
-
std::string fit_predict(vector<int>& src, vector<int>& dst, vector<int>& times, int num_rows, int num_buckets,
|
71
|
-
|
72
|
-
|
73
|
-
|
72
|
+
std::string fit_predict(std::vector<int>& src, std::vector<int>& dst, std::vector<int>& times, int num_rows, int num_buckets, float factor, float threshold, bool relations, int seed) {
|
73
|
+
srand(seed);
|
74
|
+
size_t n = src.size();
|
75
|
+
const auto result = new float[n];
|
76
|
+
|
77
|
+
if (!std::isnan(threshold)) {
|
78
|
+
MIDAS::FilteringCore midas(num_rows, num_buckets, threshold, factor);
|
79
|
+
for (size_t i = 0; i < n; i++) {
|
80
|
+
result[i] = midas(src[i], dst[i], times[i]);
|
81
|
+
}
|
82
|
+
} else if (relations) {
|
83
|
+
MIDAS::RelationalCore midas(num_rows, num_buckets, factor);
|
84
|
+
for (size_t i = 0; i < n; i++) {
|
85
|
+
result[i] = midas(src[i], dst[i], times[i]);
|
86
|
+
}
|
74
87
|
} else {
|
75
|
-
|
88
|
+
MIDAS::NormalCore midas(num_rows, num_buckets);
|
89
|
+
for (size_t i = 0; i < n; i++) {
|
90
|
+
result[i] = midas(src[i], dst[i], times[i]);
|
91
|
+
}
|
76
92
|
}
|
77
|
-
return std::string((char*) result
|
93
|
+
return std::string((char*) result, sizeof(float) / sizeof(char) * n);
|
78
94
|
}
|
79
95
|
|
80
96
|
extern "C"
|
@@ -85,16 +101,16 @@ void Init_ext()
|
|
85
101
|
define_class_under(rb_mMidas, "Detector")
|
86
102
|
.define_method(
|
87
103
|
"_fit_predict_str",
|
88
|
-
*[](std::string input, int num_rows, int num_buckets,
|
89
|
-
vector<int> src, dst, times;
|
104
|
+
*[](std::string input, int num_rows, int num_buckets, float factor, float threshold, bool relations, bool directed, int seed) {
|
105
|
+
std::vector<int> src, dst, times;
|
90
106
|
load_str(src, dst, times, input, directed);
|
91
|
-
return fit_predict(src, dst, times, num_rows, num_buckets, factor, relations);
|
107
|
+
return fit_predict(src, dst, times, num_rows, num_buckets, factor, threshold, relations, seed);
|
92
108
|
})
|
93
109
|
.define_method(
|
94
110
|
"_fit_predict_file",
|
95
|
-
*[](std::string input, int num_rows, int num_buckets,
|
96
|
-
vector<int> src, dst, times;
|
111
|
+
*[](std::string input, int num_rows, int num_buckets, float factor, float threshold, bool relations, bool directed, int seed) {
|
112
|
+
std::vector<int> src, dst, times;
|
97
113
|
load_file(src, dst, times, input, !directed);
|
98
|
-
return fit_predict(src, dst, times, num_rows, num_buckets, factor, relations);
|
114
|
+
return fit_predict(src, dst, times, num_rows, num_buckets, factor, threshold, relations, seed);
|
99
115
|
});
|
100
116
|
}
|
data/ext/midas/extconf.rb
CHANGED
@@ -1,14 +1,8 @@
|
|
1
1
|
require "mkmf-rice"
|
2
2
|
|
3
|
-
|
3
|
+
$CXXFLAGS << " -std=c++11"
|
4
4
|
|
5
|
-
|
6
|
-
|
7
|
-
ext = File.expand_path(".", __dir__)
|
8
|
-
midas = File.expand_path("../../vendor/midas", __dir__)
|
9
|
-
|
10
|
-
$srcs = Dir["{#{ext},#{midas}}/*.{cc,cpp}"]
|
5
|
+
midas = File.expand_path("../../vendor/MIDAS/src", __dir__)
|
11
6
|
$INCFLAGS << " -I#{midas}"
|
12
|
-
$VPATH << midas
|
13
7
|
|
14
8
|
create_makefile("midas/ext")
|
data/lib/midas/detector.rb
CHANGED
@@ -1,25 +1,28 @@
|
|
1
1
|
module Midas
|
2
2
|
class Detector
|
3
|
-
def initialize(rows: 2, buckets: 769, alpha: 0.
|
3
|
+
def initialize(rows: 2, buckets: 769, alpha: 0.5, threshold: nil, relations: true, directed: true, seed: 0)
|
4
4
|
@rows = rows
|
5
5
|
@buckets = buckets
|
6
6
|
@alpha = alpha
|
7
|
+
@threshold = threshold
|
7
8
|
@relations = relations
|
8
9
|
@directed = directed
|
10
|
+
@seed = seed
|
9
11
|
end
|
10
12
|
|
11
13
|
def fit_predict(x)
|
14
|
+
threshold = @threshold || Float::NAN
|
12
15
|
result =
|
13
16
|
if x.is_a?(String)
|
14
|
-
_fit_predict_file(x, @rows, @buckets, @alpha, @relations, @directed)
|
17
|
+
_fit_predict_file(x, @rows, @buckets, @alpha, threshold, @relations, @directed, @seed)
|
15
18
|
else
|
16
19
|
x = Numo::Int32.cast(x) unless x.is_a?(Numo::NArray)
|
17
20
|
x = x.cast_to(Numo::Int32) unless x.is_a?(Numo::Int32)
|
18
21
|
raise ArgumentError, "Bad shape: #{x.shape}" unless x.rank == 2 && x.shape[1] == 3
|
19
|
-
_fit_predict_str(x.to_binary, @rows, @buckets, @alpha, @relations, @directed)
|
22
|
+
_fit_predict_str(x.to_binary, @rows, @buckets, @alpha, threshold, @relations, @directed, @seed)
|
20
23
|
end
|
21
24
|
|
22
|
-
Numo::
|
25
|
+
Numo::SFloat.from_binary(result)
|
23
26
|
end
|
24
27
|
end
|
25
28
|
end
|
data/lib/midas/version.rb
CHANGED
data/vendor/MIDAS/LICENSE
CHANGED
@@ -174,28 +174,3 @@
|
|
174
174
|
of your accepting any such warranty or additional liability.
|
175
175
|
|
176
176
|
END OF TERMS AND CONDITIONS
|
177
|
-
|
178
|
-
APPENDIX: How to apply the Apache License to your work.
|
179
|
-
|
180
|
-
To apply the Apache License to your work, attach the following
|
181
|
-
boilerplate notice, with the fields enclosed by brackets "[]"
|
182
|
-
replaced with your own identifying information. (Don't include
|
183
|
-
the brackets!) The text should be enclosed in the appropriate
|
184
|
-
comment syntax for the file format. We also recommend that a
|
185
|
-
file or class name and description of purpose be included on the
|
186
|
-
same "printed page" as the copyright notice for easier
|
187
|
-
identification within third-party archives.
|
188
|
-
|
189
|
-
Copyright [yyyy] [name of copyright owner]
|
190
|
-
|
191
|
-
Licensed under the Apache License, Version 2.0 (the "License");
|
192
|
-
you may not use this file except in compliance with the License.
|
193
|
-
You may obtain a copy of the License at
|
194
|
-
|
195
|
-
http://www.apache.org/licenses/LICENSE-2.0
|
196
|
-
|
197
|
-
Unless required by applicable law or agreed to in writing, software
|
198
|
-
distributed under the License is distributed on an "AS IS" BASIS,
|
199
|
-
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
200
|
-
See the License for the specific language governing permissions and
|
201
|
-
limitations under the License.
|
data/vendor/MIDAS/README.md
CHANGED
@@ -1,66 +1,211 @@
|
|
1
1
|
# MIDAS
|
2
|
-
[](https://aaai.org/Conferences/AAAI-20/)
|
3
|
-
[](https://www.comp.nus.edu.sg/~sbhatia/assets/pdf/midas.pdf)
|
4
|
-
[](https://www.comp.nus.edu.sg/~sbhatia/assets/pdf/midasposter.pdf)
|
5
|
-
[](https://github.com/bhatiasiddharth/MIDAS/blob/master/LICENSE)
|
6
2
|
|
7
|
-
<p
|
8
|
-
<
|
3
|
+
<p>
|
4
|
+
<a href="https://aaai.org/Conferences/AAAI-20/">
|
5
|
+
<img src="http://img.shields.io/badge/AAAI-2020-red.svg">
|
6
|
+
</a>
|
7
|
+
<a href="https://arxiv.org/pdf/2009.08452.pdf"><img src="http://img.shields.io/badge/Paper-PDF-brightgreen.svg"></a>
|
8
|
+
<a href="https://www.comp.nus.edu.sg/~sbhatia/assets/pdf/midasslides.pdf">
|
9
|
+
<img src="http://img.shields.io/badge/Slides-PDF-ff9e18.svg">
|
10
|
+
</a>
|
11
|
+
<a href="https://youtu.be/Bd4PyLCHrto">
|
12
|
+
<img src="http://img.shields.io/badge/Talk-Youtube-ff69b4.svg">
|
13
|
+
</a>
|
14
|
+
<a href="https://www.youtube.com/watch?v=DPmN-uPW8qU">
|
15
|
+
<img src="https://img.shields.io/badge/Overview-Youtube-orange.svg">
|
16
|
+
</a>
|
17
|
+
<a href="https://github.com/bhatiasiddharth/MIDAS/blob/master/LICENSE">
|
18
|
+
<img src="https://img.shields.io/badge/License-Apache%202.0-blue.svg">
|
19
|
+
</a>
|
9
20
|
</p>
|
10
21
|
|
22
|
+
C++ implementation of
|
11
23
|
|
12
|
-
|
24
|
+
- [Real-time Streaming Anomaly Detection in Dynamic Graphs](https://arxiv.org/pdf/2009.08452.pdf). *Siddharth Bhatia, Rui Liu, Bryan Hooi, Minji Yoon, Kijung Shin, Christos Faloutsos*. (Under Review)
|
25
|
+
- [MIDAS: Microcluster-Based Detector of Anomalies in Edge Streams](https://arxiv.org/pdf/1911.04464.pdf). *Siddharth Bhatia, Bryan Hooi, Minji Yoon, Kijung Shin, Christos Faloutsos*. AAAI 2020.
|
13
26
|
|
14
|
-
|
27
|
+
The old implementation lives in a separate branch, `OldImplementation`; it should be considered archived and is unlikely to receive feature updates.
|
15
28
|
|
16
|
-
|
29
|
+

|
17
30
|
|
18
|
-
|
31
|
+
## Table of Contents
|
19
32
|
|
33
|
+
<!-- START doctoc generated TOC please keep comment here to allow auto update -->
|
34
|
+
<!-- DON'T EDIT THIS SECTION, INSTEAD RE-RUN doctoc TO UPDATE -->
|
20
35
|
|
21
|
-
## Getting started
|
22
|
-
1. Run `make` to compile code and create the binary.
|
23
|
-
2. Run `./midas -i ` followed by the input file path and name.
|
24
|
-
3. Run `make clean` to clean binaries.
|
25
36
|
|
37
|
+
- [Features](#features)
|
38
|
+
- [Demo](#demo)
|
39
|
+
- [Customization](#customization)
|
40
|
+
- [Other Files](#other-files)
|
41
|
+
- [In Other Languages](#in-other-languages)
|
42
|
+
- [Online Coverage](#online-coverage)
|
43
|
+
- [Citation](#citation)
|
44
|
+
|
45
|
+
<!-- END doctoc generated TOC please keep comment here to allow auto update -->
|
46
|
+
|
47
|
+
## Features
|
48
|
+
|
49
|
+
- Finds Anomalies in Dynamic/Time-Evolving Graph: (Intrusion Detection, Fake Ratings, Financial Fraud)
|
50
|
+
- Detects Microcluster Anomalies (suddenly arriving groups of suspiciously similar edges e.g. DoS attack)
|
51
|
+
- Theoretical Guarantees on False Positive Probability
|
52
|
+
- Constant Memory (independent of graph size)
|
53
|
+
- Constant Update Time (real-time anomaly detection to minimize harm)
|
54
|
+
- Up to 55% more accurate and 929 times faster than the state of the art approaches
|
55
|
+
- Experiments are performed using the following datasets:
|
56
|
+
- [DARPA](https://www.ll.mit.edu/r-d/datasets/1998-darpa-intrusion-detection-evaluation-dataset)
|
57
|
+
- [TwitterWorldCup2014](http://odds.cs.stonybrook.edu/twitterworldcup2014-dataset)
|
58
|
+
- [TwitterSecurity](http://odds.cs.stonybrook.edu/twittersecurity-dataset)
|
26
59
|
|
27
60
|
## Demo
|
28
|
-
1. Run `./demo.sh` to compile the code and run it on example dataset.
|
29
61
|
|
62
|
+
If you use Windows:
|
63
|
+
|
64
|
+
1. Open a Visual Studio developer command prompt (we want its toolchain)
|
65
|
+
1. `cd` to the project root `MIDAS/`
|
66
|
+
1. `cmake -DCMAKE_BUILD_TYPE=Release -GNinja -S . -B build/release`
|
67
|
+
1. `cmake --build build/release --target Demo`
|
68
|
+
1. `cd` to `MIDAS/build/release/`
|
69
|
+
1. `.\Demo.exe`
|
70
|
+
|
71
|
+
If you use Linux/macOS:
|
72
|
+
|
73
|
+
1. Open a terminal
|
74
|
+
1. `cd` to the project root `MIDAS/`
|
75
|
+
1. `cmake -DCMAKE_BUILD_TYPE=Release -S . -B build/release`
|
76
|
+
1. `cmake --build build/release --target Demo`
|
77
|
+
1. `cd` to `MIDAS/build/release/`
|
78
|
+
1. `./Demo`
|
79
|
+
|
80
|
+
The demo runs on `MIDAS/data/DARPA/darpa_processed.csv`, which has 4.5M records, with the filtering core (MIDAS-F).
|
81
|
+
|
82
|
+
The scores will be exported to `MIDAS/temp/Score.txt`, higher means more anomalous.
|
83
|
+
|
84
|
+
All file paths are absolute and "hardcoded" by CMake, but it's suggested NOT to run by double clicking on the executable file.
|
85
|
+
|
86
|
+
### Requirements
|
87
|
+
|
88
|
+
Core
|
89
|
+
- C++11
|
90
|
+
- C++ standard libraries
|
91
|
+
|
92
|
+
Demo
|
93
|
+
- Python 3 (`MIDAS/util/EvaluateScore.py`)
|
94
|
+
- `pandas`: I/O
|
95
|
+
- `scikit-learn`: Compute ROC-AUC
|
96
|
+
|
97
|
+
Experiment
|
98
|
+
- (Optional) Intel TBB: Parallelization
|
99
|
+
- (Optional) OpenMP: Parallelization
|
100
|
+
|
101
|
+
Other python utility scripts
|
102
|
+
- Python 3
|
103
|
+
- `pandas`
|
104
|
+
- `scikit-learn`
|
105
|
+
|
106
|
+
## Customization
|
107
|
+
|
108
|
+
### Switch Cores
|
109
|
+
|
110
|
+
Cores are instantiated at `MIDAS/example/Demo.cpp:67-69`; uncomment the chosen one.
|
111
|
+
|
112
|
+
### Custom Dataset + `Demo.cpp`
|
113
|
+
|
114
|
+
You need to prepare three files:
|
30
115
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
116
|
+
- Meta file
|
117
|
+
- Only includes an integer `N`, the number of records in the dataset
|
118
|
+
- Use its path for `pathMeta`
|
119
|
+
- E.g. `MIDAS/data/DARPA/darpa_shape.txt`
|
120
|
+
- Data file
|
121
|
+
- A header-less csv format file of shape `[N,3]`
|
122
|
+
- Columns are sources, destinations, timestamps
|
123
|
+
- Use its path for `pathData`
|
124
|
+
- E.g. `MIDAS/data/DARPA/darpa_processed.csv`
|
125
|
+
- Label file
|
126
|
+
- A header-less csv format file of shape `[N,1]`
|
127
|
+
- The corresponding label for data records
|
128
|
+
- 0 means normal record
|
129
|
+
- 1 means anomalous record
|
130
|
+
- Use its path for `pathGroundTruth`
|
131
|
+
- E.g. `MIDAS/data/DARPA/darpa_ground_truth.csv`
|
40
132
|
|
133
|
+
### Custom Dataset + Custom Runner
|
41
134
|
|
42
|
-
|
43
|
-
|
44
|
-
1. `
|
45
|
-
2. `destination (int)`: destination ID of the edge
|
46
|
-
3. `time (int)`: time stamp of the edge
|
135
|
+
1. Include the header `MIDAS/src/NormalCore.hpp`, `MIDAS/src/RelationalCore.hpp` or `MIDAS/src/FilteringCore.hpp`
|
136
|
+
1. Instantiate cores with required parameters
|
137
|
+
1. Call `operator()` on individual data records; it returns the anomaly score for the input record
|
47
138
|
|
48
|
-
|
139
|
+
## Other Files
|
49
140
|
|
141
|
+
### `example/`
|
50
142
|
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
143
|
+
#### `Experiment.cpp`
|
144
|
+
|
145
|
+
The code we used for experiments.
|
146
|
+
It will try to use Intel TBB or OpenMP for parallelization.
|
147
|
+
You should comment out all but one runner function call in `main()`, as most results are exported to `MIDAS/temp/Experiment.csv` together with many intermediate files.
|
148
|
+
|
149
|
+
#### `Reproducible.cpp`
|
150
|
+
|
151
|
+
Similar to `Demo.cpp`, but with all random parameters hardcoded so that it always produces the same result.
|
152
|
+
It's for other developers and us to test if the implementation in other languages can produce acceptable results.
|
153
|
+
|
154
|
+
### `util/`
|
155
|
+
|
156
|
+
`DeleteTempFile.py`, `EvaluateScore.py` and `ReproduceROC.py` will show their usage and a short description when executed without any argument.
|
157
|
+
|
158
|
+
#### `PreprocessData.py`
|
159
|
+
|
160
|
+
The code to process the raw dataset into an easy-to-read format.
|
161
|
+
Datasets are always assumed to be in a folder in `MIDAS/data/`.
|
162
|
+
It can process the following dataset(s)
|
163
|
+
|
164
|
+
- `DARPA/darpa_original.csv` -> `DARPA/darpa_processed.csv`, `DARPA/darpa_ground_truth.csv`, `DARPA/darpa_shape.txt`
|
165
|
+
|
166
|
+
## In Other Languages
|
167
|
+
|
168
|
+
1. Python: [Rui Liu's MIDAS.Python](https://github.com/liurui39660/MIDAS.Python), [Ritesh Kumar's pyMIDAS](https://github.com/ritesh99rakesh/pyMIDAS)
|
169
|
+
1. Golang: [Steve Tan's midas](https://github.com/steve0hh/midas)
|
170
|
+
1. Ruby: [Andrew Kane's midas](https://github.com/ankane/midas)
|
171
|
+
1. Rust: [Scott Steele's midas_rs](https://github.com/scooter-dangle/midas_rs)
|
172
|
+
1. R: [Tobias Heidler's MIDASwrappeR](https://github.com/pteridin/MIDASwrappeR)
|
173
|
+
1. Java: [Joshua Tokle's MIDAS-Java](https://github.com/jotok/MIDAS-Java)
|
174
|
+
1. Julia: [Ashrya Agrawal's MIDAS.jl](https://github.com/ashryaagr/MIDAS.jl)
|
175
|
+
|
176
|
+
## Online Coverage
|
177
|
+
|
178
|
+
1. [ACM TechNews](https://technews.acm.org/archives.cfm?fo=2020-05-may/may-06-2020.html)
|
179
|
+
1. [AIhub](https://aihub.org/2020/05/01/interview-with-siddharth-bhatia-a-new-approach-for-anomaly-detection/)
|
180
|
+
1. [Hacker News](https://news.ycombinator.com/item?id=22802604)
|
181
|
+
1. [KDnuggets](https://www.kdnuggets.com/2020/04/midas-new-baseline-anomaly-detection-graphs.html)
|
182
|
+
1. [Microsoft](https://techcommunity.microsoft.com/t5/azure-sentinel/announcing-the-azure-sentinel-hackathon-winners/ba-p/1548240)
|
183
|
+
1. [Towards Data Science](https://towardsdatascience.com/controlling-fake-news-using-graphs-and-statistics-31ed116a986f)
|
55
184
|
|
56
185
|
## Citation
|
57
|
-
|
186
|
+
|
187
|
+
If you use this code for your research, please consider citing our arXiv preprint
|
188
|
+
|
189
|
+
```bibtex
|
190
|
+
@misc{bhatia2020realtime,
|
191
|
+
title={Real-Time Streaming Anomaly Detection in Dynamic Graphs},
|
192
|
+
author={Siddharth Bhatia and Rui Liu and Bryan Hooi and Minji Yoon and Kijung Shin and Christos Faloutsos},
|
193
|
+
year={2020},
|
194
|
+
eprint={2009.08452},
|
195
|
+
archivePrefix={arXiv},
|
196
|
+
primaryClass={cs.LG}
|
197
|
+
}
|
58
198
|
|
59
199
|
```
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
200
|
+
|
201
|
+
or our AAAI paper
|
202
|
+
|
203
|
+
|
204
|
+
```bibtex
|
205
|
+
@inproceedings{bhatia2020midas,
|
206
|
+
title="MIDAS: Microcluster-Based Detector of Anomalies in Edge Streams",
|
207
|
+
author="Siddharth {Bhatia} and Bryan {Hooi} and Minji {Yoon} and Kijung {Shin} and Christos {Faloutsos}",
|
208
|
+
booktitle="AAAI 2020 : The Thirty-Fourth AAAI Conference on Artificial Intelligence",
|
209
|
+
year="2020"
|
65
210
|
}
|
66
211
|
```
|