midas-edge 0.1.0 → 0.2.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +24 -0
- data/NOTICE.txt +1 -0
- data/README.md +10 -7
- data/ext/midas/ext.cpp +32 -16
- data/ext/midas/extconf.rb +2 -8
- data/lib/midas/detector.rb +7 -4
- data/lib/midas/version.rb +1 -1
- data/vendor/MIDAS/LICENSE +0 -25
- data/vendor/MIDAS/README.md +185 -40
- data/vendor/MIDAS/src/CountMinSketch.hpp +105 -0
- data/vendor/MIDAS/src/FilteringCore.hpp +98 -0
- data/vendor/MIDAS/src/NormalCore.hpp +53 -0
- data/vendor/MIDAS/src/RelationalCore.hpp +79 -0
- metadata +11 -16
- data/lib/midas/ext.bundle +0 -0
- data/vendor/MIDAS/anom.cpp +0 -88
- data/vendor/MIDAS/anom.hpp +0 -10
- data/vendor/MIDAS/argparse.hpp +0 -539
- data/vendor/MIDAS/edgehash.cpp +0 -63
- data/vendor/MIDAS/edgehash.hpp +0 -25
- data/vendor/MIDAS/main.cpp +0 -127
- data/vendor/MIDAS/nodehash.cpp +0 -63
- data/vendor/MIDAS/nodehash.hpp +0 -25
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 71b32a1851488ba989ccc792af8922d1148f0c0f4585574821ea1759c908ad41
|
4
|
+
data.tar.gz: a4b9105e96a3407708d8e64a31c33e93dbbb8c9127c1f99f802044cdd4aa6818
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 28149251ec92a2b24e07c7745ad0b759c23ff4d957fa56298d6c6a92384f798e0651086a524b3b95e7911ed5500405929ef6627059429d41f3126381b73e479e
|
7
|
+
data.tar.gz: 2146d83339cd7c466d053f82f937a44ceb16d609d31034cd740b90425601ea12861e9bd1377a8696c5655af929a8dc1201fae6e79bed3addfd96125931e2f674
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,27 @@
|
|
1
|
+
## 0.2.3 (2020-11-17)
|
2
|
+
|
3
|
+
- Updated MIDAS to 1.1.2
|
4
|
+
|
5
|
+
## 0.2.2 (2020-09-23)
|
6
|
+
|
7
|
+
- Updated MIDAS to 1.1.0
|
8
|
+
|
9
|
+
## 0.2.1 (2020-06-17)
|
10
|
+
|
11
|
+
- Fixed installation (missing header files)
|
12
|
+
|
13
|
+
## 0.2.0 (2020-06-17)
|
14
|
+
|
15
|
+
- Updated MIDAS to 1.0.0
|
16
|
+
- Added `threshold` option
|
17
|
+
- Added `seed` option
|
18
|
+
- Changed default `alpha` to 0.5
|
19
|
+
- Fixed reading data from files with `directed: false`
|
20
|
+
|
21
|
+
## 0.1.1 (2020-02-19)
|
22
|
+
|
23
|
+
- Fixed installation on Linux
|
24
|
+
|
1
25
|
## 0.1.0 (2020-02-17)
|
2
26
|
|
3
27
|
- First release
|
data/NOTICE.txt
CHANGED
data/README.md
CHANGED
@@ -2,13 +2,14 @@
|
|
2
2
|
|
3
3
|
[MIDAS](https://github.com/bhatiasiddharth/MIDAS) - edge stream anomaly detection - for Ruby
|
4
4
|
|
5
|
+
[![Build Status](https://travis-ci.org/ankane/midas.svg?branch=master)](https://travis-ci.org/ankane/midas)
|
6
|
+
|
5
7
|
## Installation
|
6
8
|
|
7
|
-
Add
|
9
|
+
Add this line to your application’s Gemfile:
|
8
10
|
|
9
11
|
```ruby
|
10
12
|
gem 'midas-edge'
|
11
|
-
gem 'rice', github: 'jasonroelofs/rice' # for now for c++17
|
12
13
|
```
|
13
14
|
|
14
15
|
## Getting Started
|
@@ -41,9 +42,11 @@ Pass parameters - default values below
|
|
41
42
|
Midas.new(
|
42
43
|
rows: 2, # number of hash functions
|
43
44
|
buckets: 769, # number of buckets
|
44
|
-
alpha: 0.
|
45
|
+
alpha: 0.5, # temporal decay factor
|
46
|
+
threshold: nil, # threshold for the filtering core (MIDAS-F); nil disables filtering
|
45
47
|
relations: true, # whether to use MIDAS-R or MIDAS
|
46
|
-
directed: true
|
48
|
+
directed: true, # treat the graph as directed or undirected
|
49
|
+
seed: 0 # random seed
|
47
50
|
)
|
48
51
|
```
|
49
52
|
|
@@ -55,10 +58,10 @@ Data can be an array of arrays
|
|
55
58
|
[[1, 2, 3], [4, 5, 6]]
|
56
59
|
```
|
57
60
|
|
58
|
-
Or a Numo
|
61
|
+
Or a Numo array
|
59
62
|
|
60
63
|
```ruby
|
61
|
-
Numo::
|
64
|
+
Numo::NArray.cast([[1, 2, 3], [4, 5, 6]])
|
62
65
|
```
|
63
66
|
|
64
67
|
## Performance
|
@@ -89,7 +92,7 @@ Everyone is encouraged to help improve this project. Here are a few ways you can
|
|
89
92
|
To get started with development:
|
90
93
|
|
91
94
|
```sh
|
92
|
-
git clone https://github.com/ankane/midas.git
|
95
|
+
git clone --recursive https://github.com/ankane/midas.git
|
93
96
|
cd midas
|
94
97
|
bundle install
|
95
98
|
bundle exec rake compile
|
data/ext/midas/ext.cpp
CHANGED
@@ -3,7 +3,9 @@
|
|
3
3
|
#include <vector>
|
4
4
|
|
5
5
|
// midas
|
6
|
-
#include <
|
6
|
+
#include <FilteringCore.hpp>
|
7
|
+
#include <NormalCore.hpp>
|
8
|
+
#include <RelationalCore.hpp>
|
7
9
|
|
8
10
|
// rice
|
9
11
|
#include <rice/Module.hpp>
|
@@ -14,7 +16,7 @@ using Rice::String;
|
|
14
16
|
using Rice::define_module;
|
15
17
|
using Rice::define_class_under;
|
16
18
|
|
17
|
-
void load_str(vector<int>& src, vector<int>& dst, vector<int>& times, std::string input, bool directed) {
|
19
|
+
void load_str(std::vector<int>& src, std::vector<int>& dst, std::vector<int>& times, std::string input, bool directed) {
|
18
20
|
int* input_ptr = (int*) input.data();
|
19
21
|
size_t n = input.size() / sizeof(int);
|
20
22
|
|
@@ -39,7 +41,7 @@ void load_str(vector<int>& src, vector<int>& dst, vector<int>& times, std::strin
|
|
39
41
|
// load_data from main.cpp
|
40
42
|
// modified to throw std::runtime_error when cannot find file
|
41
43
|
// instead of exiting
|
42
|
-
void load_file(vector<int>& src, vector<int>& dst, vector<int>& times, std::string input_file, bool undirected)
|
44
|
+
void load_file(std::vector<int>& src, std::vector<int>& dst, std::vector<int>& times, std::string input_file, bool undirected)
|
43
45
|
{
|
44
46
|
FILE* infile = fopen(input_file.c_str(), "r");
|
45
47
|
if (infile == NULL) {
|
@@ -56,7 +58,7 @@ void load_file(vector<int>& src, vector<int>& dst, vector<int>& times, std::stri
|
|
56
58
|
}
|
57
59
|
}
|
58
60
|
else {
|
59
|
-
while (fscanf(infile, "%d
|
61
|
+
while (fscanf(infile, "%d,%d,%d", &s, &d, &t) == 3) {
|
60
62
|
src.push_back(s);
|
61
63
|
dst.push_back(d);
|
62
64
|
times.push_back(t);
|
@@ -67,14 +69,28 @@ void load_file(vector<int>& src, vector<int>& dst, vector<int>& times, std::stri
|
|
67
69
|
}
|
68
70
|
}
|
69
71
|
|
70
|
-
std::string fit_predict(vector<int>& src, vector<int>& dst, vector<int>& times, int num_rows, int num_buckets,
|
71
|
-
|
72
|
-
|
73
|
-
|
72
|
+
std::string fit_predict(std::vector<int>& src, std::vector<int>& dst, std::vector<int>& times, int num_rows, int num_buckets, float factor, float threshold, bool relations, int seed) {
|
73
|
+
srand(seed);
|
74
|
+
size_t n = src.size();
|
75
|
+
const auto result = new float[n];
|
76
|
+
|
77
|
+
if (!std::isnan(threshold)) {
|
78
|
+
MIDAS::FilteringCore midas(num_rows, num_buckets, threshold, factor);
|
79
|
+
for (size_t i = 0; i < n; i++) {
|
80
|
+
result[i] = midas(src[i], dst[i], times[i]);
|
81
|
+
}
|
82
|
+
} else if (relations) {
|
83
|
+
MIDAS::RelationalCore midas(num_rows, num_buckets, factor);
|
84
|
+
for (size_t i = 0; i < n; i++) {
|
85
|
+
result[i] = midas(src[i], dst[i], times[i]);
|
86
|
+
}
|
74
87
|
} else {
|
75
|
-
|
88
|
+
MIDAS::NormalCore midas(num_rows, num_buckets);
|
89
|
+
for (size_t i = 0; i < n; i++) {
|
90
|
+
result[i] = midas(src[i], dst[i], times[i]);
|
91
|
+
}
|
76
92
|
}
|
77
|
-
return std::string((char*) result
|
93
|
+
return std::string((char*) result, sizeof(float) / sizeof(char) * n);
|
78
94
|
}
|
79
95
|
|
80
96
|
extern "C"
|
@@ -85,16 +101,16 @@ void Init_ext()
|
|
85
101
|
define_class_under(rb_mMidas, "Detector")
|
86
102
|
.define_method(
|
87
103
|
"_fit_predict_str",
|
88
|
-
*[](std::string input, int num_rows, int num_buckets,
|
89
|
-
vector<int> src, dst, times;
|
104
|
+
*[](std::string input, int num_rows, int num_buckets, float factor, float threshold, bool relations, bool directed, int seed) {
|
105
|
+
std::vector<int> src, dst, times;
|
90
106
|
load_str(src, dst, times, input, directed);
|
91
|
-
return fit_predict(src, dst, times, num_rows, num_buckets, factor, relations);
|
107
|
+
return fit_predict(src, dst, times, num_rows, num_buckets, factor, threshold, relations, seed);
|
92
108
|
})
|
93
109
|
.define_method(
|
94
110
|
"_fit_predict_file",
|
95
|
-
*[](std::string input, int num_rows, int num_buckets,
|
96
|
-
vector<int> src, dst, times;
|
111
|
+
*[](std::string input, int num_rows, int num_buckets, float factor, float threshold, bool relations, bool directed, int seed) {
|
112
|
+
std::vector<int> src, dst, times;
|
97
113
|
load_file(src, dst, times, input, !directed);
|
98
|
-
return fit_predict(src, dst, times, num_rows, num_buckets, factor, relations);
|
114
|
+
return fit_predict(src, dst, times, num_rows, num_buckets, factor, threshold, relations, seed);
|
99
115
|
});
|
100
116
|
}
|
data/ext/midas/extconf.rb
CHANGED
@@ -1,14 +1,8 @@
|
|
1
1
|
require "mkmf-rice"
|
2
2
|
|
3
|
-
|
3
|
+
$CXXFLAGS << " -std=c++11"
|
4
4
|
|
5
|
-
|
6
|
-
|
7
|
-
ext = File.expand_path(".", __dir__)
|
8
|
-
midas = File.expand_path("../../vendor/midas", __dir__)
|
9
|
-
|
10
|
-
$srcs = Dir["{#{ext},#{midas}}/*.{cc,cpp}"]
|
5
|
+
midas = File.expand_path("../../vendor/MIDAS/src", __dir__)
|
11
6
|
$INCFLAGS << " -I#{midas}"
|
12
|
-
$VPATH << midas
|
13
7
|
|
14
8
|
create_makefile("midas/ext")
|
data/lib/midas/detector.rb
CHANGED
@@ -1,25 +1,28 @@
|
|
1
1
|
module Midas
|
2
2
|
class Detector
|
3
|
-
def initialize(rows: 2, buckets: 769, alpha: 0.
|
3
|
+
def initialize(rows: 2, buckets: 769, alpha: 0.5, threshold: nil, relations: true, directed: true, seed: 0)
|
4
4
|
@rows = rows
|
5
5
|
@buckets = buckets
|
6
6
|
@alpha = alpha
|
7
|
+
@threshold = threshold
|
7
8
|
@relations = relations
|
8
9
|
@directed = directed
|
10
|
+
@seed = seed
|
9
11
|
end
|
10
12
|
|
11
13
|
def fit_predict(x)
|
14
|
+
threshold = @threshold || Float::NAN
|
12
15
|
result =
|
13
16
|
if x.is_a?(String)
|
14
|
-
_fit_predict_file(x, @rows, @buckets, @alpha, @relations, @directed)
|
17
|
+
_fit_predict_file(x, @rows, @buckets, @alpha, threshold, @relations, @directed, @seed)
|
15
18
|
else
|
16
19
|
x = Numo::Int32.cast(x) unless x.is_a?(Numo::NArray)
|
17
20
|
x = x.cast_to(Numo::Int32) unless x.is_a?(Numo::Int32)
|
18
21
|
raise ArgumentError, "Bad shape: #{x.shape}" unless x.rank == 2 && x.shape[1] == 3
|
19
|
-
_fit_predict_str(x.to_binary, @rows, @buckets, @alpha, @relations, @directed)
|
22
|
+
_fit_predict_str(x.to_binary, @rows, @buckets, @alpha, threshold, @relations, @directed, @seed)
|
20
23
|
end
|
21
24
|
|
22
|
-
Numo::
|
25
|
+
Numo::SFloat.from_binary(result)
|
23
26
|
end
|
24
27
|
end
|
25
28
|
end
|
data/lib/midas/version.rb
CHANGED
data/vendor/MIDAS/LICENSE
CHANGED
@@ -174,28 +174,3 @@
|
|
174
174
|
of your accepting any such warranty or additional liability.
|
175
175
|
|
176
176
|
END OF TERMS AND CONDITIONS
|
177
|
-
|
178
|
-
APPENDIX: How to apply the Apache License to your work.
|
179
|
-
|
180
|
-
To apply the Apache License to your work, attach the following
|
181
|
-
boilerplate notice, with the fields enclosed by brackets "[]"
|
182
|
-
replaced with your own identifying information. (Don't include
|
183
|
-
the brackets!) The text should be enclosed in the appropriate
|
184
|
-
comment syntax for the file format. We also recommend that a
|
185
|
-
file or class name and description of purpose be included on the
|
186
|
-
same "printed page" as the copyright notice for easier
|
187
|
-
identification within third-party archives.
|
188
|
-
|
189
|
-
Copyright [yyyy] [name of copyright owner]
|
190
|
-
|
191
|
-
Licensed under the Apache License, Version 2.0 (the "License");
|
192
|
-
you may not use this file except in compliance with the License.
|
193
|
-
You may obtain a copy of the License at
|
194
|
-
|
195
|
-
http://www.apache.org/licenses/LICENSE-2.0
|
196
|
-
|
197
|
-
Unless required by applicable law or agreed to in writing, software
|
198
|
-
distributed under the License is distributed on an "AS IS" BASIS,
|
199
|
-
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
200
|
-
See the License for the specific language governing permissions and
|
201
|
-
limitations under the License.
|
data/vendor/MIDAS/README.md
CHANGED
@@ -1,66 +1,211 @@
|
|
1
1
|
# MIDAS
|
2
|
-
[![Conference](http://img.shields.io/badge/AAAI-2020-red.svg)](https://aaai.org/Conferences/AAAI-20/)
|
3
|
-
[![Paper](http://img.shields.io/badge/Paper-pdf-brightgreen.svg)](https://www.comp.nus.edu.sg/~sbhatia/assets/pdf/midas.pdf)
|
4
|
-
[![Poster](http://img.shields.io/badge/Poster-pdf-blueviolet.svg)](https://www.comp.nus.edu.sg/~sbhatia/assets/pdf/midasposter.pdf)
|
5
|
-
[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://github.com/bhatiasiddharth/MIDAS/blob/master/LICENSE)
|
6
2
|
|
7
|
-
<p
|
8
|
-
<
|
3
|
+
<p>
|
4
|
+
<a href="https://aaai.org/Conferences/AAAI-20/">
|
5
|
+
<img src="http://img.shields.io/badge/AAAI-2020-red.svg">
|
6
|
+
</a>
|
7
|
+
<a href="https://arxiv.org/pdf/2009.08452.pdf"><img src="http://img.shields.io/badge/Paper-PDF-brightgreen.svg"></a>
|
8
|
+
<a href="https://www.comp.nus.edu.sg/~sbhatia/assets/pdf/midasslides.pdf">
|
9
|
+
<img src="http://img.shields.io/badge/Slides-PDF-ff9e18.svg">
|
10
|
+
</a>
|
11
|
+
<a href="https://youtu.be/Bd4PyLCHrto">
|
12
|
+
<img src="http://img.shields.io/badge/Talk-Youtube-ff69b4.svg">
|
13
|
+
</a>
|
14
|
+
<a href="https://www.youtube.com/watch?v=DPmN-uPW8qU">
|
15
|
+
<img src="https://img.shields.io/badge/Overview-Youtube-orange.svg">
|
16
|
+
</a>
|
17
|
+
<a href="https://github.com/bhatiasiddharth/MIDAS/blob/master/LICENSE">
|
18
|
+
<img src="https://img.shields.io/badge/License-Apache%202.0-blue.svg">
|
19
|
+
</a>
|
9
20
|
</p>
|
10
21
|
|
22
|
+
C++ implementation of
|
11
23
|
|
12
|
-
|
24
|
+
- [Real-time Streaming Anomaly Detection in Dynamic Graphs](https://arxiv.org/pdf/2009.08452.pdf). *Siddharth Bhatia, Rui Liu, Bryan Hooi, Minji Yoon, Kijung Shin, Christos Faloutsos*. (Under Review)
|
25
|
+
- [MIDAS: Microcluster-Based Detector of Anomalies in Edge Streams](https://arxiv.org/pdf/1911.04464.pdf). *Siddharth Bhatia, Bryan Hooi, Minji Yoon, Kijung Shin, Christos Faloutsos*. AAAI 2020.
|
13
26
|
|
14
|
-
|
27
|
+
The old implementation is in another branch, `OldImplementation`; it should be considered archived and is unlikely to receive feature updates.
|
15
28
|
|
16
|
-
|
29
|
+
![](asset/Intro.png)
|
17
30
|
|
18
|
-
|
31
|
+
## Table of Contents
|
19
32
|
|
33
|
+
<!-- START doctoc generated TOC please keep comment here to allow auto update -->
|
34
|
+
<!-- DON'T EDIT THIS SECTION, INSTEAD RE-RUN doctoc TO UPDATE -->
|
20
35
|
|
21
|
-
## Getting started
|
22
|
-
1. Run `make` to compile code and create the binary.
|
23
|
-
2. Run `./midas -i ` followed by the input file path and name.
|
24
|
-
3. Run `make clean` to clean binaries.
|
25
36
|
|
37
|
+
- [Features](#features)
|
38
|
+
- [Demo](#demo)
|
39
|
+
- [Customization](#customization)
|
40
|
+
- [Other Files](#other-files)
|
41
|
+
- [In Other Languages](#in-other-languages)
|
42
|
+
- [Online Coverage](#online-coverage)
|
43
|
+
- [Citation](#citation)
|
44
|
+
|
45
|
+
<!-- END doctoc generated TOC please keep comment here to allow auto update -->
|
46
|
+
|
47
|
+
## Features
|
48
|
+
|
49
|
+
- Finds Anomalies in Dynamic/Time-Evolving Graphs (Intrusion Detection, Fake Ratings, Financial Fraud)
|
50
|
+
- Detects Microcluster Anomalies (suddenly arriving groups of suspiciously similar edges e.g. DoS attack)
|
51
|
+
- Theoretical Guarantees on False Positive Probability
|
52
|
+
- Constant Memory (independent of graph size)
|
53
|
+
- Constant Update Time (real-time anomaly detection to minimize harm)
|
54
|
+
- Up to 55% more accurate and 929 times faster than state-of-the-art approaches
|
55
|
+
- Experiments are performed using the following datasets:
|
56
|
+
- [DARPA](https://www.ll.mit.edu/r-d/datasets/1998-darpa-intrusion-detection-evaluation-dataset)
|
57
|
+
- [TwitterWorldCup2014](http://odds.cs.stonybrook.edu/twitterworldcup2014-dataset)
|
58
|
+
- [TwitterSecurity](http://odds.cs.stonybrook.edu/twittersecurity-dataset)
|
26
59
|
|
27
60
|
## Demo
|
28
|
-
1. Run `./demo.sh` to compile the code and run it on example dataset.
|
29
61
|
|
62
|
+
If you use Windows:
|
63
|
+
|
64
|
+
1. Open a Visual Studio developer command prompt (we need its toolchain)
|
65
|
+
1. `cd` to the project root `MIDAS/`
|
66
|
+
1. `cmake -DCMAKE_BUILD_TYPE=Release -GNinja -S . -B build/release`
|
67
|
+
1. `cmake --build build/release --target Demo`
|
68
|
+
1. `cd` to `MIDAS/build/release/`
|
69
|
+
1. `.\Demo.exe`
|
70
|
+
|
71
|
+
If you use Linux/macOS:
|
72
|
+
|
73
|
+
1. Open a terminal
|
74
|
+
1. `cd` to the project root `MIDAS/`
|
75
|
+
1. `cmake -DCMAKE_BUILD_TYPE=Release -S . -B build/release`
|
76
|
+
1. `cmake --build build/release --target Demo`
|
77
|
+
1. `cd` to `MIDAS/build/release/`
|
78
|
+
1. `./Demo`
|
79
|
+
|
80
|
+
The demo runs on `MIDAS/data/DARPA/darpa_processed.csv`, which has 4.5M records, with the filtering core (MIDAS-F).
|
81
|
+
|
82
|
+
The scores will be exported to `MIDAS/temp/Score.txt`, higher means more anomalous.
|
83
|
+
|
84
|
+
All file paths are absolute and "hardcoded" by CMake, but it's suggested NOT to run by double clicking on the executable file.
|
85
|
+
|
86
|
+
### Requirements
|
87
|
+
|
88
|
+
Core
|
89
|
+
- C++11
|
90
|
+
- C++ standard libraries
|
91
|
+
|
92
|
+
Demo
|
93
|
+
- Python 3 (`MIDAS/util/EvaluateScore.py`)
|
94
|
+
- `pandas`: I/O
|
95
|
+
- `scikit-learn`: Compute ROC-AUC
|
96
|
+
|
97
|
+
Experiment
|
98
|
+
- (Optional) Intel TBB: Parallelization
|
99
|
+
- (Optional) OpenMP: Parallelization
|
100
|
+
|
101
|
+
Other python utility scripts
|
102
|
+
- Python 3
|
103
|
+
- `pandas`
|
104
|
+
- `scikit-learn`
|
105
|
+
|
106
|
+
## Customization
|
107
|
+
|
108
|
+
### Switch Cores
|
109
|
+
|
110
|
+
Cores are instantiated at `MIDAS/example/Demo.cpp:67-69`; uncomment the one you want to use.
|
111
|
+
|
112
|
+
### Custom Dataset + `Demo.cpp`
|
113
|
+
|
114
|
+
You need to prepare three files:
|
30
115
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
116
|
+
- Meta file
|
117
|
+
- Only includes an integer `N`, the number of records in the dataset
|
118
|
+
- Use its path for `pathMeta`
|
119
|
+
- E.g. `MIDAS/data/DARPA/darpa_shape.txt`
|
120
|
+
- Data file
|
121
|
+
- A header-less csv format file of shape `[N,3]`
|
122
|
+
- Columns are sources, destinations, timestamps
|
123
|
+
- Use its path for `pathData`
|
124
|
+
- E.g. `MIDAS/data/DARPA/darpa_processed.csv`
|
125
|
+
- Label file
|
126
|
+
- A header-less csv format file of shape `[N,1]`
|
127
|
+
- The corresponding label for data records
|
128
|
+
- 0 means normal record
|
129
|
+
- 1 means anomalous record
|
130
|
+
- Use its path for `pathGroundTruth`
|
131
|
+
- E.g. `MIDAS/data/DARPA/darpa_ground_truth.csv`
|
40
132
|
|
133
|
+
### Custom Dataset + Custom Runner
|
41
134
|
|
42
|
-
|
43
|
-
|
44
|
-
1. `
|
45
|
-
2. `destination (int)`: destination ID of the edge
|
46
|
-
3. `time (int)`: time stamp of the edge
|
135
|
+
1. Include the header `MIDAS/src/NormalCore.hpp`, `MIDAS/src/RelationalCore.hpp` or `MIDAS/src/FilteringCore.hpp`
|
136
|
+
1. Instantiate cores with required parameters
|
137
|
+
1. Call `operator()` on individual data records; it returns the anomaly score for the input record
|
47
138
|
|
48
|
-
|
139
|
+
## Other Files
|
49
140
|
|
141
|
+
### `example/`
|
50
142
|
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
143
|
+
#### `Experiment.cpp`
|
144
|
+
|
145
|
+
The code we used for experiments.
|
146
|
+
It will try to use Intel TBB or OpenMP for parallelization.
|
147
|
+
You should comment out all but one runner function call in `main()`, as most results are exported to `MIDAS/temp/Experiiment.csv` together with many intermediate files.
|
148
|
+
|
149
|
+
#### `Reproducible.cpp`
|
150
|
+
|
151
|
+
Similar to `Demo.cpp`, but with all random parameters hardcoded, so it always produces the same result.
|
152
|
+
It's for other developers and us to test if the implementation in other languages can produce acceptable results.
|
153
|
+
|
154
|
+
### `util/`
|
155
|
+
|
156
|
+
`DeleteTempFile.py`, `EvaluateScore.py` and `ReproduceROC.py` will show their usage and a short description when executed without any argument.
|
157
|
+
|
158
|
+
#### `PreprocessData.py`
|
159
|
+
|
160
|
+
The code to process the raw dataset into an easy-to-read format.
|
161
|
+
Datasets are always assumed to be in a folder in `MIDAS/data/`.
|
162
|
+
It can process the following dataset(s)
|
163
|
+
|
164
|
+
- `DARPA/darpa_original.csv` -> `DARPA/darpa_processed.csv`, `DARPA/darpa_ground_truth.csv`, `DARPA/darpa_shape.txt`
|
165
|
+
|
166
|
+
## In Other Languages
|
167
|
+
|
168
|
+
1. Python: [Rui Liu's MIDAS.Python](https://github.com/liurui39660/MIDAS.Python), [Ritesh Kumar's pyMIDAS](https://github.com/ritesh99rakesh/pyMIDAS)
|
169
|
+
1. Golang: [Steve Tan's midas](https://github.com/steve0hh/midas)
|
170
|
+
1. Ruby: [Andrew Kane's midas](https://github.com/ankane/midas)
|
171
|
+
1. Rust: [Scott Steele's midas_rs](https://github.com/scooter-dangle/midas_rs)
|
172
|
+
1. R: [Tobias Heidler's MIDASwrappeR](https://github.com/pteridin/MIDASwrappeR)
|
173
|
+
1. Java: [Joshua Tokle's MIDAS-Java](https://github.com/jotok/MIDAS-Java)
|
174
|
+
1. Julia: [Ashrya Agrawal's MIDAS.jl](https://github.com/ashryaagr/MIDAS.jl)
|
175
|
+
|
176
|
+
## Online Coverage
|
177
|
+
|
178
|
+
1. [ACM TechNews](https://technews.acm.org/archives.cfm?fo=2020-05-may/may-06-2020.html)
|
179
|
+
1. [AIhub](https://aihub.org/2020/05/01/interview-with-siddharth-bhatia-a-new-approach-for-anomaly-detection/)
|
180
|
+
1. [Hacker News](https://news.ycombinator.com/item?id=22802604)
|
181
|
+
1. [KDnuggets](https://www.kdnuggets.com/2020/04/midas-new-baseline-anomaly-detection-graphs.html)
|
182
|
+
1. [Microsoft](https://techcommunity.microsoft.com/t5/azure-sentinel/announcing-the-azure-sentinel-hackathon-winners/ba-p/1548240)
|
183
|
+
1. [Towards Data Science](https://towardsdatascience.com/controlling-fake-news-using-graphs-and-statistics-31ed116a986f)
|
55
184
|
|
56
185
|
## Citation
|
57
|
-
|
186
|
+
|
187
|
+
If you use this code for your research, please consider citing our arXiv preprint
|
188
|
+
|
189
|
+
```bibtex
|
190
|
+
@misc{bhatia2020realtime,
|
191
|
+
title={Real-Time Streaming Anomaly Detection in Dynamic Graphs},
|
192
|
+
author={Siddharth Bhatia and Rui Liu and Bryan Hooi and Minji Yoon and Kijung Shin and Christos Faloutsos},
|
193
|
+
year={2020},
|
194
|
+
eprint={2009.08452},
|
195
|
+
archivePrefix={arXiv},
|
196
|
+
primaryClass={cs.LG}
|
197
|
+
}
|
58
198
|
|
59
199
|
```
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
200
|
+
|
201
|
+
or our AAAI paper
|
202
|
+
|
203
|
+
|
204
|
+
```bibtex
|
205
|
+
@inproceedings{bhatia2020midas,
|
206
|
+
title="MIDAS: Microcluster-Based Detector of Anomalies in Edge Streams",
|
207
|
+
author="Siddharth {Bhatia} and Bryan {Hooi} and Minji {Yoon} and Kijung {Shin} and Christos {Faloutsos}",
|
208
|
+
booktitle="AAAI 2020 : The Thirty-Fourth AAAI Conference on Artificial Intelligence",
|
209
|
+
year="2020"
|
65
210
|
}
|
66
211
|
```
|