midas-edge 0.1.1 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 51024d37d243a503b528b846e60e46b36b52e4b77a70b14d71feb262a00372d7
4
- data.tar.gz: e2fce29bba281e179123bb558a8deeba5808f9e27dbb2d24cf2ee4423c3632c9
3
+ metadata.gz: 5d4056b117cffbbbf0e5f9f97988789f9b2d53fc78753122ce7a03a45f532eff
4
+ data.tar.gz: f218bd317db9c4b3a792924e4312e35d84233bcb81198ea0a8f2a2551ef83452
5
5
  SHA512:
6
- metadata.gz: 39ee7a3b1228fb63838a121c180b9b9edd2f9a207da18810dbdb24a4335879872c6d265c2706154fa40dcb7414b3cb49b7b6a98efeb34ed0dde0379206a4d131
7
- data.tar.gz: 68c43621d46cc1a4fc6ccdad3f4d24a95428a68da59837f55377556d9a60f20f878a8db83a7e74c6ec35ac869e49aa19810c791db8a4b09c2ed18039dc2d00bd
6
+ metadata.gz: eede2417d3bce099af436eee2fb97497675c7056ae7adf5b496cfaf7e170533242a32f0dcda017c58847eb279af48f2dc1b61bff0bab81e0397250f8200518ed
7
+ data.tar.gz: 5b2bf24281919ae2f80f460bb7566bdf9d25b8bec145f2b43e59c8abb24a4f30e62c0d5ac2a8b6ecb8561d76e993a0facd6b0b2f177c3cee476daf2809bc4f7c
data/CHANGELOG.md CHANGED
@@ -1,3 +1,28 @@
1
+ ## 0.3.0 (2021-05-17)
2
+
3
+ - Updated to Rice 4
4
+ - Dropped support for Ruby < 2.6
5
+
6
+ ## 0.2.3 (2020-11-17)
7
+
8
+ - Updated MIDAS to 1.1.2
9
+
10
+ ## 0.2.2 (2020-09-23)
11
+
12
+ - Updated MIDAS to 1.1.0
13
+
14
+ ## 0.2.1 (2020-06-17)
15
+
16
+ - Fixed installation (missing header files)
17
+
18
+ ## 0.2.0 (2020-06-17)
19
+
20
+ - Updated MIDAS to 1.0.0
21
+ - Added `threshold` option
22
+ - Added `seed` option
23
+ - Changed default `alpha` to 0.5
24
+ - Fixed reading data from files with `directed: false`
25
+
1
26
  ## 0.1.1 (2020-02-19)
2
27
 
3
28
  - Fixed installation on Linux
data/NOTICE.txt CHANGED
@@ -1,4 +1,5 @@
1
- Copyright 2020 Andrew Kane
1
+ Copyright 2020 Rui Liu (liurui39660) and Siddharth Bhatia (bhatiasiddharth)
2
+ Copyright 2020-2021 Andrew Kane
2
3
 
3
4
  Licensed under the Apache License, Version 2.0 (the "License");
4
5
  you may not use this file except in compliance with the License.
data/README.md CHANGED
@@ -2,15 +2,14 @@
2
2
 
3
3
  [MIDAS](https://github.com/bhatiasiddharth/MIDAS) - edge stream anomaly detection - for Ruby
4
4
 
5
- [![Build Status](https://travis-ci.org/ankane/midas.svg?branch=master)](https://travis-ci.org/ankane/midas)
5
+ [![Build Status](https://github.com/ankane/midas/workflows/build/badge.svg?branch=master)](https://github.com/ankane/midas/actions)
6
6
 
7
7
  ## Installation
8
8
 
9
- Add these lines to your application’s Gemfile:
9
+ Add this line to your application’s Gemfile:
10
10
 
11
11
  ```ruby
12
12
  gem 'midas-edge'
13
- gem 'rice', github: 'jasonroelofs/rice' # for now for c++17
14
13
  ```
15
14
 
16
15
  ## Getting Started
@@ -43,9 +42,11 @@ Pass parameters - default values below
43
42
  Midas.new(
44
43
  rows: 2, # number of hash functions
45
44
  buckets: 769, # number of buckets
46
- alpha: 0.6, # temporal decay factor
45
+ alpha: 0.5, # temporal decay factor
46
+ threshold: nil, # todo
47
47
  relations: true, # whether to use MIDAS-R or MIDAS
48
- directed: true # treat the graph as directed or undirected
48
+ directed: true, # treat the graph as directed or undirected
49
+ seed: 0 # random seed
49
50
  )
50
51
  ```
51
52
 
@@ -57,10 +58,10 @@ Data can be an array of arrays
57
58
  [[1, 2, 3], [4, 5, 6]]
58
59
  ```
59
60
 
60
- Or a Numo NArray
61
+ Or a Numo array
61
62
 
62
63
  ```ruby
63
- Numo::Int32.new(3, 2).seq
64
+ Numo::NArray.cast([[1, 2, 3], [4, 5, 6]])
64
65
  ```
65
66
 
66
67
  ## Performance
data/ext/midas/ext.cpp CHANGED
@@ -3,18 +3,15 @@
3
3
  #include <vector>
4
4
 
5
5
  // midas
6
- #include <anom.hpp>
6
+ #include <FilteringCore.hpp>
7
+ #include <NormalCore.hpp>
8
+ #include <RelationalCore.hpp>
7
9
 
8
10
  // rice
9
- #include <rice/Module.hpp>
10
- #include <rice/String.hpp>
11
+ #include <rice/rice.hpp>
12
+ #include <rice/stl.hpp>
11
13
 
12
- using Rice::Module;
13
- using Rice::String;
14
- using Rice::define_module;
15
- using Rice::define_class_under;
16
-
17
- void load_str(vector<int>& src, vector<int>& dst, vector<int>& times, std::string input, bool directed) {
14
+ void load_str(std::vector<int>& src, std::vector<int>& dst, std::vector<int>& times, const std::string& input, bool directed) {
18
15
  int* input_ptr = (int*) input.data();
19
16
  size_t n = input.size() / sizeof(int);
20
17
 
@@ -39,8 +36,7 @@ void load_str(vector<int>& src, vector<int>& dst, vector<int>& times, std::strin
39
36
  // load_data from main.cpp
40
37
  // modified to throw std::runtime_error when cannot find file
41
38
  // instead of exiting
42
- void load_file(vector<int>& src, vector<int>& dst, vector<int>& times, std::string input_file, bool undirected)
43
- {
39
+ void load_file(std::vector<int>& src, std::vector<int>& dst, std::vector<int>& times, const std::string& input_file, bool undirected) {
44
40
  FILE* infile = fopen(input_file.c_str(), "r");
45
41
  if (infile == NULL) {
46
42
  throw std::runtime_error("Could not read file: " + input_file);
@@ -54,9 +50,8 @@ void load_file(vector<int>& src, vector<int>& dst, vector<int>& times, std::stri
54
50
  dst.push_back(d);
55
51
  times.push_back(t);
56
52
  }
57
- }
58
- else {
59
- while (fscanf(infile, "%d:%d:%d", &s, &d, &t) == 3) {
53
+ } else {
54
+ while (fscanf(infile, "%d,%d,%d", &s, &d, &t) == 3) {
60
55
  src.push_back(s);
61
56
  dst.push_back(d);
62
57
  times.push_back(t);
@@ -65,36 +60,54 @@ void load_file(vector<int>& src, vector<int>& dst, vector<int>& times, std::stri
65
60
  times.push_back(t);
66
61
  }
67
62
  }
63
+
64
+ fclose(infile);
68
65
  }
69
66
 
70
- std::string fit_predict(vector<int>& src, vector<int>& dst, vector<int>& times, int num_rows, int num_buckets, double factor, bool relations) {
71
- vector<double>* result;
72
- if (relations) {
73
- result = midasR(src, dst, times, num_rows, num_buckets, factor);
67
+ std::string fit_predict(std::vector<int>& src, std::vector<int>& dst, std::vector<int>& times, int num_rows, int num_buckets, float factor, float threshold, bool relations, int seed) {
68
+ srand(seed);
69
+ size_t n = src.size();
70
+ std::vector<float> result;
71
+ result.reserve(n);
72
+
73
+ if (!std::isnan(threshold)) {
74
+ MIDAS::FilteringCore midas(num_rows, num_buckets, threshold, factor);
75
+ for (size_t i = 0; i < n; i++) {
76
+ result[i] = midas(src[i], dst[i], times[i]);
77
+ }
78
+ } else if (relations) {
79
+ MIDAS::RelationalCore midas(num_rows, num_buckets, factor);
80
+ for (size_t i = 0; i < n; i++) {
81
+ result[i] = midas(src[i], dst[i], times[i]);
82
+ }
74
83
  } else {
75
- result = midas(src, dst, times, num_rows, num_buckets);
84
+ MIDAS::NormalCore midas(num_rows, num_buckets);
85
+ for (size_t i = 0; i < n; i++) {
86
+ result[i] = midas(src[i], dst[i], times[i]);
87
+ }
76
88
  }
77
- return std::string((char*) result->data(), sizeof(double) / sizeof(char) * result->size());
89
+
90
+ // std::string copies data
91
+ return std::string((char*) result.data(), sizeof(float) / sizeof(char) * n);
78
92
  }
79
93
 
80
94
  extern "C"
81
- void Init_ext()
82
- {
83
- Module rb_mMidas = define_module("Midas");
95
+ void Init_ext() {
96
+ auto rb_mMidas = Rice::define_module("Midas");
84
97
 
85
- define_class_under(rb_mMidas, "Detector")
86
- .define_method(
98
+ Rice::define_class_under(rb_mMidas, "Detector")
99
+ .define_function(
87
100
  "_fit_predict_str",
88
- *[](std::string input, int num_rows, int num_buckets, double factor, bool relations, bool directed) {
89
- vector<int> src, dst, times;
101
+ [](const std::string& input, int num_rows, int num_buckets, float factor, float threshold, bool relations, bool directed, int seed) {
102
+ std::vector<int> src, dst, times;
90
103
  load_str(src, dst, times, input, directed);
91
- return fit_predict(src, dst, times, num_rows, num_buckets, factor, relations);
104
+ return fit_predict(src, dst, times, num_rows, num_buckets, factor, threshold, relations, seed);
92
105
  })
93
- .define_method(
106
+ .define_function(
94
107
  "_fit_predict_file",
95
- *[](std::string input, int num_rows, int num_buckets, double factor, bool relations, bool directed) {
96
- vector<int> src, dst, times;
108
+ [](const std::string& input, int num_rows, int num_buckets, float factor, float threshold, bool relations, bool directed, int seed) {
109
+ std::vector<int> src, dst, times;
97
110
  load_file(src, dst, times, input, !directed);
98
- return fit_predict(src, dst, times, num_rows, num_buckets, factor, relations);
111
+ return fit_predict(src, dst, times, num_rows, num_buckets, factor, threshold, relations, seed);
99
112
  });
100
113
  }
data/ext/midas/extconf.rb CHANGED
@@ -2,11 +2,7 @@ require "mkmf-rice"
2
2
 
3
3
  $CXXFLAGS << " -std=c++17"
4
4
 
5
- ext = File.expand_path(".", __dir__)
6
- midas = File.expand_path("../../vendor/MIDAS", __dir__)
7
-
8
- $srcs = Dir["{#{ext},#{midas}}/*.{cc,cpp}"]
5
+ midas = File.expand_path("../../vendor/MIDAS/src", __dir__)
9
6
  $INCFLAGS << " -I#{midas}"
10
- $VPATH << midas
11
7
 
12
8
  create_makefile("midas/ext")
@@ -1,25 +1,28 @@
1
1
  module Midas
2
2
  class Detector
3
- def initialize(rows: 2, buckets: 769, alpha: 0.6, relations: true, directed: true)
3
+ def initialize(rows: 2, buckets: 769, alpha: 0.5, threshold: nil, relations: true, directed: true, seed: 0)
4
4
  @rows = rows
5
5
  @buckets = buckets
6
6
  @alpha = alpha
7
+ @threshold = threshold
7
8
  @relations = relations
8
9
  @directed = directed
10
+ @seed = seed
9
11
  end
10
12
 
11
13
  def fit_predict(x)
14
+ threshold = @threshold || Float::NAN
12
15
  result =
13
16
  if x.is_a?(String)
14
- _fit_predict_file(x, @rows, @buckets, @alpha, @relations, @directed)
17
+ _fit_predict_file(x, @rows, @buckets, @alpha, threshold, @relations, @directed, @seed)
15
18
  else
16
19
  x = Numo::Int32.cast(x) unless x.is_a?(Numo::NArray)
17
20
  x = x.cast_to(Numo::Int32) unless x.is_a?(Numo::Int32)
18
21
  raise ArgumentError, "Bad shape: #{x.shape}" unless x.rank == 2 && x.shape[1] == 3
19
- _fit_predict_str(x.to_binary, @rows, @buckets, @alpha, @relations, @directed)
22
+ _fit_predict_str(x.to_binary, @rows, @buckets, @alpha, threshold, @relations, @directed, @seed)
20
23
  end
21
24
 
22
- Numo::DFloat.from_binary(result)
25
+ Numo::SFloat.from_binary(result)
23
26
  end
24
27
  end
25
28
  end
data/lib/midas/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Midas
2
- VERSION = "0.1.1"
2
+ VERSION = "0.3.0"
3
3
  end
data/vendor/MIDAS/LICENSE CHANGED
@@ -174,28 +174,3 @@
174
174
  of your accepting any such warranty or additional liability.
175
175
 
176
176
  END OF TERMS AND CONDITIONS
177
-
178
- APPENDIX: How to apply the Apache License to your work.
179
-
180
- To apply the Apache License to your work, attach the following
181
- boilerplate notice, with the fields enclosed by brackets "[]"
182
- replaced with your own identifying information. (Don't include
183
- the brackets!) The text should be enclosed in the appropriate
184
- comment syntax for the file format. We also recommend that a
185
- file or class name and description of purpose be included on the
186
- same "printed page" as the copyright notice for easier
187
- identification within third-party archives.
188
-
189
- Copyright [yyyy] [name of copyright owner]
190
-
191
- Licensed under the Apache License, Version 2.0 (the "License");
192
- you may not use this file except in compliance with the License.
193
- You may obtain a copy of the License at
194
-
195
- http://www.apache.org/licenses/LICENSE-2.0
196
-
197
- Unless required by applicable law or agreed to in writing, software
198
- distributed under the License is distributed on an "AS IS" BASIS,
199
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
- See the License for the specific language governing permissions and
201
- limitations under the License.
@@ -1,66 +1,211 @@
1
1
  # MIDAS
2
- [![Conference](http://img.shields.io/badge/AAAI-2020-red.svg)](https://aaai.org/Conferences/AAAI-20/)
3
- [![Paper](http://img.shields.io/badge/Paper-pdf-brightgreen.svg)](https://www.comp.nus.edu.sg/~sbhatia/assets/pdf/midas.pdf)
4
- [![Poster](http://img.shields.io/badge/Poster-pdf-blueviolet.svg)](https://www.comp.nus.edu.sg/~sbhatia/assets/pdf/midasposter.pdf)
5
- [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://github.com/bhatiasiddharth/MIDAS/blob/master/LICENSE)
6
2
 
7
- <p align="center">
8
- <img align="center" src="https://www.comp.nus.edu.sg/~sbhatia/assets/img/midasstream.png" alt="...">
3
+ <p>
4
+ <a href="https://aaai.org/Conferences/AAAI-20/">
5
+ <img src="http://img.shields.io/badge/AAAI-2020-red.svg">
6
+ </a>
7
+ <a href="https://arxiv.org/pdf/2009.08452.pdf"><img src="http://img.shields.io/badge/Paper-PDF-brightgreen.svg"></a>
8
+ <a href="https://www.comp.nus.edu.sg/~sbhatia/assets/pdf/midasslides.pdf">
9
+ <img src="http://img.shields.io/badge/Slides-PDF-ff9e18.svg">
10
+ </a>
11
+ <a href="https://youtu.be/Bd4PyLCHrto">
12
+ <img src="http://img.shields.io/badge/Talk-Youtube-ff69b4.svg">
13
+ </a>
14
+ <a href="https://www.youtube.com/watch?v=DPmN-uPW8qU">
15
+ <img src="https://img.shields.io/badge/Overview-Youtube-orange.svg">
16
+ </a>
17
+ <a href="https://github.com/bhatiasiddharth/MIDAS/blob/master/LICENSE">
18
+ <img src="https://img.shields.io/badge/License-Apache%202.0-blue.svg">
19
+ </a>
9
20
  </p>
10
21
 
22
+ C++ implementation of
11
23
 
12
- Anomaly detection in graphs is a critical problem for finding suspicious behavior in innumerable systems, such as intrusion detection, fake ratings, and financial fraud. This has been a well-researched problem with majority of the proposed approaches focusing on static graphs. However, many real-world graphs are dynamic in nature, and methods based on static connections may miss temporal characteristics of the graphs and anomalies.
24
+ - [Real-time Streaming Anomaly Detection in Dynamic Graphs](https://arxiv.org/pdf/2009.08452.pdf). *Siddharth Bhatia, Rui Liu, Bryan Hooi, Minji Yoon, Kijung Shin, Christos Faloutsos*. (Under Review)
25
+ - [MIDAS: Microcluster-Based Detector of Anomalies in Edge Streams](https://arxiv.org/pdf/1911.04464.pdf). *Siddharth Bhatia, Bryan Hooi, Minji Yoon, Kijung Shin, Christos Faloutsos*. AAAI 2020.
13
26
 
14
- Among the methods focusing on dynamic graphs, most of them have edges aggregated into graph snapshots. However, to minimize the effect of malicious activities and start recovery as soon as possible, we need to detect anomalies in real-time or near real-time i.e. to identify whether an incoming edge is anomalous or not, as soon as we receive it. In addition, since the number of vertices can increase as we process the stream of edges, we need an algorithm which uses constant memory in graph size. Moreover, fraudulent or anomalous events in many applications occur in microclusters or suddenly arriving groups of suspiciously similar edges e.g. denial of service attacks in network traffic data and lockstep behavior.
27
+ The old implementation is in another branch, `OldImplementation`; it should be considered archived and will rarely receive feature updates.
15
28
 
16
- In this work, we propose MIDAS, short for Microcluster-Based Detector of Anomalies in Edge Streams, which detects microcluster anomalies, or suddenly arriving groups of suspiciously similar edges, in edge streams, using constant time and memory. In addition, by using a principled hypothesis testing framework, MIDAS provides theoretical bounds on the false positive probability, which earlier methods do not provide. Also, we are up to 48% more accurate while being up to 644 times faster than state of the art approaches.
29
+ ![](asset/Intro.png)
17
30
 
18
- For more details, please read the paper - [MIDAS:Microcluster-Based Detector of Anomalies in Edge Streams](https://www.comp.nus.edu.sg/~sbhatia/assets/pdf/midas.pdf). *Siddharth Bhatia, Bryan Hooi, Minji Yoon, Kijung Shin, Christos Faloutsos*. AAAI 2020.
31
+ ## Table of Contents
19
32
 
33
+ <!-- START doctoc generated TOC please keep comment here to allow auto update -->
34
+ <!-- DON'T EDIT THIS SECTION, INSTEAD RE-RUN doctoc TO UPDATE -->
20
35
 
21
- ## Getting started
22
- 1. Run `make` to compile code and create the binary.
23
- 2. Run `./midas -i ` followed by the input file path and name.
24
- 3. Run `make clean` to clean binaries.
25
36
 
37
+ - [Features](#features)
38
+ - [Demo](#demo)
39
+ - [Customization](#customization)
40
+ - [Other Files](#other-files)
41
+ - [In Other Languages](#in-other-languages)
42
+ - [Online Coverage](#online-coverage)
43
+ - [Citation](#citation)
44
+
45
+ <!-- END doctoc generated TOC please keep comment here to allow auto update -->
46
+
47
+ ## Features
48
+
49
+ - Finds Anomalies in Dynamic/Time-Evolving Graph: (Intrusion Detection, Fake Ratings, Financial Fraud)
50
+ - Detects Microcluster Anomalies (suddenly arriving groups of suspiciously similar edges e.g. DoS attack)
51
+ - Theoretical Guarantees on False Positive Probability
52
+ - Constant Memory (independent of graph size)
53
+ - Constant Update Time (real-time anomaly detection to minimize harm)
54
+ - Up to 55% more accurate and 929 times faster than state-of-the-art approaches
55
+ - Experiments are performed using the following datasets:
56
+ - [DARPA](https://www.ll.mit.edu/r-d/datasets/1998-darpa-intrusion-detection-evaluation-dataset)
57
+ - [TwitterWorldCup2014](http://odds.cs.stonybrook.edu/twitterworldcup2014-dataset)
58
+ - [TwitterSecurity](http://odds.cs.stonybrook.edu/twittersecurity-dataset)
26
59
 
27
60
  ## Demo
28
- 1. Run `./demo.sh` to compile the code and run it on example dataset.
29
61
 
62
+ If you use Windows:
63
+
64
+ 1. Open a Visual Studio developer command prompt (we want its toolchain)
65
+ 1. `cd` to the project root `MIDAS/`
66
+ 1. `cmake -DCMAKE_BUILD_TYPE=Release -GNinja -S . -B build/release`
67
+ 1. `cmake --build build/release --target Demo`
68
+ 1. `cd` to `MIDAS/build/release/`
69
+ 1. `.\Demo.exe`
70
+
71
+ If you use Linux/macOS:
72
+
73
+ 1. Open a terminal
74
+ 1. `cd` to the project root `MIDAS/`
75
+ 1. `cmake -DCMAKE_BUILD_TYPE=Release -S . -B build/release`
76
+ 1. `cmake --build build/release --target Demo`
77
+ 1. `cd` to `MIDAS/build/release/`
78
+ 1. `./Demo`
79
+
80
+ The demo runs on `MIDAS/data/DARPA/darpa_processed.csv`, which has 4.5M records, with the filtering core (MIDAS-F).
81
+
82
+ The scores will be exported to `MIDAS/temp/Score.txt`, higher means more anomalous.
83
+
84
+ All file paths are absolute and "hardcoded" by CMake, but running the executable by double-clicking it is NOT recommended.
85
+
86
+ ### Requirements
87
+
88
+ Core
89
+ - C++11
90
+ - C++ standard libraries
91
+
92
+ Demo
93
+ - Python 3 (`MIDAS/util/EvaluateScore.py`)
94
+ - `pandas`: I/O
95
+ - `scikit-learn`: Compute ROC-AUC
96
+
97
+ Experiment
98
+ - (Optional) Intel TBB: Parallelization
99
+ - (Optional) OpenMP: Parallelization
100
+
101
+ Other python utility scripts
102
+ - Python 3
103
+ - `pandas`
104
+ - `scikit-learn`
105
+
106
+ ## Customization
107
+
108
+ ### Switch Cores
109
+
110
+ Cores are instantiated at `MIDAS/example/Demo.cpp:67-69`; uncomment the one you want to use.
111
+
112
+ ### Custom Dataset + `Demo.cpp`
113
+
114
+ You need to prepare three files:
30
115
 
31
- ## Command line options
32
- * `-h --help`: produce help message
33
- * `-i --input`: input file name
34
- * `-o --output`: output file name (default: scores.txt)
35
- * `-r --rows`: Number of Hash Functions (default: 2)
36
- * `-b --buckets`: Number of Buckets (default: 769)
37
- * `-a --alpha`: Temporal Decay Factor (default: 0.6)
38
- * `--norelations` : Run MIDAS instead of MIDAS-R
39
- * `--undirected` : Treat graph as undirected instead of directed
116
+ - Meta file
117
+ - Only includes an integer `N`, the number of records in the dataset
118
+ - Use its path for `pathMeta`
119
+ - E.g. `MIDAS/data/DARPA/darpa_shape.txt`
120
+ - Data file
121
+ - A header-less csv format file of shape `[N,3]`
122
+ - Columns are sources, destinations, timestamps
123
+ - Use its path for `pathData`
124
+ - E.g. `MIDAS/data/DARPA/darpa_processed.csv`
125
+ - Label file
126
+ - A header-less csv format file of shape `[N,1]`
127
+ - The corresponding label for data records
128
+ - 0 means normal record
129
+ - 1 means anomalous record
130
+ - Use its path for `pathGroundTruth`
131
+ - E.g. `MIDAS/data/DARPA/darpa_ground_truth.csv`
40
132
 
133
+ ### Custom Dataset + Custom Runner
41
134
 
42
- ## Input file format
43
- MIDAS expects the input edge stream to be stored in a single file containing the following three columns in order:
44
- 1. `source (int)`: source ID of the edge
45
- 2. `destination (int)`: destination ID of the edge
46
- 3. `time (int)`: time stamp of the edge
135
+ 1. Include the header `MIDAS/src/NormalCore.hpp`, `MIDAS/src/RelationalCore.hpp` or `MIDAS/src/FilteringCore.hpp`
136
+ 1. Instantiate cores with required parameters
137
+ 1. Call `operator()` on individual data records; it returns the anomaly score for the input record
47
138
 
48
- Thus, each line represents an edge. Edges should be sorted in non-decreasing order of their time stamps and the column delimiter should be `,`
139
+ ## Other Files
49
140
 
141
+ ### `example/`
50
142
 
51
- ## Datasets
52
- 1. [DARPA](https://www.ll.mit.edu/r-d/datasets/1998-darpa-intrusion-detection-evaluation-dataset)
53
- 2. [TwitterWorldCup2014](http://odds.cs.stonybrook.edu/twitterworldcup2014-dataset)
54
- 3. [TwitterSecurity](http://odds.cs.stonybrook.edu/twittersecurity-dataset)
143
+ #### `Experiment.cpp`
144
+
145
+ The code we used for experiments.
146
+ It will try to use Intel TBB or OpenMP for parallelization.
147
+ You should comment out all but one runner function call in `main()`, as most results are exported to `MIDAS/temp/Experiiment.csv` together with many intermediate files.
148
+
149
+ #### `Reproducible.cpp`
150
+
151
+ Similar to `Demo.cpp`, but with all random parameters hardcoded so that it always produces the same result.
152
+ It's for other developers and us to test if the implementation in other languages can produce acceptable results.
153
+
154
+ ### `util/`
155
+
156
+ `DeleteTempFile.py`, `EvaluateScore.py` and `ReproduceROC.py` will show their usage and a short description when executed without any argument.
157
+
158
+ #### `PreprocessData.py`
159
+
160
+ The code to process the raw dataset into an easy-to-read format.
161
+ Datasets are always assumed to be in a folder in `MIDAS/data/`.
162
+ It can process the following dataset(s)
163
+
164
+ - `DARPA/darpa_original.csv` -> `DARPA/darpa_processed.csv`, `DARPA/darpa_ground_truth.csv`, `DARPA/darpa_shape.txt`
165
+
166
+ ## In Other Languages
167
+
168
+ 1. Python: [Rui Liu's MIDAS.Python](https://github.com/liurui39660/MIDAS.Python), [Ritesh Kumar's pyMIDAS](https://github.com/ritesh99rakesh/pyMIDAS)
169
+ 1. Golang: [Steve Tan's midas](https://github.com/steve0hh/midas)
170
+ 1. Ruby: [Andrew Kane's midas](https://github.com/ankane/midas)
171
+ 1. Rust: [Scott Steele's midas_rs](https://github.com/scooter-dangle/midas_rs)
172
+ 1. R: [Tobias Heidler's MIDASwrappeR](https://github.com/pteridin/MIDASwrappeR)
173
+ 1. Java: [Joshua Tokle's MIDAS-Java](https://github.com/jotok/MIDAS-Java)
174
+ 1. Julia: [Ashrya Agrawal's MIDAS.jl](https://github.com/ashryaagr/MIDAS.jl)
175
+
176
+ ## Online Coverage
177
+
178
+ 1. [ACM TechNews](https://technews.acm.org/archives.cfm?fo=2020-05-may/may-06-2020.html)
179
+ 1. [AIhub](https://aihub.org/2020/05/01/interview-with-siddharth-bhatia-a-new-approach-for-anomaly-detection/)
180
+ 1. [Hacker News](https://news.ycombinator.com/item?id=22802604)
181
+ 1. [KDnuggets](https://www.kdnuggets.com/2020/04/midas-new-baseline-anomaly-detection-graphs.html)
182
+ 1. [Microsoft](https://techcommunity.microsoft.com/t5/azure-sentinel/announcing-the-azure-sentinel-hackathon-winners/ba-p/1548240)
183
+ 1. [Towards Data Science](https://towardsdatascience.com/controlling-fake-news-using-graphs-and-statistics-31ed116a986f)
55
184
 
56
185
  ## Citation
57
- If you use this code for your research, please consider citing our paper.
186
+
187
+ If you use this code for your research, please consider citing our arXiv preprint
188
+
189
+ ```bibtex
190
+ @misc{bhatia2020realtime,
191
+ title={Real-Time Streaming Anomaly Detection in Dynamic Graphs},
192
+ author={Siddharth Bhatia and Rui Liu and Bryan Hooi and Minji Yoon and Kijung Shin and Christos Faloutsos},
193
+ year={2020},
194
+ eprint={2009.08452},
195
+ archivePrefix={arXiv},
196
+ primaryClass={cs.LG}
197
+ }
58
198
 
59
199
  ```
60
- @article{bhatia2019midas,
61
- title={MIDAS: Microcluster-Based Detector of Anomalies in Edge Streams},
62
- author={Bhatia, Siddharth and Hooi, Bryan and Yoon, Minji and Shin, Kijung and Faloutsos, Christos},
63
- journal={arXiv preprint arXiv:1911.04464},
64
- year={2019}
200
+
201
+ or our AAAI paper
202
+
203
+
204
+ ```bibtex
205
+ @inproceedings{bhatia2020midas,
206
+ title="MIDAS: Microcluster-Based Detector of Anomalies in Edge Streams",
207
+ author="Siddharth {Bhatia} and Bryan {Hooi} and Minji {Yoon} and Kijung {Shin} and Christos {Faloutsos}",
208
+ booktitle="AAAI 2020 : The Thirty-Fourth AAAI Conference on Artificial Intelligence",
209
+ year="2020"
65
210
  }
66
211
  ```