midas-edge 0.1.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 51024d37d243a503b528b846e60e46b36b52e4b77a70b14d71feb262a00372d7
4
- data.tar.gz: e2fce29bba281e179123bb558a8deeba5808f9e27dbb2d24cf2ee4423c3632c9
3
+ metadata.gz: 9e4c7b4cac2e7a9dac3a6085b40d79a48b9f14dad32834e0d267f7bb667d86b9
4
+ data.tar.gz: abd0836d284e7a9c34c7733f2f195e38faca2fb1ac6ab01687c3790bc96d3cfa
5
5
  SHA512:
6
- metadata.gz: 39ee7a3b1228fb63838a121c180b9b9edd2f9a207da18810dbdb24a4335879872c6d265c2706154fa40dcb7414b3cb49b7b6a98efeb34ed0dde0379206a4d131
7
- data.tar.gz: 68c43621d46cc1a4fc6ccdad3f4d24a95428a68da59837f55377556d9a60f20f878a8db83a7e74c6ec35ac869e49aa19810c791db8a4b09c2ed18039dc2d00bd
6
+ metadata.gz: 48c92dbdbff039a514ca99207a88276fada4a1391497a1fd4595176f90bec0ac46f0fc89126d36c7ccfc3a1afe212aa11f2142fb7d3d836734f64ab912a88ffb
7
+ data.tar.gz: 9a2a140ed5d5a120969ee5fc0a599cccc6d590755c9eddf949c2e0a5897aaf39b6f8fe8137c9dde3fd418c78f04cdd8801d1353ca61009d4fbae5a610ad2edc5
@@ -1,3 +1,11 @@
1
+ ## 0.2.0 (2020-06-17)
2
+
3
+ - Updated MIDAS to 1.0.0
4
+ - Added `threshold` option
5
+ - Added `seed` option
6
+ - Changed default `alpha` to 0.5
7
+ - Fixed reading data from files with `directed: false`
8
+
1
9
  ## 0.1.1 (2020-02-19)
2
10
 
3
11
  - Fixed installation on Linux
data/README.md CHANGED
@@ -43,9 +43,11 @@ Pass parameters - default values below
43
43
  Midas.new(
44
44
  rows: 2, # number of hash functions
45
45
  buckets: 769, # number of buckets
46
- alpha: 0.6, # temporal decay factor
46
+ alpha: 0.5, # temporal decay factor
47
+ threshold: nil, # filtering threshold; when set, the filtering core is used (nil disables filtering)
47
48
  relations: true, # whether to use MIDAS-R or MIDAS
48
- directed: true # treat the graph as directed or undirected
49
+ directed: true, # treat the graph as directed or undirected
50
+ seed: 0 # random seed
49
51
  )
50
52
  ```
51
53
 
@@ -57,10 +59,10 @@ Data can be an array of arrays
57
59
  [[1, 2, 3], [4, 5, 6]]
58
60
  ```
59
61
 
60
- Or a Numo NArray
62
+ Or a Numo array
61
63
 
62
64
  ```ruby
63
- Numo::Int32.new(3, 2).seq
65
+ Numo::NArray.cast([[1, 2, 3], [4, 5, 6]])
64
66
  ```
65
67
 
66
68
  ## Performance
@@ -3,7 +3,9 @@
3
3
  #include <vector>
4
4
 
5
5
  // midas
6
- #include <anom.hpp>
6
+ #include <FilteringCore.hpp>
7
+ #include <NormalCore.hpp>
8
+ #include <RelationalCore.hpp>
7
9
 
8
10
  // rice
9
11
  #include <rice/Module.hpp>
@@ -14,7 +16,7 @@ using Rice::String;
14
16
  using Rice::define_module;
15
17
  using Rice::define_class_under;
16
18
 
17
- void load_str(vector<int>& src, vector<int>& dst, vector<int>& times, std::string input, bool directed) {
19
+ void load_str(std::vector<int>& src, std::vector<int>& dst, std::vector<int>& times, std::string input, bool directed) {
18
20
  int* input_ptr = (int*) input.data();
19
21
  size_t n = input.size() / sizeof(int);
20
22
 
@@ -39,7 +41,7 @@ void load_str(vector& src, vector& dst, vector& times, std::strin
39
41
  // load_data from main.cpp
40
42
  // modified to throw std::runtime_error when cannot find file
41
43
  // instead of exiting
42
- void load_file(vector<int>& src, vector<int>& dst, vector<int>& times, std::string input_file, bool undirected)
44
+ void load_file(std::vector<int>& src, std::vector<int>& dst, std::vector<int>& times, std::string input_file, bool undirected)
43
45
  {
44
46
  FILE* infile = fopen(input_file.c_str(), "r");
45
47
  if (infile == NULL) {
@@ -56,7 +58,7 @@ void load_file(vector& src, vector& dst, vector& times, std::stri
56
58
  }
57
59
  }
58
60
  else {
59
- while (fscanf(infile, "%d:%d:%d", &s, &d, &t) == 3) {
61
+ while (fscanf(infile, "%d,%d,%d", &s, &d, &t) == 3) {
60
62
  src.push_back(s);
61
63
  dst.push_back(d);
62
64
  times.push_back(t);
@@ -67,14 +69,28 @@ void load_file(vector& src, vector& dst, vector& times, std::stri
67
69
  }
68
70
  }
69
71
 
70
- std::string fit_predict(vector<int>& src, vector<int>& dst, vector<int>& times, int num_rows, int num_buckets, double factor, bool relations) {
71
- vector<double>* result;
72
- if (relations) {
73
- result = midasR(src, dst, times, num_rows, num_buckets, factor);
72
+ std::string fit_predict(std::vector<int>& src, std::vector<int>& dst, std::vector<int>& times, int num_rows, int num_buckets, float factor, float threshold, bool relations, int seed) {
73
+ srand(seed);
74
+ size_t n = src.size();
75
+ const auto result = new float[n];
76
+
77
+ if (!std::isnan(threshold)) {
78
+ MIDAS::FilteringCore midas(num_rows, num_buckets, threshold, factor);
79
+ for (size_t i = 0; i < n; i++) {
80
+ result[i] = midas(src[i], dst[i], times[i]);
81
+ }
82
+ } else if (relations) {
83
+ MIDAS::RelationalCore midas(num_rows, num_buckets, factor);
84
+ for (size_t i = 0; i < n; i++) {
85
+ result[i] = midas(src[i], dst[i], times[i]);
86
+ }
74
87
  } else {
75
- result = midas(src, dst, times, num_rows, num_buckets);
88
+ MIDAS::NormalCore midas(num_rows, num_buckets);
89
+ for (size_t i = 0; i < n; i++) {
90
+ result[i] = midas(src[i], dst[i], times[i]);
91
+ }
76
92
  }
77
- return std::string((char*) result->data(), sizeof(double) / sizeof(char) * result->size());
93
+ return std::string((char*) result, sizeof(float) / sizeof(char) * n);
78
94
  }
79
95
 
80
96
  extern "C"
@@ -85,16 +101,16 @@ void Init_ext()
85
101
  define_class_under(rb_mMidas, "Detector")
86
102
  .define_method(
87
103
  "_fit_predict_str",
88
- *[](std::string input, int num_rows, int num_buckets, double factor, bool relations, bool directed) {
89
- vector<int> src, dst, times;
104
+ *[](std::string input, int num_rows, int num_buckets, float factor, float threshold, bool relations, bool directed, int seed) {
105
+ std::vector<int> src, dst, times;
90
106
  load_str(src, dst, times, input, directed);
91
- return fit_predict(src, dst, times, num_rows, num_buckets, factor, relations);
107
+ return fit_predict(src, dst, times, num_rows, num_buckets, factor, threshold, relations, seed);
92
108
  })
93
109
  .define_method(
94
110
  "_fit_predict_file",
95
- *[](std::string input, int num_rows, int num_buckets, double factor, bool relations, bool directed) {
96
- vector<int> src, dst, times;
111
+ *[](std::string input, int num_rows, int num_buckets, float factor, float threshold, bool relations, bool directed, int seed) {
112
+ std::vector<int> src, dst, times;
97
113
  load_file(src, dst, times, input, !directed);
98
- return fit_predict(src, dst, times, num_rows, num_buckets, factor, relations);
114
+ return fit_predict(src, dst, times, num_rows, num_buckets, factor, threshold, relations, seed);
99
115
  });
100
116
  }
@@ -2,11 +2,7 @@ require "mkmf-rice"
2
2
 
3
3
  $CXXFLAGS << " -std=c++17"
4
4
 
5
- ext = File.expand_path(".", __dir__)
6
- midas = File.expand_path("../../vendor/MIDAS", __dir__)
7
-
8
- $srcs = Dir["{#{ext},#{midas}}/*.{cc,cpp}"]
5
+ midas = File.expand_path("../../vendor/MIDAS/src", __dir__)
9
6
  $INCFLAGS << " -I#{midas}"
10
- $VPATH << midas
11
7
 
12
8
  create_makefile("midas/ext")
@@ -1,25 +1,28 @@
1
1
  module Midas
2
2
  class Detector
3
- def initialize(rows: 2, buckets: 769, alpha: 0.6, relations: true, directed: true)
3
+ def initialize(rows: 2, buckets: 769, alpha: 0.5, threshold: nil, relations: true, directed: true, seed: 0)
4
4
  @rows = rows
5
5
  @buckets = buckets
6
6
  @alpha = alpha
7
+ @threshold = threshold
7
8
  @relations = relations
8
9
  @directed = directed
10
+ @seed = seed
9
11
  end
10
12
 
11
13
  def fit_predict(x)
14
+ threshold = @threshold || Float::NAN
12
15
  result =
13
16
  if x.is_a?(String)
14
- _fit_predict_file(x, @rows, @buckets, @alpha, @relations, @directed)
17
+ _fit_predict_file(x, @rows, @buckets, @alpha, threshold, @relations, @directed, @seed)
15
18
  else
16
19
  x = Numo::Int32.cast(x) unless x.is_a?(Numo::NArray)
17
20
  x = x.cast_to(Numo::Int32) unless x.is_a?(Numo::Int32)
18
21
  raise ArgumentError, "Bad shape: #{x.shape}" unless x.rank == 2 && x.shape[1] == 3
19
- _fit_predict_str(x.to_binary, @rows, @buckets, @alpha, @relations, @directed)
22
+ _fit_predict_str(x.to_binary, @rows, @buckets, @alpha, threshold, @relations, @directed, @seed)
20
23
  end
21
24
 
22
- Numo::DFloat.from_binary(result)
25
+ Numo::SFloat.from_binary(result)
23
26
  end
24
27
  end
25
28
  end
@@ -1,3 +1,3 @@
1
1
  module Midas
2
- VERSION = "0.1.1"
2
+ VERSION = "0.2.0"
3
3
  end
@@ -174,28 +174,3 @@
174
174
  of your accepting any such warranty or additional liability.
175
175
 
176
176
  END OF TERMS AND CONDITIONS
177
-
178
- APPENDIX: How to apply the Apache License to your work.
179
-
180
- To apply the Apache License to your work, attach the following
181
- boilerplate notice, with the fields enclosed by brackets "[]"
182
- replaced with your own identifying information. (Don't include
183
- the brackets!) The text should be enclosed in the appropriate
184
- comment syntax for the file format. We also recommend that a
185
- file or class name and description of purpose be included on the
186
- same "printed page" as the copyright notice for easier
187
- identification within third-party archives.
188
-
189
- Copyright [yyyy] [name of copyright owner]
190
-
191
- Licensed under the Apache License, Version 2.0 (the "License");
192
- you may not use this file except in compliance with the License.
193
- You may obtain a copy of the License at
194
-
195
- http://www.apache.org/licenses/LICENSE-2.0
196
-
197
- Unless required by applicable law or agreed to in writing, software
198
- distributed under the License is distributed on an "AS IS" BASIS,
199
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
- See the License for the specific language governing permissions and
201
- limitations under the License.
@@ -1,66 +1,136 @@
1
1
  # MIDAS
2
- [![Conference](http://img.shields.io/badge/AAAI-2020-red.svg)](https://aaai.org/Conferences/AAAI-20/)
3
- [![Paper](http://img.shields.io/badge/Paper-pdf-brightgreen.svg)](https://www.comp.nus.edu.sg/~sbhatia/assets/pdf/midas.pdf)
4
- [![Poster](http://img.shields.io/badge/Poster-pdf-blueviolet.svg)](https://www.comp.nus.edu.sg/~sbhatia/assets/pdf/midasposter.pdf)
5
- [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://github.com/bhatiasiddharth/MIDAS/blob/master/LICENSE)
6
2
 
7
- <p align="center">
8
- <img align="center" src="https://www.comp.nus.edu.sg/~sbhatia/assets/img/midasstream.png" alt="...">
3
+ <p>
4
+ <a href="https://aaai.org/Conferences/AAAI-20/">
5
+ <img src="http://img.shields.io/badge/AAAI-2020-red.svg">
6
+ </a>
7
+ <a href="https://www.comp.nus.edu.sg/~sbhatia/assets/pdf/midas.pdf"><img src="http://img.shields.io/badge/Paper-PDF-brightgreen.svg"></a>
8
+ <a href="https://www.comp.nus.edu.sg/~sbhatia/assets/pdf/midasslides.pdf">
9
+ <img src="http://img.shields.io/badge/Slides-PDF-ff9e18.svg">
10
+ </a>
11
+ <a href="https://youtu.be/Bd4PyLCHrto">
12
+ <img src="http://img.shields.io/badge/Talk-Youtube-ff69b4.svg">
13
+ </a>
14
+ <a href="https://www.kdnuggets.com/2020/04/midas-new-baseline-anomaly-detection-graphs.html">
15
+ <img src="https://img.shields.io/badge/Press-KDnuggets-orange.svg">
16
+ </a>
17
+ <a href="https://github.com/bhatiasiddharth/MIDAS/blob/master/LICENSE">
18
+ <img src="https://img.shields.io/badge/License-Apache%202.0-blue.svg">
19
+ </a>
9
20
  </p>
10
21
 
22
+ C++ implementation of
11
23
 
12
- Anomaly detection in graphs is a critical problem for finding suspicious behavior in innumerable systems, such as intrusion detection, fake ratings, and financial fraud. This has been a well-researched problem with majority of the proposed approaches focusing on static graphs. However, many real-world graphs are dynamic in nature, and methods based on static connections may miss temporal characteristics of the graphs and anomalies.
24
+ - Real-time Streaming Anomaly Detection in Dynamic Graphs. *Siddharth Bhatia, Rui Liu, Bryan Hooi, Minji Yoon, Kijung Shin, Christos Faloutsos*. (Under Review)
25
+ - [MIDAS: Microcluster-Based Detector of Anomalies in Edge Streams](asset/Conference.pdf). *Siddharth Bhatia, Bryan Hooi, Minji Yoon, Kijung Shin, Christos Faloutsos*. AAAI 2020.
13
26
 
14
- Among the methods focusing on dynamic graphs, most of them have edges aggregated into graph snapshots. However, to minimize the effect of malicious activities and start recovery as soon as possible, we need to detect anomalies in real-time or near real-time i.e. to identify whether an incoming edge is anomalous or not, as soon as we receive it. In addition, since the number of vertices can increase as we process the stream of edges, we need an algorithm which uses constant memory in graph size. Moreover, fraudulent or anomalous events in many applications occur in microclusters or suddenly arriving groups of suspiciously similar edges e.g. denial of service attacks in network traffic data and lockstep behavior.
27
+ The old implementation is in another branch, `OldImplementation`; it should be considered archived and is unlikely to receive feature updates.
15
28
 
16
- In this work, we propose MIDAS, short for Microcluster-Based Detector of Anomalies in Edge Streams, which detects microcluster anomalies, or suddenly arriving groups of suspiciously similar edges, in edge streams, using constant time and memory. In addition, by using a principled hypothesis testing framework, MIDAS provides theoretical bounds on the false positive probability, which earlier methods do not provide. Also, we are up to 48% more accurate while being up to 644 times faster than state of the art approaches.
29
+ ![](asset/Intro.png)
17
30
 
18
- For more details, please read the paper - [MIDAS:Microcluster-Based Detector of Anomalies in Edge Streams](https://www.comp.nus.edu.sg/~sbhatia/assets/pdf/midas.pdf). *Siddharth Bhatia, Bryan Hooi, Minji Yoon, Kijung Shin, Christos Faloutsos*. AAAI 2020.
31
+ ## Table of Contents
19
32
 
33
+ - [Features](#features)
34
+ - [Demo](#demo)
35
+ - [Customization](#customization)
36
+ - [Online Articles](#online-articles)
37
+ - [MIDAS in Other Languages](#midas-in-other-languages)
38
+ - [Citation](#citation)
20
39
 
21
- ## Getting started
22
- 1. Run `make` to compile code and create the binary.
23
- 2. Run `./midas -i ` followed by the input file path and name.
24
- 3. Run `make clean` to clean binaries.
40
+ ## Features
25
41
 
42
+ - Finds Anomalies in Dynamic/Time-Evolving Graph: (Intrusion Detection, Fake Ratings, Financial Fraud)
43
+ - Detects Microcluster Anomalies (suddenly arriving groups of suspiciously similar edges e.g. DoS attack)
44
+ - Theoretical Guarantees on False Positive Probability
45
+ - Constant Memory (independent of graph size)
46
+ - Constant Update Time (real-time anomaly detection to minimize harm)
47
+ - Up to 55% more accurate and 929 times faster than the state of the art approaches
48
+ - Some experiments are performed on the following datasets:
49
+ - [DARPA](https://www.ll.mit.edu/r-d/datasets/1998-darpa-intrusion-detection-evaluation-dataset)
50
+ - [TwitterWorldCup2014](http://odds.cs.stonybrook.edu/twitterworldcup2014-dataset)
51
+ - [TwitterSecurity](http://odds.cs.stonybrook.edu/twittersecurity-dataset)
26
52
 
27
53
  ## Demo
28
- 1. Run `./demo.sh` to compile the code and run it on example dataset.
29
54
 
55
+ If you use Windows:
30
56
 
31
- ## Command line options
32
- * `-h --help`: produce help message
33
- * `-i --input`: input file name
34
- * `-o --output`: output file name (default: scores.txt)
35
- * `-r --rows`: Number of Hash Functions (default: 2)
36
- * `-b --buckets`: Number of Buckets (default: 769)
37
- * `-a --alpha`: Temporal Decay Factor (default: 0.6)
38
- * `--norelations` : Run MIDAS instead of MIDAS-R
39
- * `--undirected` : Treat graph as undirected instead of directed
57
+ 1. Open a Visual Studio developer command prompt (its toolchain is required)
58
+ 1. `cd` to the project root `MIDAS/`
59
+ 1. `cmake -DCMAKE_BUILD_TYPE=Release -G "NMake Makefiles" -S . -B build/release`
60
+ 1. `cmake --build build/release --target Demo`
61
+ 1. `cd` to `MIDAS/build/release/src`
62
+ 1. `.\Demo.exe`
40
63
 
64
+ If you use Linux/macOS systems:
41
65
 
42
- ## Input file format
43
- MIDAS expects the input edge stream to be stored in a single file containing the following three columns in order:
44
- 1. `source (int)`: source ID of the edge
45
- 2. `destination (int)`: destination ID of the edge
46
- 3. `time (int)`: time stamp of the edge
66
+ 1. Open a terminal
67
+ 1. `cd` to the project root `MIDAS/`
68
+ 1. `cmake -DCMAKE_BUILD_TYPE=Release -S . -B build/release`
69
+ 1. `cmake --build build/release --target Demo`
70
+ 1. `cd` to `MIDAS/build/release/src`
71
+ 1. `./Demo`
47
72
 
48
- Thus, each line represents an edge. Edges should be sorted in non-decreasing order of their time stamps and the column delimiter should be `,`
73
+ The demo runs on `MIDAS/data/DARPA/darpa_processed.csv`, which has 4.5M records, with the filtering core.
49
74
 
75
+ The scores will be exported to `MIDAS/temp/Score.txt`; a higher score means more anomalous.
50
76
 
51
- ## Datasets
52
- 1. [DARPA](https://www.ll.mit.edu/r-d/datasets/1998-darpa-intrusion-detection-evaluation-dataset)
53
- 2. [TwitterWorldCup2014](http://odds.cs.stonybrook.edu/twitterworldcup2014-dataset)
54
- 3. [TwitterSecurity](http://odds.cs.stonybrook.edu/twittersecurity-dataset)
77
+ All file paths are absolute and "hardcoded" by CMake, but running the executable by double-clicking it is NOT recommended.
78
+
79
+ ## Customization
80
+
81
+ ### Switch Cores
82
+
83
+ Cores are instantiated at `MIDAS/example/Demo.cpp:64-66`, uncomment the chosen one.
84
+
85
+ ### Custom Dataset + `Demo.cpp`
86
+
87
+ You need to prepare three files:
88
+
89
+ - Meta file
90
+ - Only includes an integer `N`, the number of records in the dataset
91
+ - Use its path for `pathMeta`
92
+ - Data file
93
+ - A header-less csv format file of shape `[N,3]`
94
+ - Columns are sources, destinations, timestamps
95
+ - Use its path for `pathData`
96
+ - Label file
97
+ - A header-less csv format file of shape `[N,1]`
98
+ - The corresponding label for data records
99
+ - 0 means normal record
100
+ - 1 means anomalous record
101
+ - Use its path for `pathGroundTruth`
102
+
103
+ ### Custom Dataset + Custom Runner
104
+
105
+ 1. Include the header `MIDAS/CPU/NormalCore.hpp`, `MIDAS/CPU/RelationalCore.hpp` or `MIDAS/CPU/FilteringCore.hpp`
106
+ 1. Instantiate cores with required parameters
107
+ 1. Call `operator()` on individual data records; it returns the anomaly score for the input record.
108
+
109
+ ## Online Articles
110
+
111
+ 1. KDnuggets: [Introducing MIDAS: A New Baseline for Anomaly Detection in Graphs](https://www.kdnuggets.com/2020/04/midas-new-baseline-anomaly-detection-graphs.html)
112
+ 2. Towards Data Science: [Controlling Fake News using Graphs and Statistics](https://towardsdatascience.com/controlling-fake-news-using-graphs-and-statistics-31ed116a986f)
113
+ 3. Towards Data Science: [Anomaly detection in dynamic graphs using MIDAS](https://towardsdatascience.com/anomaly-detection-in-dynamic-graphs-using-midas-e4f8d0b1db45)
114
+ 4. Towards AI: [Anomaly Detection with MIDAS](https://medium.com/towards-artificial-intelligence/anomaly-detection-with-midas-2735a2e6dce8)
115
+ 5. [AIhub Interview](https://aihub.org/2020/05/01/interview-with-siddharth-bhatia-a-new-approach-for-anomaly-detection/)
116
+
117
+ ## MIDAS in Other Languages
118
+
119
+ 1. [Golang](https://github.com/steve0hh/midas) by [Steve Tan](https://github.com/steve0hh)
120
+ 2. [Ruby](https://github.com/ankane/midas) by [Andrew Kane](https://github.com/ankane)
121
+ 3. [Rust](https://github.com/scooter-dangle/midas_rs) by [Scott Steele](https://github.com/scooter-dangle)
122
+ 4. [R](https://github.com/pteridin/MIDASwrappeR) by [Tobias Heidler](https://github.com/pteridin)
123
+ 5. [Python](https://github.com/ritesh99rakesh/pyMIDAS) by [Ritesh Kumar](https://github.com/ritesh99rakesh)
55
124
 
56
125
  ## Citation
126
+
57
127
  If you use this code for your research, please consider citing our paper.
58
128
 
59
129
  ```
60
- @article{bhatia2019midas,
61
- title={MIDAS: Microcluster-Based Detector of Anomalies in Edge Streams},
62
- author={Bhatia, Siddharth and Hooi, Bryan and Yoon, Minji and Shin, Kijung and Faloutsos, Christos},
63
- journal={arXiv preprint arXiv:1911.04464},
64
- year={2019}
130
+ @inproceedings{bhatia2020midas,
131
+ title="MIDAS: Microcluster-Based Detector of Anomalies in Edge Streams",
132
+ author="Siddharth {Bhatia} and Bryan {Hooi} and Minji {Yoon} and Kijung {Shin} and Christos {Faloutsos}",
133
+ booktitle="AAAI 2020 : The Thirty-Fourth AAAI Conference on Artificial Intelligence",
134
+ year="2020"
65
135
  }
66
- ```
136
+ ```
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: midas-edge
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-02-19 00:00:00.000000000 Z
11
+ date: 2020-06-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rice
@@ -112,14 +112,6 @@ files:
112
112
  - lib/midas/version.rb
113
113
  - vendor/MIDAS/LICENSE
114
114
  - vendor/MIDAS/README.md
115
- - vendor/MIDAS/anom.cpp
116
- - vendor/MIDAS/anom.hpp
117
- - vendor/MIDAS/argparse.hpp
118
- - vendor/MIDAS/edgehash.cpp
119
- - vendor/MIDAS/edgehash.hpp
120
- - vendor/MIDAS/main.cpp
121
- - vendor/MIDAS/nodehash.cpp
122
- - vendor/MIDAS/nodehash.hpp
123
115
  homepage: https://github.com/ankane/midas
124
116
  licenses:
125
117
  - MIT
@@ -1,88 +0,0 @@
1
- #define MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
2
- #define MAX(X, Y) (((X) > (Y)) ? (X) : (Y))
3
-
4
- #include <iostream>
5
- #include <math.h>
6
- #include <algorithm>
7
- #include <vector>
8
- #include "anom.hpp"
9
- #include "edgehash.hpp"
10
- #include "nodehash.hpp"
11
-
12
- vector<double>* midas(vector<int>& src, vector<int>& dst, vector<int>& times, int num_rows, int num_buckets)
13
- {
14
- int m = *max_element(src.begin(), src.end());
15
- Edgehash cur_count(num_rows, num_buckets, m);
16
- Edgehash total_count(num_rows, num_buckets, m);
17
- vector<double>* anom_score = new vector<double>(src.size());
18
- int cur_t = 1, size = src.size(), cur_src, cur_dst;
19
- double cur_mean, sqerr, cur_score;
20
- for (int i = 0; i < size; i++) {
21
-
22
- if (i == 0 || times[i] > cur_t) {
23
- cur_count.clear();
24
- cur_t = times[i];
25
- }
26
-
27
- cur_src = src[i];
28
- cur_dst = dst[i];
29
- cur_count.insert(cur_src, cur_dst, 1);
30
- total_count.insert(cur_src, cur_dst, 1);
31
- cur_mean = total_count.get_count(cur_src, cur_dst) / cur_t;
32
- sqerr = pow(cur_count.get_count(cur_src, cur_dst) - cur_mean, 2);
33
- if (cur_t == 1) cur_score = 0;
34
- else cur_score = sqerr / cur_mean + sqerr / (cur_mean * (cur_t - 1));
35
- (*anom_score)[i] = cur_score;
36
- }
37
-
38
- return anom_score;
39
- }
40
-
41
- double counts_to_anom(double tot, double cur, int cur_t)
42
- {
43
- double cur_mean = tot / cur_t;
44
- double sqerr = pow(MAX(0, cur - cur_mean), 2);
45
- return sqerr / cur_mean + sqerr / (cur_mean * MAX(1, cur_t - 1));
46
- }
47
-
48
- vector<double>* midasR(vector<int>& src, vector<int>& dst, vector<int>& times, int num_rows, int num_buckets, double factor)
49
- {
50
- int m = *max_element(src.begin(), src.end());
51
- Edgehash cur_count(num_rows, num_buckets, m);
52
- Edgehash total_count(num_rows, num_buckets, m);
53
- Nodehash src_score(num_rows, num_buckets);
54
- Nodehash dst_score(num_rows, num_buckets);
55
- Nodehash src_total(num_rows, num_buckets);
56
- Nodehash dst_total(num_rows, num_buckets);
57
- vector<double>* anom_score = new vector<double>(src.size());
58
- int cur_t = 1, size = src.size(), cur_src, cur_dst;
59
- double cur_score, cur_score_src, cur_score_dst, combined_score;
60
-
61
- for (int i = 0; i < size; i++) {
62
-
63
- if (i == 0 || times[i] > cur_t) {
64
- cur_count.lower(factor);
65
- src_score.lower(factor);
66
- dst_score.lower(factor);
67
- cur_t = times[i];
68
- }
69
-
70
- cur_src = src[i];
71
- cur_dst = dst[i];
72
- cur_count.insert(cur_src, cur_dst, 1);
73
- total_count.insert(cur_src, cur_dst, 1);
74
- src_score.insert(cur_src, 1);
75
- dst_score.insert(cur_dst, 1);
76
- src_total.insert(cur_src, 1);
77
- dst_total.insert(cur_dst, 1);
78
- cur_score = counts_to_anom(total_count.get_count(cur_src, cur_dst), cur_count.get_count(cur_src, cur_dst), cur_t);
79
- cur_score_src = counts_to_anom(src_total.get_count(cur_src), src_score.get_count(cur_src), cur_t);
80
- cur_score_dst = counts_to_anom(dst_total.get_count(cur_dst), dst_score.get_count(cur_dst), cur_t);
81
- //combined_score = MAX(cur_score_src, cur_score_dst) + cur_score;
82
- //combined_score = cur_score_src + cur_score_dst + cur_score;
83
- combined_score = MAX(MAX(cur_score_src, cur_score_dst), cur_score);
84
- (*anom_score)[i] = log(1 + combined_score);
85
- }
86
-
87
- return anom_score;
88
- }