midas-edge 0.2.1 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +21 -0
- data/NOTICE.txt +2 -1
- data/README.md +2 -3
- data/ext/midas/ext.cpp +49 -36
- data/ext/midas/extconf.rb +7 -2
- data/ext/midas/numo.hpp +867 -0
- data/lib/midas/detector.rb +1 -12
- data/lib/midas/version.rb +1 -1
- data/vendor/MIDAS/README.md +107 -32
- data/vendor/MIDAS/src/{EdgeHash.hpp → CountMinSketch.hpp} +12 -11
- data/vendor/MIDAS/src/FilteringCore.hpp +29 -26
- data/vendor/MIDAS/src/NormalCore.hpp +6 -6
- data/vendor/MIDAS/src/RelationalCore.hpp +11 -13
- metadata +13 -69
- data/vendor/MIDAS/src/NodeHash.hpp +0 -104
data/lib/midas/detector.rb
CHANGED
@@ -11,18 +11,7 @@ module Midas
|
|
11
11
|
end
|
12
12
|
|
13
13
|
def fit_predict(x)
|
14
|
-
|
15
|
-
result =
|
16
|
-
if x.is_a?(String)
|
17
|
-
_fit_predict_file(x, @rows, @buckets, @alpha, threshold, @relations, @directed, @seed)
|
18
|
-
else
|
19
|
-
x = Numo::Int32.cast(x) unless x.is_a?(Numo::NArray)
|
20
|
-
x = x.cast_to(Numo::Int32) unless x.is_a?(Numo::Int32)
|
21
|
-
raise ArgumentError, "Bad shape: #{x.shape}" unless x.rank == 2 && x.shape[1] == 3
|
22
|
-
_fit_predict_str(x.to_binary, @rows, @buckets, @alpha, threshold, @relations, @directed, @seed)
|
23
|
-
end
|
24
|
-
|
25
|
-
Numo::SFloat.from_binary(result)
|
14
|
+
_fit_predict(x, @rows, @buckets, @alpha, @threshold || Float::NAN, @relations, @directed, @seed)
|
26
15
|
end
|
27
16
|
end
|
28
17
|
end
|
data/lib/midas/version.rb
CHANGED
data/vendor/MIDAS/README.md
CHANGED
@@ -4,15 +4,15 @@
|
|
4
4
|
<a href="https://aaai.org/Conferences/AAAI-20/">
|
5
5
|
<img src="http://img.shields.io/badge/AAAI-2020-red.svg">
|
6
6
|
</a>
|
7
|
-
<a href="https://
|
7
|
+
<a href="https://arxiv.org/pdf/2009.08452.pdf"><img src="http://img.shields.io/badge/Paper-PDF-brightgreen.svg"></a>
|
8
8
|
<a href="https://www.comp.nus.edu.sg/~sbhatia/assets/pdf/midasslides.pdf">
|
9
9
|
<img src="http://img.shields.io/badge/Slides-PDF-ff9e18.svg">
|
10
10
|
</a>
|
11
11
|
<a href="https://youtu.be/Bd4PyLCHrto">
|
12
12
|
<img src="http://img.shields.io/badge/Talk-Youtube-ff69b4.svg">
|
13
13
|
</a>
|
14
|
-
<a href="https://www.
|
15
|
-
<img src="https://img.shields.io/badge/
|
14
|
+
<a href="https://www.youtube.com/watch?v=DPmN-uPW8qU">
|
15
|
+
<img src="https://img.shields.io/badge/Overview-Youtube-orange.svg">
|
16
16
|
</a>
|
17
17
|
<a href="https://github.com/bhatiasiddharth/MIDAS/blob/master/LICENSE">
|
18
18
|
<img src="https://img.shields.io/badge/License-Apache%202.0-blue.svg">
|
@@ -21,8 +21,8 @@
|
|
21
21
|
|
22
22
|
C++ implementation of
|
23
23
|
|
24
|
-
- Real-time Streaming Anomaly Detection in Dynamic Graphs. *Siddharth Bhatia, Rui Liu, Bryan Hooi, Minji Yoon, Kijung Shin, Christos Faloutsos*. (Under Review)
|
25
|
-
- [MIDAS: Microcluster-Based Detector of Anomalies in Edge Streams](
|
24
|
+
- [Real-time Streaming Anomaly Detection in Dynamic Graphs](https://arxiv.org/pdf/2009.08452.pdf). *Siddharth Bhatia, Rui Liu, Bryan Hooi, Minji Yoon, Kijung Shin, Christos Faloutsos*. (Under Review)
|
25
|
+
- [MIDAS: Microcluster-Based Detector of Anomalies in Edge Streams](https://arxiv.org/pdf/1911.04464.pdf). *Siddharth Bhatia, Bryan Hooi, Minji Yoon, Kijung Shin, Christos Faloutsos*. AAAI 2020.
|
26
26
|
|
27
27
|
The old implementation is in another branch, `OldImplementation`; it should be considered archived and is unlikely to receive feature updates.
|
28
28
|
|
@@ -30,13 +30,20 @@ The old implementation is in another branch `OldImplementation`, it should be co
|
|
30
30
|
|
31
31
|
## Table of Contents
|
32
32
|
|
33
|
+
<!-- START doctoc generated TOC please keep comment here to allow auto update -->
|
34
|
+
<!-- DON'T EDIT THIS SECTION, INSTEAD RE-RUN doctoc TO UPDATE -->
|
35
|
+
|
36
|
+
|
33
37
|
- [Features](#features)
|
34
38
|
- [Demo](#demo)
|
35
39
|
- [Customization](#customization)
|
36
|
-
- [
|
37
|
-
- [
|
40
|
+
- [Other Files](#other-files)
|
41
|
+
- [In Other Languages](#in-other-languages)
|
42
|
+
- [Online Coverage](#online-coverage)
|
38
43
|
- [Citation](#citation)
|
39
44
|
|
45
|
+
<!-- END doctoc generated TOC please keep comment here to allow auto update -->
|
46
|
+
|
40
47
|
## Features
|
41
48
|
|
42
49
|
- Finds Anomalies in Dynamic/Time-Evolving Graph: (Intrusion Detection, Fake Ratings, Financial Fraud)
|
@@ -45,7 +52,7 @@ The old implementation is in another branch `OldImplementation`, it should be co
|
|
45
52
|
- Constant Memory (independent of graph size)
|
46
53
|
- Constant Update Time (real-time anomaly detection to minimize harm)
|
47
54
|
- Up to 55% more accurate and 929 times faster than the state-of-the-art approaches
|
48
|
-
-
|
55
|
+
- Experiments are performed using the following datasets:
|
49
56
|
- [DARPA](https://www.ll.mit.edu/r-d/datasets/1998-darpa-intrusion-detection-evaluation-dataset)
|
50
57
|
- [TwitterWorldCup2014](http://odds.cs.stonybrook.edu/twitterworldcup2014-dataset)
|
51
58
|
- [TwitterSecurity](http://odds.cs.stonybrook.edu/twittersecurity-dataset)
|
@@ -56,31 +63,51 @@ If you use Windows:
|
|
56
63
|
|
57
64
|
1. Open a Visual Studio developer command prompt (we want its toolchain)
|
58
65
|
1. `cd` to the project root `MIDAS/`
|
59
|
-
1. `cmake -DCMAKE_BUILD_TYPE=Release -
|
66
|
+
1. `cmake -DCMAKE_BUILD_TYPE=Release -GNinja -S . -B build/release`
|
60
67
|
1. `cmake --build build/release --target Demo`
|
61
|
-
1. `cd` to `MIDAS/build/release
|
68
|
+
1. `cd` to `MIDAS/build/release/`
|
62
69
|
1. `.\Demo.exe`
|
63
70
|
|
64
|
-
If you use Linux/macOS
|
71
|
+
If you use Linux/macOS:
|
65
72
|
|
66
73
|
1. Open a terminal
|
67
74
|
1. `cd` to the project root `MIDAS/`
|
68
75
|
1. `cmake -DCMAKE_BUILD_TYPE=Release -S . -B build/release`
|
69
76
|
1. `cmake --build build/release --target Demo`
|
70
|
-
1. `cd` to `MIDAS/build/release
|
77
|
+
1. `cd` to `MIDAS/build/release/`
|
71
78
|
1. `./Demo`
|
72
79
|
|
73
|
-
The demo runs on `MIDAS/data/DARPA/darpa_processed.csv`, which has 4.5M records, with the filtering core.
|
80
|
+
The demo runs on `MIDAS/data/DARPA/darpa_processed.csv`, which has 4.5M records, with the filtering core (MIDAS-F).
|
74
81
|
|
75
82
|
The scores will be exported to `MIDAS/temp/Score.txt`, higher means more anomalous.
|
76
83
|
|
77
|
-
All file paths are absolute and "hardcoded" by CMake, but it's suggested NOT to run by double
|
84
|
+
All file paths are absolute and "hardcoded" by CMake, but it's suggested NOT to run the demo by double-clicking the executable file.
|
85
|
+
|
86
|
+
### Requirements
|
87
|
+
|
88
|
+
Core
|
89
|
+
- C++11
|
90
|
+
- C++ standard libraries
|
91
|
+
|
92
|
+
Demo
|
93
|
+
- Python 3 (`MIDAS/util/EvaluateScore.py`)
|
94
|
+
- `pandas`: I/O
|
95
|
+
- `scikit-learn`: Compute ROC-AUC
|
96
|
+
|
97
|
+
Experiment
|
98
|
+
- (Optional) Intel TBB: Parallelization
|
99
|
+
- (Optional) OpenMP: Parallelization
|
100
|
+
|
101
|
+
Other python utility scripts
|
102
|
+
- Python 3
|
103
|
+
- `pandas`
|
104
|
+
- `scikit-learn`
|
78
105
|
|
79
106
|
## Customization
|
80
107
|
|
81
108
|
### Switch Cores
|
82
109
|
|
83
|
-
Cores are instantiated at `MIDAS/example/Demo.cpp:
|
110
|
+
Cores are instantiated at `MIDAS/example/Demo.cpp:67-69`; uncomment the chosen one.
|
84
111
|
|
85
112
|
### Custom Dataset + `Demo.cpp`
|
86
113
|
|
@@ -89,48 +116,96 @@ You need to prepare three files:
|
|
89
116
|
- Meta file
|
90
117
|
- Only includes an integer `N`, the number of records in the dataset
|
91
118
|
- Use its path for `pathMeta`
|
119
|
+
- E.g. `MIDAS/data/DARPA/darpa_shape.txt`
|
92
120
|
- Data file
|
93
121
|
- A header-less csv format file of shape `[N,3]`
|
94
122
|
- Columns are sources, destinations, timestamps
|
95
123
|
- Use its path for `pathData`
|
124
|
+
- E.g. `MIDAS/data/DARPA/darpa_processed.csv`
|
96
125
|
- Label file
|
97
126
|
- A header-less csv format file of shape `[N,1]`
|
98
127
|
- The corresponding label for data records
|
99
128
|
- 0 means normal record
|
100
129
|
- 1 means anomalous record
|
101
|
-
- Use its path for `pathGroundTruth`
|
130
|
+
- Use its path for `pathGroundTruth`
|
131
|
+
- E.g. `MIDAS/data/DARPA/darpa_ground_truth.csv`
|
102
132
|
|
103
133
|
### Custom Dataset + Custom Runner
|
104
134
|
|
105
|
-
1. Include the header `MIDAS/
|
135
|
+
1. Include the header `MIDAS/src/NormalCore.hpp`, `MIDAS/src/RelationalCore.hpp` or `MIDAS/src/FilteringCore.hpp`
|
106
136
|
1. Instantiate cores with required parameters
|
107
|
-
1. Call `operator()` on individual data records, it returns the anomaly score for the input record
|
137
|
+
1. Call `operator()` on individual data records; it returns the anomaly score for the input record
|
138
|
+
|
139
|
+
## Other Files
|
140
|
+
|
141
|
+
### `example/`
|
142
|
+
|
143
|
+
#### `Experiment.cpp`
|
144
|
+
|
145
|
+
The code we used for experiments.
|
146
|
+
It will try to use Intel TBB or OpenMP for parallelization.
|
147
|
+
You should comment out all but one runner function call in `main()`, as most results are exported to `MIDAS/temp/Experiment.csv` together with many intermediate files.
|
148
|
+
|
149
|
+
#### `Reproducible.cpp`
|
108
150
|
|
109
|
-
|
151
|
+
Similar to `Demo.cpp`, but with all random parameters hardcoded so that it always produces the same result.
|
152
|
+
It lets us and other developers test whether implementations in other languages produce acceptable results.
|
110
153
|
|
111
|
-
|
112
|
-
2. Towards Data Science: [Controlling Fake News using Graphs and Statistics](https://towardsdatascience.com/controlling-fake-news-using-graphs-and-statistics-31ed116a986f)
|
113
|
-
2. Towards Data Science: [Anomaly detection in dynamic graphs using MIDAS](https://towardsdatascience.com/anomaly-detection-in-dynamic-graphs-using-midas-e4f8d0b1db45)
|
114
|
-
4. Towards AI: [Anomaly Detection with MIDAS](https://medium.com/towards-artificial-intelligence/anomaly-detection-with-midas-2735a2e6dce8)
|
115
|
-
5. [AIhub Interview](https://aihub.org/2020/05/01/interview-with-siddharth-bhatia-a-new-approach-for-anomaly-detection/)
|
154
|
+
### `util/`
|
116
155
|
|
117
|
-
|
156
|
+
`DeleteTempFile.py`, `EvaluateScore.py` and `ReproduceROC.py` will show their usage and a short description when executed without any argument.
|
118
157
|
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
158
|
+
#### `PreprocessData.py`
|
159
|
+
|
160
|
+
The code to process the raw dataset into an easy-to-read format.
|
161
|
+
Datasets are always assumed to be in a folder in `MIDAS/data/`.
|
162
|
+
It can process the following dataset(s)
|
163
|
+
|
164
|
+
- `DARPA/darpa_original.csv` -> `DARPA/darpa_processed.csv`, `DARPA/darpa_ground_truth.csv`, `DARPA/darpa_shape.txt`
|
165
|
+
|
166
|
+
## In Other Languages
|
167
|
+
|
168
|
+
1. Python: [Rui Liu's MIDAS.Python](https://github.com/liurui39660/MIDAS.Python), [Ritesh Kumar's pyMIDAS](https://github.com/ritesh99rakesh/pyMIDAS)
|
169
|
+
1. Golang: [Steve Tan's midas](https://github.com/steve0hh/midas)
|
170
|
+
1. Ruby: [Andrew Kane's midas](https://github.com/ankane/midas)
|
171
|
+
1. Rust: [Scott Steele's midas_rs](https://github.com/scooter-dangle/midas_rs)
|
172
|
+
1. R: [Tobias Heidler's MIDASwrappeR](https://github.com/pteridin/MIDASwrappeR)
|
173
|
+
1. Java: [Joshua Tokle's MIDAS-Java](https://github.com/jotok/MIDAS-Java)
|
174
|
+
1. Julia: [Ashrya Agrawal's MIDAS.jl](https://github.com/ashryaagr/MIDAS.jl)
|
175
|
+
|
176
|
+
## Online Coverage
|
177
|
+
|
178
|
+
1. [ACM TechNews](https://technews.acm.org/archives.cfm?fo=2020-05-may/may-06-2020.html)
|
179
|
+
1. [AIhub](https://aihub.org/2020/05/01/interview-with-siddharth-bhatia-a-new-approach-for-anomaly-detection/)
|
180
|
+
1. [Hacker News](https://news.ycombinator.com/item?id=22802604)
|
181
|
+
1. [KDnuggets](https://www.kdnuggets.com/2020/04/midas-new-baseline-anomaly-detection-graphs.html)
|
182
|
+
1. [Microsoft](https://techcommunity.microsoft.com/t5/azure-sentinel/announcing-the-azure-sentinel-hackathon-winners/ba-p/1548240)
|
183
|
+
1. [Towards Data Science](https://towardsdatascience.com/controlling-fake-news-using-graphs-and-statistics-31ed116a986f)
|
124
184
|
|
125
185
|
## Citation
|
126
186
|
|
127
|
-
If you use this code for your research, please consider citing our
|
187
|
+
If you use this code for your research, please consider citing our arXiv preprint
|
188
|
+
|
189
|
+
```bibtex
|
190
|
+
@misc{bhatia2020realtime,
|
191
|
+
title={Real-Time Streaming Anomaly Detection in Dynamic Graphs},
|
192
|
+
author={Siddharth Bhatia and Rui Liu and Bryan Hooi and Minji Yoon and Kijung Shin and Christos Faloutsos},
|
193
|
+
year={2020},
|
194
|
+
eprint={2009.08452},
|
195
|
+
archivePrefix={arXiv},
|
196
|
+
primaryClass={cs.LG}
|
197
|
+
}
|
128
198
|
|
129
199
|
```
|
200
|
+
|
201
|
+
or our AAAI paper
|
202
|
+
|
203
|
+
|
204
|
+
```bibtex
|
130
205
|
@inproceedings{bhatia2020midas,
|
131
206
|
title="MIDAS: Microcluster-Based Detector of Anomalies in Edge Streams",
|
132
207
|
author="Siddharth {Bhatia} and Bryan {Hooi} and Minji {Yoon} and Kijung {Shin} and Christos {Faloutsos}",
|
133
208
|
booktitle="AAAI 2020 : The Thirty-Fourth AAAI Conference on Artificial Intelligence",
|
134
209
|
year="2020"
|
135
210
|
}
|
136
|
-
```
|
211
|
+
```
|
@@ -19,7 +19,7 @@
|
|
19
19
|
#include <algorithm>
|
20
20
|
|
21
21
|
namespace MIDAS {
|
22
|
-
struct
|
22
|
+
struct CountMinSketch {
|
23
23
|
// Fields
|
24
24
|
// --------------------------------------------------------------------------------
|
25
25
|
|
@@ -33,10 +33,10 @@ struct EdgeHash {
|
|
33
33
|
// Methods
|
34
34
|
// --------------------------------------------------------------------------------
|
35
35
|
|
36
|
-
|
37
|
-
|
36
|
+
CountMinSketch() = delete;
|
37
|
+
CountMinSketch& operator=(const CountMinSketch& b) = delete;
|
38
38
|
|
39
|
-
|
39
|
+
CountMinSketch(int numRow, int numColumn):
|
40
40
|
r(numRow),
|
41
41
|
c(numColumn),
|
42
42
|
lenData(r * c),
|
@@ -50,7 +50,7 @@ struct EdgeHash {
|
|
50
50
|
std::fill(data, data + lenData, 0);
|
51
51
|
}
|
52
52
|
|
53
|
-
|
53
|
+
CountMinSketch(const CountMinSketch& b):
|
54
54
|
r(b.r),
|
55
55
|
c(b.c),
|
56
56
|
lenData(b.lenData),
|
@@ -62,7 +62,7 @@ struct EdgeHash {
|
|
62
62
|
std::copy(b.data, b.data + lenData, data);
|
63
63
|
}
|
64
64
|
|
65
|
-
~
|
65
|
+
~CountMinSketch() {
|
66
66
|
delete[] param1;
|
67
67
|
delete[] param2;
|
68
68
|
delete[] data;
|
@@ -73,10 +73,11 @@ struct EdgeHash {
|
|
73
73
|
}
|
74
74
|
|
75
75
|
void MultiplyAll(float by) const {
|
76
|
-
|
76
|
+
for (int i = 0, I = lenData; i < I; i++) // Vectorization
|
77
|
+
data[i] *= by;
|
77
78
|
}
|
78
79
|
|
79
|
-
void Hash(int
|
80
|
+
void Hash(int* indexOut, int a, int b = 0) const {
|
80
81
|
for (int i = 0; i < r; i++) {
|
81
82
|
indexOut[i] = ((a + m * b) * param1[i] + param2[i]) % c;
|
82
83
|
indexOut[i] += i * c + (indexOut[i] < 0 ? c : 0);
|
@@ -90,10 +91,10 @@ struct EdgeHash {
|
|
90
91
|
return least;
|
91
92
|
}
|
92
93
|
|
93
|
-
float Assign(const int* index, float
|
94
|
+
float Assign(const int* index, float with) const {
|
94
95
|
for (int i = 0; i < r; i++)
|
95
|
-
data[index[i]] =
|
96
|
-
return
|
96
|
+
data[index[i]] = with;
|
97
|
+
return with;
|
97
98
|
}
|
98
99
|
|
99
100
|
void Add(const int* index, float by = 1) const {
|
@@ -17,26 +17,28 @@
|
|
17
17
|
#pragma once
|
18
18
|
|
19
19
|
#include <cmath>
|
20
|
-
#include <algorithm>
|
21
20
|
|
22
|
-
#include "
|
23
|
-
#include "NodeHash.hpp"
|
21
|
+
#include "CountMinSketch.hpp"
|
24
22
|
|
25
23
|
namespace MIDAS {
|
26
24
|
struct FilteringCore {
|
27
25
|
const float threshold;
|
28
|
-
int
|
26
|
+
int timestamp = 1;
|
29
27
|
const float factor;
|
30
|
-
int
|
28
|
+
const int lenData;
|
29
|
+
int* const indexEdge; // Pre-compute the index to-be-modified, thanks to the Same-Layout Assumption
|
31
30
|
int* const indexSource;
|
32
31
|
int* const indexDestination;
|
33
|
-
|
34
|
-
|
35
|
-
|
32
|
+
CountMinSketch numCurrentEdge, numTotalEdge, scoreEdge;
|
33
|
+
CountMinSketch numCurrentSource, numTotalSource, scoreSource;
|
34
|
+
CountMinSketch numCurrentDestination, numTotalDestination, scoreDestination;
|
35
|
+
float timestampReciprocal = 0;
|
36
|
+
bool* const shouldMerge;
|
36
37
|
|
37
38
|
FilteringCore(int numRow, int numColumn, float threshold, float factor = 0.5):
|
38
39
|
threshold(threshold),
|
39
40
|
factor(factor),
|
41
|
+
lenData(numRow * numColumn), // I assume all CMSs have same size, but Same-Layout Assumption is not that strict
|
40
42
|
indexEdge(new int[numRow]),
|
41
43
|
indexSource(new int[numRow]),
|
42
44
|
indexDestination(new int[numRow]),
|
@@ -48,42 +50,43 @@ struct FilteringCore {
|
|
48
50
|
scoreSource(numCurrentSource),
|
49
51
|
numCurrentDestination(numRow, numColumn),
|
50
52
|
numTotalDestination(numCurrentDestination),
|
51
|
-
scoreDestination(numCurrentDestination)
|
53
|
+
scoreDestination(numCurrentDestination),
|
54
|
+
shouldMerge(new bool[numRow * numColumn]) { }
|
52
55
|
|
53
56
|
virtual ~FilteringCore() {
|
54
57
|
delete[] indexEdge;
|
55
58
|
delete[] indexSource;
|
56
59
|
delete[] indexDestination;
|
60
|
+
delete[] shouldMerge;
|
57
61
|
}
|
58
62
|
|
59
63
|
static float ComputeScore(float a, float s, float t) {
|
60
64
|
return s == 0 ? 0 : pow(a + s - a * t, 2) / (s * (t - 1)); // If t == 1, then s == 0, so no need to check twice
|
61
65
|
}
|
62
66
|
|
67
|
+
void ConditionalMerge(const float* current, float* total, const float* score) const {
|
68
|
+
for (int i = 0; i < lenData; i++)
|
69
|
+
shouldMerge[i] = score[i] < threshold;
|
70
|
+
for (int i = 0, I = lenData; i < I; i++) // Vectorization
|
71
|
+
total[i] += shouldMerge[i] * current[i] + (true - shouldMerge[i]) * total[i] * timestampReciprocal;
|
72
|
+
}
|
73
|
+
|
63
74
|
float operator()(int source, int destination, int timestamp) {
|
64
|
-
if (timestamp
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
numTotalEdge.data[i] / (timestampCurrent - 1) : 0;
|
69
|
-
for (int i = 0; i < numCurrentSource.lenData; i++)
|
70
|
-
numTotalSource.data[i] += scoreSource.data[i] < threshold ?
|
71
|
-
numCurrentSource.data[i] : timestampCurrent - 1 ?
|
72
|
-
numTotalSource.data[i] / (timestampCurrent - 1) : 0;
|
73
|
-
for (int i = 0; i < numCurrentDestination.lenData; i++)
|
74
|
-
numTotalDestination.data[i] += scoreDestination.data[i] < threshold ?
|
75
|
-
numCurrentDestination.data[i] : timestampCurrent - 1 ?
|
76
|
-
numTotalDestination.data[i] / (timestampCurrent - 1) : 0;
|
75
|
+
if (this->timestamp < timestamp) {
|
76
|
+
ConditionalMerge(numCurrentEdge.data, numTotalEdge.data, scoreEdge.data);
|
77
|
+
ConditionalMerge(numCurrentSource.data, numTotalSource.data, scoreSource.data);
|
78
|
+
ConditionalMerge(numCurrentDestination.data, numTotalDestination.data, scoreDestination.data);
|
77
79
|
numCurrentEdge.MultiplyAll(factor);
|
78
80
|
numCurrentSource.MultiplyAll(factor);
|
79
81
|
numCurrentDestination.MultiplyAll(factor);
|
80
|
-
|
82
|
+
timestampReciprocal = 1.f / (timestamp - 1); // So I can skip an if-statement
|
83
|
+
this->timestamp = timestamp;
|
81
84
|
}
|
82
|
-
numCurrentEdge.Hash(source, destination
|
85
|
+
numCurrentEdge.Hash(indexEdge, source, destination);
|
83
86
|
numCurrentEdge.Add(indexEdge);
|
84
|
-
numCurrentSource.Hash(
|
87
|
+
numCurrentSource.Hash(indexSource, source);
|
85
88
|
numCurrentSource.Add(indexSource);
|
86
|
-
numCurrentDestination.Hash(
|
89
|
+
numCurrentDestination.Hash(indexDestination, destination);
|
87
90
|
numCurrentDestination.Add(indexDestination);
|
88
91
|
return std::max({
|
89
92
|
scoreEdge.Assign(indexEdge, ComputeScore(numCurrentEdge(indexEdge), numTotalEdge(indexEdge), timestamp)),
|
@@ -18,13 +18,13 @@
|
|
18
18
|
|
19
19
|
#include <cmath>
|
20
20
|
|
21
|
-
#include "
|
21
|
+
#include "CountMinSketch.hpp"
|
22
22
|
|
23
23
|
namespace MIDAS {
|
24
24
|
struct NormalCore {
|
25
|
-
int
|
25
|
+
int timestamp = 1;
|
26
26
|
int* const index; // Pre-compute the index to-be-modified, thanks to the same structure of CMSs
|
27
|
-
|
27
|
+
CountMinSketch numCurrent, numTotal;
|
28
28
|
|
29
29
|
NormalCore(int numRow, int numColumn):
|
30
30
|
index(new int[numRow]),
|
@@ -40,11 +40,11 @@ struct NormalCore {
|
|
40
40
|
}
|
41
41
|
|
42
42
|
float operator()(int source, int destination, int timestamp) {
|
43
|
-
if (timestamp
|
43
|
+
if (this->timestamp < timestamp) {
|
44
44
|
numCurrent.ClearAll();
|
45
|
-
|
45
|
+
this->timestamp = timestamp;
|
46
46
|
}
|
47
|
-
numCurrent.Hash(source, destination
|
47
|
+
numCurrent.Hash(index, source, destination);
|
48
48
|
numCurrent.Add(index);
|
49
49
|
numTotal.Add(index);
|
50
50
|
return ComputeScore(numCurrent(index), numTotal(index), timestamp);
|