midas-edge 0.2.0 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +21 -0
- data/NOTICE.txt +2 -1
- data/README.md +2 -3
- data/ext/midas/ext.cpp +49 -36
- data/ext/midas/extconf.rb +6 -2
- data/ext/midas/numo.hpp +867 -0
- data/lib/midas/detector.rb +1 -12
- data/lib/midas/version.rb +1 -1
- data/vendor/MIDAS/README.md +107 -32
- data/vendor/MIDAS/src/CountMinSketch.hpp +105 -0
- data/vendor/MIDAS/src/FilteringCore.hpp +98 -0
- data/vendor/MIDAS/src/NormalCore.hpp +53 -0
- data/vendor/MIDAS/src/RelationalCore.hpp +79 -0
- metadata +16 -67
data/lib/midas/detector.rb
CHANGED
@@ -11,18 +11,7 @@ module Midas
|
|
11
11
|
end
|
12
12
|
|
13
13
|
def fit_predict(x)
|
14
|
-
|
15
|
-
result =
|
16
|
-
if x.is_a?(String)
|
17
|
-
_fit_predict_file(x, @rows, @buckets, @alpha, threshold, @relations, @directed, @seed)
|
18
|
-
else
|
19
|
-
x = Numo::Int32.cast(x) unless x.is_a?(Numo::NArray)
|
20
|
-
x = x.cast_to(Numo::Int32) unless x.is_a?(Numo::Int32)
|
21
|
-
raise ArgumentError, "Bad shape: #{x.shape}" unless x.rank == 2 && x.shape[1] == 3
|
22
|
-
_fit_predict_str(x.to_binary, @rows, @buckets, @alpha, threshold, @relations, @directed, @seed)
|
23
|
-
end
|
24
|
-
|
25
|
-
Numo::SFloat.from_binary(result)
|
14
|
+
_fit_predict(x, @rows, @buckets, @alpha, @threshold || Float::NAN, @relations, @directed, @seed)
|
26
15
|
end
|
27
16
|
end
|
28
17
|
end
|
data/lib/midas/version.rb
CHANGED
data/vendor/MIDAS/README.md
CHANGED
@@ -4,15 +4,15 @@
|
|
4
4
|
<a href="https://aaai.org/Conferences/AAAI-20/">
|
5
5
|
<img src="http://img.shields.io/badge/AAAI-2020-red.svg">
|
6
6
|
</a>
|
7
|
-
<a href="https://
|
7
|
+
<a href="https://arxiv.org/pdf/2009.08452.pdf"><img src="http://img.shields.io/badge/Paper-PDF-brightgreen.svg"></a>
|
8
8
|
<a href="https://www.comp.nus.edu.sg/~sbhatia/assets/pdf/midasslides.pdf">
|
9
9
|
<img src="http://img.shields.io/badge/Slides-PDF-ff9e18.svg">
|
10
10
|
</a>
|
11
11
|
<a href="https://youtu.be/Bd4PyLCHrto">
|
12
12
|
<img src="http://img.shields.io/badge/Talk-Youtube-ff69b4.svg">
|
13
13
|
</a>
|
14
|
-
<a href="https://www.
|
15
|
-
<img src="https://img.shields.io/badge/
|
14
|
+
<a href="https://www.youtube.com/watch?v=DPmN-uPW8qU">
|
15
|
+
<img src="https://img.shields.io/badge/Overview-Youtube-orange.svg">
|
16
16
|
</a>
|
17
17
|
<a href="https://github.com/bhatiasiddharth/MIDAS/blob/master/LICENSE">
|
18
18
|
<img src="https://img.shields.io/badge/License-Apache%202.0-blue.svg">
|
@@ -21,8 +21,8 @@
|
|
21
21
|
|
22
22
|
C++ implementation of
|
23
23
|
|
24
|
-
- Real-time Streaming Anomaly Detection in Dynamic Graphs. *Siddharth Bhatia, Rui Liu, Bryan Hooi, Minji Yoon, Kijung Shin, Christos Faloutsos*. (Under Review)
|
25
|
-
- [MIDAS: Microcluster-Based Detector of Anomalies in Edge Streams](
|
24
|
+
- [Real-time Streaming Anomaly Detection in Dynamic Graphs](https://arxiv.org/pdf/2009.08452.pdf). *Siddharth Bhatia, Rui Liu, Bryan Hooi, Minji Yoon, Kijung Shin, Christos Faloutsos*. (Under Review)
|
25
|
+
- [MIDAS: Microcluster-Based Detector of Anomalies in Edge Streams](https://arxiv.org/pdf/1911.04464.pdf). *Siddharth Bhatia, Bryan Hooi, Minji Yoon, Kijung Shin, Christos Faloutsos*. AAAI 2020.
|
26
26
|
|
27
27
|
The old implementation is in another branch, `OldImplementation`; it should be considered archived and is unlikely to receive feature updates.
|
28
28
|
|
@@ -30,13 +30,20 @@ The old implementation is in another branch `OldImplementation`, it should be co
|
|
30
30
|
|
31
31
|
## Table of Contents
|
32
32
|
|
33
|
+
<!-- START doctoc generated TOC please keep comment here to allow auto update -->
|
34
|
+
<!-- DON'T EDIT THIS SECTION, INSTEAD RE-RUN doctoc TO UPDATE -->
|
35
|
+
|
36
|
+
|
33
37
|
- [Features](#features)
|
34
38
|
- [Demo](#demo)
|
35
39
|
- [Customization](#customization)
|
36
|
-
- [
|
37
|
-
- [
|
40
|
+
- [Other Files](#other-files)
|
41
|
+
- [In Other Languages](#in-other-languages)
|
42
|
+
- [Online Coverage](#online-coverage)
|
38
43
|
- [Citation](#citation)
|
39
44
|
|
45
|
+
<!-- END doctoc generated TOC please keep comment here to allow auto update -->
|
46
|
+
|
40
47
|
## Features
|
41
48
|
|
42
49
|
- Finds Anomalies in Dynamic/Time-Evolving Graph: (Intrusion Detection, Fake Ratings, Financial Fraud)
|
@@ -45,7 +52,7 @@ The old implementation is in another branch `OldImplementation`, it should be co
|
|
45
52
|
- Constant Memory (independent of graph size)
|
46
53
|
- Constant Update Time (real-time anomaly detection to minimize harm)
|
47
54
|
- Up to 55% more accurate and 929 times faster than the state of the art approaches
|
48
|
-
-
|
55
|
+
- Experiments are performed using the following datasets:
|
49
56
|
- [DARPA](https://www.ll.mit.edu/r-d/datasets/1998-darpa-intrusion-detection-evaluation-dataset)
|
50
57
|
- [TwitterWorldCup2014](http://odds.cs.stonybrook.edu/twitterworldcup2014-dataset)
|
51
58
|
- [TwitterSecurity](http://odds.cs.stonybrook.edu/twittersecurity-dataset)
|
@@ -56,31 +63,51 @@ If you use Windows:
|
|
56
63
|
|
57
64
|
1. Open a Visual Studio developer command prompt, we want their toolchain
|
58
65
|
1. `cd` to the project root `MIDAS/`
|
59
|
-
1. `cmake -DCMAKE_BUILD_TYPE=Release -
|
66
|
+
1. `cmake -DCMAKE_BUILD_TYPE=Release -GNinja -S . -B build/release`
|
60
67
|
1. `cmake --build build/release --target Demo`
|
61
|
-
1. `cd` to `MIDAS/build/release
|
68
|
+
1. `cd` to `MIDAS/build/release/`
|
62
69
|
1. `.\Demo.exe`
|
63
70
|
|
64
|
-
If you use Linux/macOS
|
71
|
+
If you use Linux/macOS:
|
65
72
|
|
66
73
|
1. Open a terminal
|
67
74
|
1. `cd` to the project root `MIDAS/`
|
68
75
|
1. `cmake -DCMAKE_BUILD_TYPE=Release -S . -B build/release`
|
69
76
|
1. `cmake --build build/release --target Demo`
|
70
|
-
1. `cd` to `MIDAS/build/release
|
77
|
+
1. `cd` to `MIDAS/build/release/`
|
71
78
|
1. `./Demo`
|
72
79
|
|
73
|
-
The demo runs on `MIDAS/data/DARPA/darpa_processed.csv`, which has 4.5M records, with the filtering core.
|
80
|
+
The demo runs on `MIDAS/data/DARPA/darpa_processed.csv`, which has 4.5M records, with the filtering core (MIDAS-F).
|
74
81
|
|
75
82
|
The scores will be exported to `MIDAS/temp/Score.txt`, higher means more anomalous.
|
76
83
|
|
77
|
-
All file paths are absolute and "hardcoded" by CMake, but it's suggested NOT to run by double
|
84
|
+
All file paths are absolute and "hardcoded" by CMake, but it's suggested NOT to run by double clicking on the executable file.
|
85
|
+
|
86
|
+
### Requirements
|
87
|
+
|
88
|
+
Core
|
89
|
+
- C++11
|
90
|
+
- C++ standard libraries
|
91
|
+
|
92
|
+
Demo
|
93
|
+
- Python 3 (`MIDAS/util/EvaluateScore.py`)
|
94
|
+
- `pandas`: I/O
|
95
|
+
- `scikit-learn`: Compute ROC-AUC
|
96
|
+
|
97
|
+
Experiment
|
98
|
+
- (Optional) Intel TBB: Parallelization
|
99
|
+
- (Optional) OpenMP: Parallelization
|
100
|
+
|
101
|
+
Other python utility scripts
|
102
|
+
- Python 3
|
103
|
+
- `pandas`
|
104
|
+
- `scikit-learn`
|
78
105
|
|
79
106
|
## Customization
|
80
107
|
|
81
108
|
### Switch Cores
|
82
109
|
|
83
|
-
Cores are instantiated at `MIDAS/example/Demo.cpp:
|
110
|
+
Cores are instantiated at `MIDAS/example/Demo.cpp:67-69`, uncomment the chosen one.
|
84
111
|
|
85
112
|
### Custom Dataset + `Demo.cpp`
|
86
113
|
|
@@ -89,48 +116,96 @@ You need to prepare three files:
|
|
89
116
|
- Meta file
|
90
117
|
- Only includes an integer `N`, the number of records in the dataset
|
91
118
|
- Use its path for `pathMeta`
|
119
|
+
- E.g. `MIDAS/data/DARPA/darpa_shape.txt`
|
92
120
|
- Data file
|
93
121
|
- A header-less csv format file of shape `[N,3]`
|
94
122
|
- Columns are sources, destinations, timestamps
|
95
123
|
- Use its path for `pathData`
|
124
|
+
- E.g. `MIDAS/data/DARPA/darpa_processed.csv`
|
96
125
|
- Label file
|
97
126
|
- A header-less csv format file of shape `[N,1]`
|
98
127
|
- The corresponding label for data records
|
99
128
|
- 0 means normal record
|
100
129
|
- 1 means anomalous record
|
101
|
-
- Use its path for `pathGroundTruth`
|
130
|
+
- Use its path for `pathGroundTruth`
|
131
|
+
- E.g. `MIDAS/data/DARPA/darpa_ground_truth.csv`
|
102
132
|
|
103
133
|
### Custom Dataset + Custom Runner
|
104
134
|
|
105
|
-
1. Include the header `MIDAS/
|
135
|
+
1. Include the header `MIDAS/src/NormalCore.hpp`, `MIDAS/src/RelationalCore.hpp` or `MIDAS/src/FilteringCore.hpp`
|
106
136
|
1. Instantiate cores with required parameters
|
107
|
-
1. Call `operator()` on individual data records, it returns the anomaly score for the input record
|
137
|
+
1. Call `operator()` on individual data records, it returns the anomaly score for the input record
|
138
|
+
|
139
|
+
## Other Files
|
140
|
+
|
141
|
+
### `example/`
|
142
|
+
|
143
|
+
#### `Experiment.cpp`
|
144
|
+
|
145
|
+
The code we used for experiments.
|
146
|
+
It will try to use Intel TBB or OpenMP for parallelization.
|
147
|
+
You should comment out all but one runner function call in `main()`, as most results are exported to `MIDAS/temp/Experiment.csv` together with many intermediate files.
|
148
|
+
|
149
|
+
#### `Reproducible.cpp`
|
108
150
|
|
109
|
-
|
151
|
+
Similar to `Demo.cpp`, but with all random parameters hardcoded, so it always produces the same result.
|
152
|
+
It's for other developers and us to test if the implementation in other languages can produce acceptable results.
|
110
153
|
|
111
|
-
|
112
|
-
2. Towards Data Science: [Controlling Fake News using Graphs and Statistics](https://towardsdatascience.com/controlling-fake-news-using-graphs-and-statistics-31ed116a986f)
|
113
|
-
2. Towards Data Science: [Anomaly detection in dynamic graphs using MIDAS](https://towardsdatascience.com/anomaly-detection-in-dynamic-graphs-using-midas-e4f8d0b1db45)
|
114
|
-
4. Towards AI: [Anomaly Detection with MIDAS](https://medium.com/towards-artificial-intelligence/anomaly-detection-with-midas-2735a2e6dce8)
|
115
|
-
5. [AIhub Interview](https://aihub.org/2020/05/01/interview-with-siddharth-bhatia-a-new-approach-for-anomaly-detection/)
|
154
|
+
### `util/`
|
116
155
|
|
117
|
-
|
156
|
+
`DeleteTempFile.py`, `EvaluateScore.py` and `ReproduceROC.py` will show their usage and a short description when executed without any argument.
|
118
157
|
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
158
|
+
#### `PreprocessData.py`
|
159
|
+
|
160
|
+
The code to process the raw dataset into an easy-to-read format.
|
161
|
+
Datasets are always assumed to be in a folder in `MIDAS/data/`.
|
162
|
+
It can process the following dataset(s)
|
163
|
+
|
164
|
+
- `DARPA/darpa_original.csv` -> `DARPA/darpa_processed.csv`, `DARPA/darpa_ground_truth.csv`, `DARPA/darpa_shape.txt`
|
165
|
+
|
166
|
+
## In Other Languages
|
167
|
+
|
168
|
+
1. Python: [Rui Liu's MIDAS.Python](https://github.com/liurui39660/MIDAS.Python), [Ritesh Kumar's pyMIDAS](https://github.com/ritesh99rakesh/pyMIDAS)
|
169
|
+
1. Golang: [Steve Tan's midas](https://github.com/steve0hh/midas)
|
170
|
+
1. Ruby: [Andrew Kane's midas](https://github.com/ankane/midas)
|
171
|
+
1. Rust: [Scott Steele's midas_rs](https://github.com/scooter-dangle/midas_rs)
|
172
|
+
1. R: [Tobias Heidler's MIDASwrappeR](https://github.com/pteridin/MIDASwrappeR)
|
173
|
+
1. Java: [Joshua Tokle's MIDAS-Java](https://github.com/jotok/MIDAS-Java)
|
174
|
+
1. Julia: [Ashrya Agrawal's MIDAS.jl](https://github.com/ashryaagr/MIDAS.jl)
|
175
|
+
|
176
|
+
## Online Coverage
|
177
|
+
|
178
|
+
1. [ACM TechNews](https://technews.acm.org/archives.cfm?fo=2020-05-may/may-06-2020.html)
|
179
|
+
1. [AIhub](https://aihub.org/2020/05/01/interview-with-siddharth-bhatia-a-new-approach-for-anomaly-detection/)
|
180
|
+
1. [Hacker News](https://news.ycombinator.com/item?id=22802604)
|
181
|
+
1. [KDnuggets](https://www.kdnuggets.com/2020/04/midas-new-baseline-anomaly-detection-graphs.html)
|
182
|
+
1. [Microsoft](https://techcommunity.microsoft.com/t5/azure-sentinel/announcing-the-azure-sentinel-hackathon-winners/ba-p/1548240)
|
183
|
+
1. [Towards Data Science](https://towardsdatascience.com/controlling-fake-news-using-graphs-and-statistics-31ed116a986f)
|
124
184
|
|
125
185
|
## Citation
|
126
186
|
|
127
|
-
If you use this code for your research, please consider citing our
|
187
|
+
If you use this code for your research, please consider citing our arXiv preprint
|
188
|
+
|
189
|
+
```bibtex
|
190
|
+
@misc{bhatia2020realtime,
|
191
|
+
title={Real-Time Streaming Anomaly Detection in Dynamic Graphs},
|
192
|
+
author={Siddharth Bhatia and Rui Liu and Bryan Hooi and Minji Yoon and Kijung Shin and Christos Faloutsos},
|
193
|
+
year={2020},
|
194
|
+
eprint={2009.08452},
|
195
|
+
archivePrefix={arXiv},
|
196
|
+
primaryClass={cs.LG}
|
197
|
+
}
|
128
198
|
|
129
199
|
```
|
200
|
+
|
201
|
+
or our AAAI paper
|
202
|
+
|
203
|
+
|
204
|
+
```bibtex
|
130
205
|
@inproceedings{bhatia2020midas,
|
131
206
|
title="MIDAS: Microcluster-Based Detector of Anomalies in Edge Streams",
|
132
207
|
author="Siddharth {Bhatia} and Bryan {Hooi} and Minji {Yoon} and Kijung {Shin} and Christos {Faloutsos}",
|
133
208
|
booktitle="AAAI 2020 : The Thirty-Fourth AAAI Conference on Artificial Intelligence",
|
134
209
|
year="2020"
|
135
210
|
}
|
136
|
-
```
|
211
|
+
```
|
@@ -0,0 +1,105 @@
|
|
1
|
+
// -----------------------------------------------------------------------------
|
2
|
+
// Copyright 2020 Rui Liu (liurui39660) and Siddharth Bhatia (bhatiasiddharth)
|
3
|
+
//
|
4
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
// you may not use this file except in compliance with the License.
|
6
|
+
// You may obtain a copy of the License at
|
7
|
+
//
|
8
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
//
|
10
|
+
// Unless required by applicable law or agreed to in writing, software
|
11
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
// See the License for the specific language governing permissions and
|
14
|
+
// limitations under the License.
|
15
|
+
// -----------------------------------------------------------------------------
|
16
|
+
|
17
|
+
#pragma once
|
18
|
+
|
19
|
+
#include <algorithm>
#include <cstdlib>
#include <limits>
|
20
|
+
|
21
|
+
namespace MIDAS {
|
22
|
+
// A count-min sketch with `r` rows and `c` columns of float counters.
//
// Counters live in one flat array laid out row by row: row `i` occupies
// data[i * c] .. data[i * c + c - 1]. Hash() turns a key (or a pair of keys)
// into one flat index per row; the remaining methods read or modify the
// counters addressed by such an index array (which must hold `r` entries).
//
// Mutating methods are `const` because they only write through the owned
// pointers; the pointer members themselves never change after construction.
struct CountMinSketch {
	// Fields
	// --------------------------------------------------------------------------------

	const int r, c, m = 104729; // rows, columns; m is an arbitrary prime used to fold the two keys together in Hash()
	const int lenData; // r * c, the total number of counters
	int* const param1; // Per-row multiplier of the hash, always >= 1
	int* const param2; // Per-row offset of the hash
	float* const data; // The counters, row-major
	constexpr static float infinity = std::numeric_limits<float>::infinity();

	// Methods
	// --------------------------------------------------------------------------------

	CountMinSketch() = delete;
	CountMinSketch& operator=(const CountMinSketch& b) = delete;

	// Creates a zeroed sketch with `numRow` rows and `numColumn` columns.
	// Hash parameters are drawn from rand(), so seed with srand() beforehand
	// if reproducible hashing is required.
	CountMinSketch(int numRow, int numColumn):
		r(numRow),
		c(numColumn),
		lenData(r * c),
		param1(new int[r]),
		param2(new int[r]),
		data(new float[lenData]) {
		for (int i = 0; i < r; i++) {
			// ×0 is not a good idea, see Hash(). The modulo keeps the sum <= RAND_MAX,
			// because a plain `rand() + 1` overflows when rand() returns RAND_MAX == INT_MAX.
			param1[i] = rand() % RAND_MAX + 1;
			param2[i] = rand();
		}
		std::fill(data, data + lenData, 0.f);
	}

	// Deep copy: the new sketch has the same shape, hash parameters and counters
	// as `b`, but owns its own storage.
	CountMinSketch(const CountMinSketch& b):
		r(b.r),
		c(b.c),
		lenData(b.lenData),
		param1(new int[r]),
		param2(new int[r]),
		data(new float[lenData]) {
		std::copy(b.param1, b.param1 + r, param1);
		std::copy(b.param2, b.param2 + r, param2);
		std::copy(b.data, b.data + lenData, data);
	}

	~CountMinSketch() {
		delete[] param1;
		delete[] param2;
		delete[] data;
	}

	// Sets every counter to `with`.
	void ClearAll(float with = 0) const {
		std::fill(data, data + lenData, with);
	}

	// Scales every counter by `by`.
	void MultiplyAll(float by) const {
		for (int i = 0, I = lenData; i < I; i++) // Vectorization
			data[i] *= by;
	}

	// Writes `r` flat indices into `indexOut`, one per row, for the key (a, b).
	// indexOut[i] always lies inside row i, i.e. in [i * c, i * c + c).
	void Hash(int* indexOut, int a, int b = 0) const {
		// The arithmetic is meant to wrap around. Doing it in unsigned makes the
		// wrap well-defined; the same expression in int is undefined behaviour on overflow.
		const unsigned key = static_cast<unsigned>(a) + static_cast<unsigned>(m) * static_cast<unsigned>(b);
		for (int i = 0; i < r; i++) {
			const unsigned mixed = key * static_cast<unsigned>(param1[i]) + static_cast<unsigned>(param2[i]);
			// Back to int (modular conversion) so the remainder matches the wrapped signed value
			int column = static_cast<int>(mixed) % c;
			if (column < 0) // % keeps the sign of the dividend
				column += c;
			indexOut[i] = i * c + column;
		}
	}

	// Returns the smallest of the counters addressed by `index`.
	float operator()(const int* index) const {
		float least = infinity;
		for (int i = 0; i < r; i++)
			least = std::min(least, data[index[i]]);
		return least;
	}

	// Overwrites the counters addressed by `index` with `with` and returns `with`.
	float Assign(const int* index, float with) const {
		for (int i = 0; i < r; i++)
			data[index[i]] = with;
		return with;
	}

	// Adds `by` to the counters addressed by `index`.
	void Add(const int* index, float by = 1) const {
		for (int i = 0; i < r; i++)
			data[index[i]] += by;
	}
};
|
105
|
+
}
|
@@ -0,0 +1,98 @@
|
|
1
|
+
// -----------------------------------------------------------------------------
|
2
|
+
// Copyright 2020 Rui Liu (liurui39660) and Siddharth Bhatia (bhatiasiddharth)
|
3
|
+
//
|
4
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
// you may not use this file except in compliance with the License.
|
6
|
+
// You may obtain a copy of the License at
|
7
|
+
//
|
8
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
//
|
10
|
+
// Unless required by applicable law or agreed to in writing, software
|
11
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
// See the License for the specific language governing permissions and
|
14
|
+
// limitations under the License.
|
15
|
+
// -----------------------------------------------------------------------------
|
16
|
+
|
17
|
+
#pragma once
|
18
|
+
|
19
|
+
#include <cmath>
|
20
|
+
|
21
|
+
#include "CountMinSketch.hpp"
|
22
|
+
|
23
|
+
namespace MIDAS {
|
24
|
+
struct FilteringCore {
|
25
|
+
const float threshold;
|
26
|
+
int timestamp = 1;
|
27
|
+
const float factor;
|
28
|
+
const int lenData;
|
29
|
+
int* const indexEdge; // Pre-compute the index to-be-modified, thanks to the Same-Layout Assumption
|
30
|
+
int* const indexSource;
|
31
|
+
int* const indexDestination;
|
32
|
+
CountMinSketch numCurrentEdge, numTotalEdge, scoreEdge;
|
33
|
+
CountMinSketch numCurrentSource, numTotalSource, scoreSource;
|
34
|
+
CountMinSketch numCurrentDestination, numTotalDestination, scoreDestination;
|
35
|
+
float timestampReciprocal = 0;
|
36
|
+
bool* const shouldMerge;
|
37
|
+
|
38
|
+
FilteringCore(int numRow, int numColumn, float threshold, float factor = 0.5):
|
39
|
+
threshold(threshold),
|
40
|
+
factor(factor),
|
41
|
+
lenData(numRow * numColumn), // I assume all CMSs have same size, but Same-Layout Assumption is not that strict
|
42
|
+
indexEdge(new int[numRow]),
|
43
|
+
indexSource(new int[numRow]),
|
44
|
+
indexDestination(new int[numRow]),
|
45
|
+
numCurrentEdge(numRow, numColumn),
|
46
|
+
numTotalEdge(numCurrentEdge),
|
47
|
+
scoreEdge(numCurrentEdge),
|
48
|
+
numCurrentSource(numRow, numColumn),
|
49
|
+
numTotalSource(numCurrentSource),
|
50
|
+
scoreSource(numCurrentSource),
|
51
|
+
numCurrentDestination(numRow, numColumn),
|
52
|
+
numTotalDestination(numCurrentDestination),
|
53
|
+
scoreDestination(numCurrentDestination),
|
54
|
+
shouldMerge(new bool[numRow * numColumn]) { }
|
55
|
+
|
56
|
+
virtual ~FilteringCore() {
|
57
|
+
delete[] indexEdge;
|
58
|
+
delete[] indexSource;
|
59
|
+
delete[] indexDestination;
|
60
|
+
delete[] shouldMerge;
|
61
|
+
}
|
62
|
+
|
63
|
+
static float ComputeScore(float a, float s, float t) {
|
64
|
+
return s == 0 ? 0 : pow(a + s - a * t, 2) / (s * (t - 1)); // If t == 1, then s == 0, so no need to check twice
|
65
|
+
}
|
66
|
+
|
67
|
+
void ConditionalMerge(const float* current, float* total, const float* score) const {
|
68
|
+
for (int i = 0; i < lenData; i++)
|
69
|
+
shouldMerge[i] = score[i] < threshold;
|
70
|
+
for (int i = 0, I = lenData; i < I; i++) // Vectorization
|
71
|
+
total[i] += shouldMerge[i] * current[i] + (true - shouldMerge[i]) * total[i] * timestampReciprocal;
|
72
|
+
}
|
73
|
+
|
74
|
+
float operator()(int source, int destination, int timestamp) {
|
75
|
+
if (this->timestamp < timestamp) {
|
76
|
+
ConditionalMerge(numCurrentEdge.data, numTotalEdge.data, scoreEdge.data);
|
77
|
+
ConditionalMerge(numCurrentSource.data, numTotalSource.data, scoreSource.data);
|
78
|
+
ConditionalMerge(numCurrentDestination.data, numTotalDestination.data, scoreDestination.data);
|
79
|
+
numCurrentEdge.MultiplyAll(factor);
|
80
|
+
numCurrentSource.MultiplyAll(factor);
|
81
|
+
numCurrentDestination.MultiplyAll(factor);
|
82
|
+
timestampReciprocal = 1.f / (timestamp - 1); // So I can skip an if-statement
|
83
|
+
this->timestamp = timestamp;
|
84
|
+
}
|
85
|
+
numCurrentEdge.Hash(indexEdge, source, destination);
|
86
|
+
numCurrentEdge.Add(indexEdge);
|
87
|
+
numCurrentSource.Hash(indexSource, source);
|
88
|
+
numCurrentSource.Add(indexSource);
|
89
|
+
numCurrentDestination.Hash(indexDestination, destination);
|
90
|
+
numCurrentDestination.Add(indexDestination);
|
91
|
+
return std::max({
|
92
|
+
scoreEdge.Assign(indexEdge, ComputeScore(numCurrentEdge(indexEdge), numTotalEdge(indexEdge), timestamp)),
|
93
|
+
scoreSource.Assign(indexSource, ComputeScore(numCurrentSource(indexSource), numTotalSource(indexSource), timestamp)),
|
94
|
+
scoreDestination.Assign(indexDestination, ComputeScore(numCurrentDestination(indexDestination), numTotalDestination(indexDestination), timestamp)),
|
95
|
+
});
|
96
|
+
}
|
97
|
+
};
|
98
|
+
}
|