midas-edge 0.1.0 → 0.2.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,105 @@
1
+ // -----------------------------------------------------------------------------
2
+ // Copyright 2020 Rui Liu (liurui39660) and Siddharth Bhatia (bhatiasiddharth)
3
+ //
4
+ // Licensed under the Apache License, Version 2.0 (the "License");
5
+ // you may not use this file except in compliance with the License.
6
+ // You may obtain a copy of the License at
7
+ //
8
+ // http://www.apache.org/licenses/LICENSE-2.0
9
+ //
10
+ // Unless required by applicable law or agreed to in writing, software
11
+ // distributed under the License is distributed on an "AS IS" BASIS,
12
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ // See the License for the specific language governing permissions and
14
+ // limitations under the License.
15
+ // -----------------------------------------------------------------------------
16
+
17
+ #pragma once
18
+
19
#include <algorithm>
#include <cstdlib>
#include <limits>
20
+
21
+ namespace MIDAS {
22
+ struct CountMinSketch {
23
+ // Fields
24
+ // --------------------------------------------------------------------------------
25
+
26
+ const int r, c, m = 104729; // Yes, a magic number, I just pick a random prime
27
+ const int lenData;
28
+ int* const param1;
29
+ int* const param2;
30
+ float* const data;
31
+ constexpr static float infinity = std::numeric_limits<float>::infinity();
32
+
33
+ // Methods
34
+ // --------------------------------------------------------------------------------
35
+
36
+ CountMinSketch() = delete;
37
+ CountMinSketch& operator=(const CountMinSketch& b) = delete;
38
+
39
+ CountMinSketch(int numRow, int numColumn):
40
+ r(numRow),
41
+ c(numColumn),
42
+ lenData(r * c),
43
+ param1(new int[r]),
44
+ param2(new int[r]),
45
+ data(new float[lenData]) {
46
+ for (int i = 0; i < r; i++) {
47
+ param1[i] = rand() + 1; // ×0 is not a good idea, see Hash()
48
+ param2[i] = rand();
49
+ }
50
+ std::fill(data, data + lenData, 0);
51
+ }
52
+
53
+ CountMinSketch(const CountMinSketch& b):
54
+ r(b.r),
55
+ c(b.c),
56
+ lenData(b.lenData),
57
+ param1(new int[r]),
58
+ param2(new int[r]),
59
+ data(new float[lenData]) {
60
+ std::copy(b.param1, b.param1 + r, param1);
61
+ std::copy(b.param2, b.param2 + r, param2);
62
+ std::copy(b.data, b.data + lenData, data);
63
+ }
64
+
65
+ ~CountMinSketch() {
66
+ delete[] param1;
67
+ delete[] param2;
68
+ delete[] data;
69
+ }
70
+
71
+ void ClearAll(float with = 0) const {
72
+ std::fill(data, data + lenData, with);
73
+ }
74
+
75
+ void MultiplyAll(float by) const {
76
+ for (int i = 0, I = lenData; i < I; i++) // Vectorization
77
+ data[i] *= by;
78
+ }
79
+
80
+ void Hash(int* indexOut, int a, int b = 0) const {
81
+ for (int i = 0; i < r; i++) {
82
+ indexOut[i] = ((a + m * b) * param1[i] + param2[i]) % c;
83
+ indexOut[i] += i * c + (indexOut[i] < 0 ? c : 0);
84
+ }
85
+ }
86
+
87
+ float operator()(const int* index) const {
88
+ float least = infinity;
89
+ for (int i = 0; i < r; i++)
90
+ least = std::min(least, data[index[i]]);
91
+ return least;
92
+ }
93
+
94
+ float Assign(const int* index, float with) const {
95
+ for (int i = 0; i < r; i++)
96
+ data[index[i]] = with;
97
+ return with;
98
+ }
99
+
100
+ void Add(const int* index, float by = 1) const {
101
+ for (int i = 0; i < r; i++)
102
+ data[index[i]] += by;
103
+ }
104
+ };
105
+ }
@@ -0,0 +1,98 @@
1
+ // -----------------------------------------------------------------------------
2
+ // Copyright 2020 Rui Liu (liurui39660) and Siddharth Bhatia (bhatiasiddharth)
3
+ //
4
+ // Licensed under the Apache License, Version 2.0 (the "License");
5
+ // you may not use this file except in compliance with the License.
6
+ // You may obtain a copy of the License at
7
+ //
8
+ // http://www.apache.org/licenses/LICENSE-2.0
9
+ //
10
+ // Unless required by applicable law or agreed to in writing, software
11
+ // distributed under the License is distributed on an "AS IS" BASIS,
12
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ // See the License for the specific language governing permissions and
14
+ // limitations under the License.
15
+ // -----------------------------------------------------------------------------
16
+
17
+ #pragma once
18
+
19
+ #include <cmath>
20
+
21
+ #include "CountMinSketch.hpp"
22
+
23
+ namespace MIDAS {
24
+ struct FilteringCore {
25
+ const float threshold;
26
+ int timestamp = 1;
27
+ const float factor;
28
+ const int lenData;
29
+ int* const indexEdge; // Pre-compute the index to-be-modified, thanks to the Same-Layout Assumption
30
+ int* const indexSource;
31
+ int* const indexDestination;
32
+ CountMinSketch numCurrentEdge, numTotalEdge, scoreEdge;
33
+ CountMinSketch numCurrentSource, numTotalSource, scoreSource;
34
+ CountMinSketch numCurrentDestination, numTotalDestination, scoreDestination;
35
+ float timestampReciprocal = 0;
36
+ bool* const shouldMerge;
37
+
38
+ FilteringCore(int numRow, int numColumn, float threshold, float factor = 0.5):
39
+ threshold(threshold),
40
+ factor(factor),
41
+ lenData(numRow * numColumn), // I assume all CMSs have same size, but Same-Layout Assumption is not that strict
42
+ indexEdge(new int[numRow]),
43
+ indexSource(new int[numRow]),
44
+ indexDestination(new int[numRow]),
45
+ numCurrentEdge(numRow, numColumn),
46
+ numTotalEdge(numCurrentEdge),
47
+ scoreEdge(numCurrentEdge),
48
+ numCurrentSource(numRow, numColumn),
49
+ numTotalSource(numCurrentSource),
50
+ scoreSource(numCurrentSource),
51
+ numCurrentDestination(numRow, numColumn),
52
+ numTotalDestination(numCurrentDestination),
53
+ scoreDestination(numCurrentDestination),
54
+ shouldMerge(new bool[numRow * numColumn]) { }
55
+
56
+ virtual ~FilteringCore() {
57
+ delete[] indexEdge;
58
+ delete[] indexSource;
59
+ delete[] indexDestination;
60
+ delete[] shouldMerge;
61
+ }
62
+
63
+ static float ComputeScore(float a, float s, float t) {
64
+ return s == 0 ? 0 : pow(a + s - a * t, 2) / (s * (t - 1)); // If t == 1, then s == 0, so no need to check twice
65
+ }
66
+
67
+ void ConditionalMerge(const float* current, float* total, const float* score) const {
68
+ for (int i = 0; i < lenData; i++)
69
+ shouldMerge[i] = score[i] < threshold;
70
+ for (int i = 0, I = lenData; i < I; i++) // Vectorization
71
+ total[i] += shouldMerge[i] * current[i] + (true - shouldMerge[i]) * total[i] * timestampReciprocal;
72
+ }
73
+
74
+ float operator()(int source, int destination, int timestamp) {
75
+ if (this->timestamp < timestamp) {
76
+ ConditionalMerge(numCurrentEdge.data, numTotalEdge.data, scoreEdge.data);
77
+ ConditionalMerge(numCurrentSource.data, numTotalSource.data, scoreSource.data);
78
+ ConditionalMerge(numCurrentDestination.data, numTotalDestination.data, scoreDestination.data);
79
+ numCurrentEdge.MultiplyAll(factor);
80
+ numCurrentSource.MultiplyAll(factor);
81
+ numCurrentDestination.MultiplyAll(factor);
82
+ timestampReciprocal = 1.f / (timestamp - 1); // So I can skip an if-statement
83
+ this->timestamp = timestamp;
84
+ }
85
+ numCurrentEdge.Hash(indexEdge, source, destination);
86
+ numCurrentEdge.Add(indexEdge);
87
+ numCurrentSource.Hash(indexSource, source);
88
+ numCurrentSource.Add(indexSource);
89
+ numCurrentDestination.Hash(indexDestination, destination);
90
+ numCurrentDestination.Add(indexDestination);
91
+ return std::max({
92
+ scoreEdge.Assign(indexEdge, ComputeScore(numCurrentEdge(indexEdge), numTotalEdge(indexEdge), timestamp)),
93
+ scoreSource.Assign(indexSource, ComputeScore(numCurrentSource(indexSource), numTotalSource(indexSource), timestamp)),
94
+ scoreDestination.Assign(indexDestination, ComputeScore(numCurrentDestination(indexDestination), numTotalDestination(indexDestination), timestamp)),
95
+ });
96
+ }
97
+ };
98
+ }
@@ -0,0 +1,53 @@
1
+ // -----------------------------------------------------------------------------
2
+ // Copyright 2020 Rui Liu (liurui39660) and Siddharth Bhatia (bhatiasiddharth)
3
+ //
4
+ // Licensed under the Apache License, Version 2.0 (the "License");
5
+ // you may not use this file except in compliance with the License.
6
+ // You may obtain a copy of the License at
7
+ //
8
+ // http://www.apache.org/licenses/LICENSE-2.0
9
+ //
10
+ // Unless required by applicable law or agreed to in writing, software
11
+ // distributed under the License is distributed on an "AS IS" BASIS,
12
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ // See the License for the specific language governing permissions and
14
+ // limitations under the License.
15
+ // -----------------------------------------------------------------------------
16
+
17
+ #pragma once
18
+
19
+ #include <cmath>
20
+
21
+ #include "CountMinSketch.hpp"
22
+
23
+ namespace MIDAS {
24
+ struct NormalCore {
25
+ int timestamp = 1;
26
+ int* const index; // Pre-compute the index to-be-modified, thanks to the same structure of CMSs
27
+ CountMinSketch numCurrent, numTotal;
28
+
29
+ NormalCore(int numRow, int numColumn):
30
+ index(new int[numRow]),
31
+ numCurrent(numRow, numColumn),
32
+ numTotal(numCurrent) { }
33
+
34
+ virtual ~NormalCore() {
35
+ delete[] index;
36
+ }
37
+
38
+ static float ComputeScore(float a, float s, float t) {
39
+ return s == 0 || t - 1 == 0 ? 0 : pow((a - s / t) * t, 2) / (s * (t - 1));
40
+ }
41
+
42
+ float operator()(int source, int destination, int timestamp) {
43
+ if (this->timestamp < timestamp) {
44
+ numCurrent.ClearAll();
45
+ this->timestamp = timestamp;
46
+ }
47
+ numCurrent.Hash(index, source, destination);
48
+ numCurrent.Add(index);
49
+ numTotal.Add(index);
50
+ return ComputeScore(numCurrent(index), numTotal(index), timestamp);
51
+ }
52
+ };
53
+ }
@@ -0,0 +1,79 @@
1
+ // -----------------------------------------------------------------------------
2
+ // Copyright 2020 Rui Liu (liurui39660) and Siddharth Bhatia (bhatiasiddharth)
3
+ //
4
+ // Licensed under the Apache License, Version 2.0 (the "License");
5
+ // you may not use this file except in compliance with the License.
6
+ // You may obtain a copy of the License at
7
+ //
8
+ // http://www.apache.org/licenses/LICENSE-2.0
9
+ //
10
+ // Unless required by applicable law or agreed to in writing, software
11
+ // distributed under the License is distributed on an "AS IS" BASIS,
12
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ // See the License for the specific language governing permissions and
14
+ // limitations under the License.
15
+ // -----------------------------------------------------------------------------
16
+
17
+ #pragma once
18
+
19
+ #include <cmath>
20
+
21
+ #include "CountMinSketch.hpp"
22
+
23
+ namespace MIDAS {
24
+ struct RelationalCore {
25
+ int timestamp = 1;
26
+ const float factor;
27
+ int* const indexEdge; // Pre-compute the index to-be-modified, thanks to the same structure of CMSs
28
+ int* const indexSource;
29
+ int* const indexDestination;
30
+ CountMinSketch numCurrentEdge, numTotalEdge;
31
+ CountMinSketch numCurrentSource, numTotalSource;
32
+ CountMinSketch numCurrentDestination, numTotalDestination;
33
+
34
+ RelationalCore(int numRow, int numColumn, float factor = 0.5):
35
+ factor(factor),
36
+ indexEdge(new int[numRow]),
37
+ indexSource(new int[numRow]),
38
+ indexDestination(new int[numRow]),
39
+ numCurrentEdge(numRow, numColumn),
40
+ numTotalEdge(numCurrentEdge),
41
+ numCurrentSource(numRow, numColumn),
42
+ numTotalSource(numCurrentSource),
43
+ numCurrentDestination(numRow, numColumn),
44
+ numTotalDestination(numCurrentDestination) { }
45
+
46
+ virtual ~RelationalCore() {
47
+ delete[] indexEdge;
48
+ delete[] indexSource;
49
+ delete[] indexDestination;
50
+ }
51
+
52
+ static float ComputeScore(float a, float s, float t) {
53
+ return s == 0 || t - 1 == 0 ? 0 : pow((a - s / t) * t, 2) / (s * (t - 1));
54
+ }
55
+
56
+ float operator()(int source, int destination, int timestamp) {
57
+ if (this->timestamp < timestamp) {
58
+ numCurrentEdge.MultiplyAll(factor);
59
+ numCurrentSource.MultiplyAll(factor);
60
+ numCurrentDestination.MultiplyAll(factor);
61
+ this->timestamp = timestamp;
62
+ }
63
+ numCurrentEdge.Hash(indexEdge, source, destination);
64
+ numCurrentEdge.Add(indexEdge);
65
+ numTotalEdge.Add(indexEdge);
66
+ numCurrentSource.Hash(indexSource, source);
67
+ numCurrentSource.Add(indexSource);
68
+ numTotalSource.Add(indexSource);
69
+ numCurrentDestination.Hash(indexDestination, destination);
70
+ numCurrentDestination.Add(indexDestination);
71
+ numTotalDestination.Add(indexDestination);
72
+ return std::max({
73
+ ComputeScore(numCurrentEdge(indexEdge), numTotalEdge(indexEdge), timestamp),
74
+ ComputeScore(numCurrentSource(indexSource), numTotalSource(indexSource), timestamp),
75
+ ComputeScore(numCurrentDestination(indexDestination), numTotalDestination(indexDestination), timestamp),
76
+ });
77
+ }
78
+ };
79
+ }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: midas-edge
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-02-18 00:00:00.000000000 Z
11
+ date: 2020-11-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rice
@@ -94,7 +94,7 @@ dependencies:
94
94
  - - ">="
95
95
  - !ruby/object:Gem::Version
96
96
  version: '5'
97
- description:
97
+ description:
98
98
  email: andrew@chartkick.com
99
99
  executables: []
100
100
  extensions:
@@ -109,23 +109,18 @@ files:
109
109
  - ext/midas/extconf.rb
110
110
  - lib/midas-edge.rb
111
111
  - lib/midas/detector.rb
112
- - lib/midas/ext.bundle
113
112
  - lib/midas/version.rb
114
113
  - vendor/MIDAS/LICENSE
115
114
  - vendor/MIDAS/README.md
116
- - vendor/MIDAS/anom.cpp
117
- - vendor/MIDAS/anom.hpp
118
- - vendor/MIDAS/argparse.hpp
119
- - vendor/MIDAS/edgehash.cpp
120
- - vendor/MIDAS/edgehash.hpp
121
- - vendor/MIDAS/main.cpp
122
- - vendor/MIDAS/nodehash.cpp
123
- - vendor/MIDAS/nodehash.hpp
115
+ - vendor/MIDAS/src/CountMinSketch.hpp
116
+ - vendor/MIDAS/src/FilteringCore.hpp
117
+ - vendor/MIDAS/src/NormalCore.hpp
118
+ - vendor/MIDAS/src/RelationalCore.hpp
124
119
  homepage: https://github.com/ankane/midas
125
120
  licenses:
126
121
  - MIT
127
122
  metadata: {}
128
- post_install_message:
123
+ post_install_message:
129
124
  rdoc_options: []
130
125
  require_paths:
131
126
  - lib
@@ -140,8 +135,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
140
135
  - !ruby/object:Gem::Version
141
136
  version: '0'
142
137
  requirements: []
143
- rubygems_version: 3.1.2
144
- signing_key:
138
+ rubygems_version: 3.1.4
139
+ signing_key:
145
140
  specification_version: 4
146
141
  summary: Edge stream anomaly detection for Ruby
147
142
  test_files: []
Binary file
@@ -1,88 +0,0 @@
1
- #define MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
2
- #define MAX(X, Y) (((X) > (Y)) ? (X) : (Y))
3
-
4
- #include <iostream>
5
- #include <math.h>
6
- #include <algorithm>
7
- #include <vector>
8
- #include "anom.hpp"
9
- #include "edgehash.hpp"
10
- #include "nodehash.hpp"
11
-
12
- vector<double>* midas(vector<int>& src, vector<int>& dst, vector<int>& times, int num_rows, int num_buckets)
13
- {
14
- int m = *max_element(src.begin(), src.end());
15
- Edgehash cur_count(num_rows, num_buckets, m);
16
- Edgehash total_count(num_rows, num_buckets, m);
17
- vector<double>* anom_score = new vector<double>(src.size());
18
- int cur_t = 1, size = src.size(), cur_src, cur_dst;
19
- double cur_mean, sqerr, cur_score;
20
- for (int i = 0; i < size; i++) {
21
-
22
- if (i == 0 || times[i] > cur_t) {
23
- cur_count.clear();
24
- cur_t = times[i];
25
- }
26
-
27
- cur_src = src[i];
28
- cur_dst = dst[i];
29
- cur_count.insert(cur_src, cur_dst, 1);
30
- total_count.insert(cur_src, cur_dst, 1);
31
- cur_mean = total_count.get_count(cur_src, cur_dst) / cur_t;
32
- sqerr = pow(cur_count.get_count(cur_src, cur_dst) - cur_mean, 2);
33
- if (cur_t == 1) cur_score = 0;
34
- else cur_score = sqerr / cur_mean + sqerr / (cur_mean * (cur_t - 1));
35
- (*anom_score)[i] = cur_score;
36
- }
37
-
38
- return anom_score;
39
- }
40
-
41
- double counts_to_anom(double tot, double cur, int cur_t)
42
- {
43
- double cur_mean = tot / cur_t;
44
- double sqerr = pow(MAX(0, cur - cur_mean), 2);
45
- return sqerr / cur_mean + sqerr / (cur_mean * MAX(1, cur_t - 1));
46
- }
47
-
48
- vector<double>* midasR(vector<int>& src, vector<int>& dst, vector<int>& times, int num_rows, int num_buckets, double factor)
49
- {
50
- int m = *max_element(src.begin(), src.end());
51
- Edgehash cur_count(num_rows, num_buckets, m);
52
- Edgehash total_count(num_rows, num_buckets, m);
53
- Nodehash src_score(num_rows, num_buckets);
54
- Nodehash dst_score(num_rows, num_buckets);
55
- Nodehash src_total(num_rows, num_buckets);
56
- Nodehash dst_total(num_rows, num_buckets);
57
- vector<double>* anom_score = new vector<double>(src.size());
58
- int cur_t = 1, size = src.size(), cur_src, cur_dst;
59
- double cur_score, cur_score_src, cur_score_dst, combined_score;
60
-
61
- for (int i = 0; i < size; i++) {
62
-
63
- if (i == 0 || times[i] > cur_t) {
64
- cur_count.lower(factor);
65
- src_score.lower(factor);
66
- dst_score.lower(factor);
67
- cur_t = times[i];
68
- }
69
-
70
- cur_src = src[i];
71
- cur_dst = dst[i];
72
- cur_count.insert(cur_src, cur_dst, 1);
73
- total_count.insert(cur_src, cur_dst, 1);
74
- src_score.insert(cur_src, 1);
75
- dst_score.insert(cur_dst, 1);
76
- src_total.insert(cur_src, 1);
77
- dst_total.insert(cur_dst, 1);
78
- cur_score = counts_to_anom(total_count.get_count(cur_src, cur_dst), cur_count.get_count(cur_src, cur_dst), cur_t);
79
- cur_score_src = counts_to_anom(src_total.get_count(cur_src), src_score.get_count(cur_src), cur_t);
80
- cur_score_dst = counts_to_anom(dst_total.get_count(cur_dst), dst_score.get_count(cur_dst), cur_t);
81
- //combined_score = MAX(cur_score_src, cur_score_dst) + cur_score;
82
- //combined_score = cur_score_src + cur_score_dst + cur_score;
83
- combined_score = MAX(MAX(cur_score_src, cur_score_dst), cur_score);
84
- (*anom_score)[i] = log(1 + combined_score);
85
- }
86
-
87
- return anom_score;
88
- }