midas-edge 0.1.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +25 -0
- data/NOTICE.txt +2 -1
- data/README.md +8 -7
- data/ext/midas/ext.cpp +45 -32
- data/ext/midas/extconf.rb +1 -5
- data/lib/midas/detector.rb +7 -4
- data/lib/midas/version.rb +1 -1
- data/vendor/MIDAS/LICENSE +0 -25
- data/vendor/MIDAS/README.md +185 -40
- data/vendor/MIDAS/src/CountMinSketch.hpp +105 -0
- data/vendor/MIDAS/src/FilteringCore.hpp +98 -0
- data/vendor/MIDAS/src/NormalCore.hpp +53 -0
- data/vendor/MIDAS/src/RelationalCore.hpp +79 -0
- metadata +15 -75
- data/vendor/MIDAS/anom.cpp +0 -88
- data/vendor/MIDAS/anom.hpp +0 -10
- data/vendor/MIDAS/argparse.hpp +0 -539
- data/vendor/MIDAS/edgehash.cpp +0 -63
- data/vendor/MIDAS/edgehash.hpp +0 -25
- data/vendor/MIDAS/main.cpp +0 -127
- data/vendor/MIDAS/nodehash.cpp +0 -63
- data/vendor/MIDAS/nodehash.hpp +0 -25
data/vendor/MIDAS/edgehash.cpp
DELETED
@@ -1,63 +0,0 @@
|
|
1
|
-
#define MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
|
2
|
-
#define MAX(X, Y) (((X) > (Y)) ? (X) : (Y))
|
3
|
-
|
4
|
-
#include <iostream>
|
5
|
-
#include "edgehash.hpp"
|
6
|
-
|
7
|
-
// Builds an edge sketch with `r` rows of `b` buckets each.
//
// r  - number of rows; one (hash_a, hash_b) coefficient pair is drawn per row.
// b  - number of buckets per row; must be at least 1.
// m0 - multiplier stored in `m`; hash() uses it to fold an edge (a, b) into a
//      single key as a + m * b.
//
// The coefficients come from rand(), so the hash functions depend on whatever
// seed the process has given srand(). All counters start at zero.
Edgehash::Edgehash(int r, int b, int m0)
{
    num_rows = r;
    num_buckets = b;
    m = m0;
    hash_a.resize(num_rows);
    hash_b.resize(num_rows);
    for (int i = 0; i < num_rows; i++) {
        // a is in [1, p-1]; b is in [0, p-1].
        // With a single bucket that range for `a` is empty and
        // `rand() % (num_buckets - 1)` would be a modulo by zero, so fall back
        // to a = 1 (every key lands in bucket 0 anyway).
        hash_a[i] = (num_buckets > 1) ? rand() % (num_buckets - 1) + 1 : 1;
        hash_b[i] = rand() % num_buckets;
    }
    this->clear();
}
|
21
|
-
|
22
|
-
// Nothing to release by hand: the vector members clean up after themselves.
Edgehash::~Edgehash() = default;
|
25
|
-
|
26
|
-
int Edgehash::hash(int a, int b, int i)
|
27
|
-
{
|
28
|
-
int resid = ((a + m * b) * hash_a[i] + hash_b[i]) % num_buckets;
|
29
|
-
return resid + (resid < 0 ? num_buckets : 0);
|
30
|
-
}
|
31
|
-
|
32
|
-
void Edgehash::insert(int a, int b, double weight)
|
33
|
-
{
|
34
|
-
for (int i = 0; i < num_rows; i++) {
|
35
|
-
int bucket = hash(a, b, i);
|
36
|
-
count[i][bucket] += weight;
|
37
|
-
}
|
38
|
-
}
|
39
|
-
|
40
|
-
double Edgehash::get_count(int a, int b)
|
41
|
-
{
|
42
|
-
double min_count = numeric_limits<double>::max();
|
43
|
-
int bucket;
|
44
|
-
for (int i = 0; i < num_rows; i++) {
|
45
|
-
bucket = hash(a, b, i);
|
46
|
-
min_count = MIN(min_count, count[i][bucket]);
|
47
|
-
}
|
48
|
-
return min_count;
|
49
|
-
}
|
50
|
-
|
51
|
-
void Edgehash::clear()
|
52
|
-
{
|
53
|
-
count = vector<vector<double> >(num_rows, vector<double>(num_buckets, 0.0));
|
54
|
-
}
|
55
|
-
|
56
|
-
void Edgehash::lower(double factor)
|
57
|
-
{
|
58
|
-
for (int i = 0; i < num_rows; i++) {
|
59
|
-
for (int j = 0; j < num_buckets; j++) {
|
60
|
-
count[i][j] = count[i][j] * factor;
|
61
|
-
}
|
62
|
-
}
|
63
|
-
}
|
data/vendor/MIDAS/edgehash.hpp
DELETED
@@ -1,25 +0,0 @@
|
|
1
|
-
#ifndef edgehash_hpp
|
2
|
-
#define edgehash_hpp
|
3
|
-
|
4
|
-
#include <vector>
|
5
|
-
using namespace std;
|
6
|
-
|
7
|
-
// Fixed-size table of per-edge counters: `num_rows` rows, each with
// `num_buckets` buckets and its own (hash_a, hash_b) coefficient pair.
class Edgehash {
public:
    // r: number of rows, b: buckets per row, m0: value stored in `m`.
    Edgehash(int r, int b, int m0);
    ~Edgehash();

    // Adds `weight` to the bucket of edge (a, b) in every row.
    void insert(int a, int b, double weight);
    // Minimum, across rows, of the counters edge (a, b) hashes to.
    double get_count(int a, int b);
    // Zeroes all counters.
    void clear();
    // Multiplies all counters by `factor`.
    void lower(double factor);

    int num_rows;     // number of rows / hash functions
    int num_buckets;  // number of buckets in each row
    int m;            // multiplier combining the two endpoints into one key

private:
    std::vector<int> hash_a;                   // per-row multiplicative coefficient
    std::vector<int> hash_b;                   // per-row additive coefficient
    std::vector<std::vector<double> > count;   // counters, indexed [row][bucket]
    // Bucket index of edge (a, b) in row i.
    int hash(int a, int b, int i);
};
|
24
|
-
|
25
|
-
#endif /* edgehash_hpp */
|
data/vendor/MIDAS/main.cpp
DELETED
@@ -1,127 +0,0 @@
|
|
1
|
-
#include <iostream>
|
2
|
-
#include <vector>
|
3
|
-
#include "anom.hpp"
|
4
|
-
#include "argparse.hpp"
|
5
|
-
using namespace std;
|
6
|
-
|
7
|
-
// Reads an edge list from `input_file` and appends it to `src`, `dst` and
// `times` (one entry per edge in each vector).
//
// src, dst, times - output vectors; existing contents are kept, new edges are
//                   appended.
// input_file      - path of the text file to read.
// undirected      - false: records are parsed as "s,d,t" and stored once.
//                   true:  records are parsed as "s:d:t" and stored twice, as
//                          (s, d, t) and (d, s, t).
//
// Reading stops at the first record that does not parse as three integers.
// If the file cannot be opened, a message is written to stderr and the process
// exits. The file handle is closed before returning.
void load_data(std::vector<int>& src, std::vector<int>& dst, std::vector<int>& times, std::string input_file, bool undirected)
{
    FILE* infile = fopen(input_file.c_str(), "r");
    if (infile == NULL) {
        std::cerr << "Could not read file " << input_file << "\n";
        // NOTE(review): exit status 0 on failure is kept for compatibility;
        // a non-zero status would be more conventional.
        exit(0);
    }

    int s, d, t;

    if (undirected) {
        // NOTE(review): this branch expects ':' separators while the directed
        // branch expects ',' -- confirm this difference is intentional.
        while (fscanf(infile, "%d:%d:%d", &s, &d, &t) == 3) {
            src.push_back(s);
            dst.push_back(d);
            times.push_back(t);
            // Store the reverse direction as well.
            src.push_back(d);
            dst.push_back(s);
            times.push_back(t);
        }
    }
    else {
        while (fscanf(infile, "%d,%d,%d", &s, &d, &t) == 3) {
            src.push_back(s);
            dst.push_back(d);
            times.push_back(t);
        }
    }

    fclose(infile);
}
|
37
|
-
|
38
|
-
int main(int argc, const char* argv[])
|
39
|
-
{
|
40
|
-
argparse::ArgumentParser program("midas");
|
41
|
-
program.add_argument("-i", "--input")
|
42
|
-
.required()
|
43
|
-
.help("Input File ");
|
44
|
-
program.add_argument("-o", "--output")
|
45
|
-
.default_value(string("scores.txt"))
|
46
|
-
.help("Output File. Default is scores.txt");
|
47
|
-
program.add_argument("-r", "--rows")
|
48
|
-
.default_value(2)
|
49
|
-
.action([](const std::string& value) { return std::stoi(value); })
|
50
|
-
.help("Number of rows/hash functions. Default is 2");
|
51
|
-
program.add_argument("-b", "--buckets")
|
52
|
-
.default_value(769)
|
53
|
-
.action([](const std::string& value) { return std::stoi(value); })
|
54
|
-
.help("Number of buckets. Default is 769");
|
55
|
-
program.add_argument("-a", "--alpha")
|
56
|
-
.default_value(0.6)
|
57
|
-
.action([](const std::string& value) { return std::stod(value); })
|
58
|
-
.help("Alpha: Temporal Decay Factor. Default is 0.6");
|
59
|
-
program.add_argument("--norelations")
|
60
|
-
.default_value(false)
|
61
|
-
.implicit_value(true)
|
62
|
-
.help("To run Midas instead of Midas-R.");
|
63
|
-
program.add_argument("--undirected")
|
64
|
-
.default_value(false)
|
65
|
-
.implicit_value(true)
|
66
|
-
.help("If graph is undirected.");
|
67
|
-
try {
|
68
|
-
program.parse_args(argc, argv);
|
69
|
-
}
|
70
|
-
catch (const std::runtime_error& err) {
|
71
|
-
std::cout << err.what() << std::endl;
|
72
|
-
program.print_help();
|
73
|
-
exit(0);
|
74
|
-
}
|
75
|
-
|
76
|
-
string input_file = program.get<string>("-i");
|
77
|
-
string output_file = program.get<string>("-o");
|
78
|
-
int rows = program.get<int>("-r");
|
79
|
-
int buckets = program.get<int>("-b");
|
80
|
-
double alpha = program.get<double>("-a");
|
81
|
-
|
82
|
-
if (rows < 1) {
|
83
|
-
cerr << "Number of hash functions should be positive." << endl;
|
84
|
-
exit(0);
|
85
|
-
}
|
86
|
-
|
87
|
-
if (buckets < 2) {
|
88
|
-
cerr << "Number of buckets should be atleast 2" << endl;
|
89
|
-
exit(0);
|
90
|
-
}
|
91
|
-
|
92
|
-
if (alpha <= 0 || alpha >= 1) {
|
93
|
-
cerr << "Alpha: Temporal Decay Factor must be between 0 and 1." << endl;
|
94
|
-
exit(0);
|
95
|
-
}
|
96
|
-
|
97
|
-
vector<int> src, dst, times;
|
98
|
-
if (program["--undirected"] == true)
|
99
|
-
load_data(src, dst, times, input_file, true);
|
100
|
-
else
|
101
|
-
load_data(src, dst, times, input_file, false);
|
102
|
-
cout << "Finished Loading Data from " << input_file << endl;
|
103
|
-
|
104
|
-
if (program["--norelations"] == true) {
|
105
|
-
clock_t start_time1 = clock();
|
106
|
-
vector<double>* scores = midas(src, dst, times, rows, buckets);
|
107
|
-
cout << "Time taken: " << ((double)(clock() - start_time1)) / CLOCKS_PER_SEC << " s" << endl;
|
108
|
-
|
109
|
-
cout << "Writing Anomaly Scores to " << output_file << endl;
|
110
|
-
FILE* outfile = fopen(output_file.c_str(), "w");
|
111
|
-
for (int i = 0; i < scores->size(); i++) {
|
112
|
-
fprintf(outfile, "%f\n", scores->at(i));
|
113
|
-
}
|
114
|
-
}
|
115
|
-
else {
|
116
|
-
clock_t start_time2 = clock();
|
117
|
-
vector<double>* scores2 = midasR(src, dst, times, rows, buckets, alpha);
|
118
|
-
cout << "Time taken: " << ((double)(clock() - start_time2)) / CLOCKS_PER_SEC << " s" << endl;
|
119
|
-
cout << "Writing Anomaly Scores to " << output_file << endl;
|
120
|
-
FILE* outfile2 = fopen(output_file.c_str(), "w");
|
121
|
-
for (int i = 0; i < scores2->size(); i++) {
|
122
|
-
fprintf(outfile2, "%f\n", scores2->at(i));
|
123
|
-
}
|
124
|
-
}
|
125
|
-
|
126
|
-
return 0;
|
127
|
-
}
|
data/vendor/MIDAS/nodehash.cpp
DELETED
@@ -1,63 +0,0 @@
|
|
1
|
-
#define MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
|
2
|
-
#define MAX(X, Y) (((X) > (Y)) ? (X) : (Y))
|
3
|
-
|
4
|
-
#include <iostream>
|
5
|
-
#include "nodehash.hpp"
|
6
|
-
|
7
|
-
// Builds a node sketch with `r` rows of `b` buckets each.
//
// r - number of rows; one (hash_a, hash_b) coefficient pair is drawn per row.
// b - number of buckets per row; must be at least 1.
//
// The coefficients come from rand(), so the hash functions depend on whatever
// seed the process has given srand(). All counters start at zero. The member
// `m` is not assigned here.
Nodehash::Nodehash(int r, int b)
{
    num_rows = r;
    num_buckets = b;
    hash_a.resize(num_rows);
    hash_b.resize(num_rows);
    for (int i = 0; i < num_rows; i++) {
        // a is in [1, p-1]; b is in [0, p-1].
        // With a single bucket that range for `a` is empty and
        // `rand() % (num_buckets - 1)` would be a modulo by zero, so fall back
        // to a = 1 (every key lands in bucket 0 anyway).
        hash_a[i] = (num_buckets > 1) ? rand() % (num_buckets - 1) + 1 : 1;
        hash_b[i] = rand() % num_buckets;
    }
    this->clear();
}
|
20
|
-
|
21
|
-
// The vector members release their own storage; no manual cleanup is needed.
Nodehash::~Nodehash() = default;
|
24
|
-
|
25
|
-
int Nodehash::hash(int a, int i)
|
26
|
-
{
|
27
|
-
int resid = (a * hash_a[i] + hash_b[i]) % num_buckets;
|
28
|
-
return resid + (resid < 0 ? num_buckets : 0);
|
29
|
-
}
|
30
|
-
|
31
|
-
void Nodehash::insert(int a, double weight)
|
32
|
-
{
|
33
|
-
int bucket;
|
34
|
-
for (int i = 0; i < num_rows; i++) {
|
35
|
-
bucket = hash(a, i);
|
36
|
-
count[i][bucket] += weight;
|
37
|
-
}
|
38
|
-
}
|
39
|
-
|
40
|
-
double Nodehash::get_count(int a)
|
41
|
-
{
|
42
|
-
double min_count = numeric_limits<double>::max();
|
43
|
-
int bucket;
|
44
|
-
for (int i = 0; i < num_rows; i++) {
|
45
|
-
bucket = hash(a, i);
|
46
|
-
min_count = MIN(min_count, count[i][bucket]);
|
47
|
-
}
|
48
|
-
return min_count;
|
49
|
-
}
|
50
|
-
|
51
|
-
void Nodehash::clear()
|
52
|
-
{
|
53
|
-
count = vector<vector<double> >(num_rows, vector<double>(num_buckets, 0.0));
|
54
|
-
}
|
55
|
-
|
56
|
-
void Nodehash::lower(double factor)
|
57
|
-
{
|
58
|
-
for (int i = 0; i < num_rows; i++) {
|
59
|
-
for (int j = 0; j < num_buckets; j++) {
|
60
|
-
count[i][j] = count[i][j] * factor;
|
61
|
-
}
|
62
|
-
}
|
63
|
-
}
|
data/vendor/MIDAS/nodehash.hpp
DELETED
@@ -1,25 +0,0 @@
|
|
1
|
-
#ifndef nodehash_hpp
|
2
|
-
#define nodehash_hpp
|
3
|
-
|
4
|
-
#include <vector>
|
5
|
-
using namespace std;
|
6
|
-
|
7
|
-
// Fixed-size table of per-node counters: `num_rows` rows, each with
// `num_buckets` buckets and its own (hash_a, hash_b) coefficient pair.
class Nodehash {
public:
    // r: number of rows, b: buckets per row.
    Nodehash(int r, int b);
    ~Nodehash();

    // Adds `weight` to the bucket of node `a` in every row.
    void insert(int a, double weight);
    // Minimum, across rows, of the counters node `a` hashes to.
    double get_count(int a);
    // Zeroes all counters.
    void clear();
    // Multiplies all counters by `factor`.
    void lower(double factor);

    int num_rows;     // number of rows / hash functions
    int num_buckets;  // number of buckets in each row
    int m;            // NOTE(review): declared but not set by the constructor -- confirm whether anything reads it

private:
    std::vector<int> hash_a;                   // per-row multiplicative coefficient
    std::vector<int> hash_b;                   // per-row additive coefficient
    std::vector<std::vector<double> > count;   // counters, indexed [row][bucket]
    // Bucket index of node a in row i.
    int hash(int a, int i);
};
|
24
|
-
|
25
|
-
#endif /* nodehash_hpp */
|