midas-edge 0.1.0 → 0.2.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +24 -0
- data/NOTICE.txt +1 -0
- data/README.md +10 -7
- data/ext/midas/ext.cpp +32 -16
- data/ext/midas/extconf.rb +2 -8
- data/lib/midas/detector.rb +7 -4
- data/lib/midas/version.rb +1 -1
- data/vendor/MIDAS/LICENSE +0 -25
- data/vendor/MIDAS/README.md +185 -40
- data/vendor/MIDAS/src/CountMinSketch.hpp +105 -0
- data/vendor/MIDAS/src/FilteringCore.hpp +98 -0
- data/vendor/MIDAS/src/NormalCore.hpp +53 -0
- data/vendor/MIDAS/src/RelationalCore.hpp +79 -0
- metadata +11 -16
- data/lib/midas/ext.bundle +0 -0
- data/vendor/MIDAS/anom.cpp +0 -88
- data/vendor/MIDAS/anom.hpp +0 -10
- data/vendor/MIDAS/argparse.hpp +0 -539
- data/vendor/MIDAS/edgehash.cpp +0 -63
- data/vendor/MIDAS/edgehash.hpp +0 -25
- data/vendor/MIDAS/main.cpp +0 -127
- data/vendor/MIDAS/nodehash.cpp +0 -63
- data/vendor/MIDAS/nodehash.hpp +0 -25
data/vendor/MIDAS/edgehash.cpp
DELETED
@@ -1,63 +0,0 @@
|
|
1
|
-
#define MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
|
2
|
-
#define MAX(X, Y) (((X) > (Y)) ? (X) : (Y))
|
3
|
-
|
4
|
-
#include <iostream>
|
5
|
-
#include "edgehash.hpp"
|
6
|
-
|
7
|
-
// Builds an edge-count table with r rows (one hash function per row) and b
// buckets per row. m0 is stored in m and used by hash() to combine the two
// endpoints of an edge into a single key (a + m * b).
//
// Each row gets random hash coefficients drawn with rand(); all counters
// start at zero (via clear()).
Edgehash::Edgehash(int r, int b, int m0)
{
    num_rows = r;
    num_buckets = b;
    m = m0;
    hash_a.resize(num_rows);
    hash_b.resize(num_rows);
    for (int i = 0; i < r; i++) {
        if (num_buckets > 1) {
            // a is in [1, p-1]; b is in [0, p-1]
            hash_a[i] = rand() % (num_buckets - 1) + 1;
            hash_b[i] = rand() % num_buckets;
        } else {
            // With a single bucket, `rand() % (num_buckets - 1)` would be a
            // division by zero. Every key lands in bucket 0 regardless of the
            // coefficients, so use fixed ones instead of drawing them.
            // (Bucket counts below 1 remain invalid input for this class.)
            hash_a[i] = 1;
            hash_b[i] = 0;
        }
    }
    this->clear();
}
|
21
|
-
|
22
|
-
// Nothing to release by hand: the vector members clean up after themselves.
Edgehash::~Edgehash() = default;
|
25
|
-
|
26
|
-
int Edgehash::hash(int a, int b, int i)
|
27
|
-
{
|
28
|
-
int resid = ((a + m * b) * hash_a[i] + hash_b[i]) % num_buckets;
|
29
|
-
return resid + (resid < 0 ? num_buckets : 0);
|
30
|
-
}
|
31
|
-
|
32
|
-
void Edgehash::insert(int a, int b, double weight)
|
33
|
-
{
|
34
|
-
for (int i = 0; i < num_rows; i++) {
|
35
|
-
int bucket = hash(a, b, i);
|
36
|
-
count[i][bucket] += weight;
|
37
|
-
}
|
38
|
-
}
|
39
|
-
|
40
|
-
double Edgehash::get_count(int a, int b)
|
41
|
-
{
|
42
|
-
double min_count = numeric_limits<double>::max();
|
43
|
-
int bucket;
|
44
|
-
for (int i = 0; i < num_rows; i++) {
|
45
|
-
bucket = hash(a, b, i);
|
46
|
-
min_count = MIN(min_count, count[i][bucket]);
|
47
|
-
}
|
48
|
-
return min_count;
|
49
|
-
}
|
50
|
-
|
51
|
-
void Edgehash::clear()
|
52
|
-
{
|
53
|
-
count = vector<vector<double> >(num_rows, vector<double>(num_buckets, 0.0));
|
54
|
-
}
|
55
|
-
|
56
|
-
void Edgehash::lower(double factor)
|
57
|
-
{
|
58
|
-
for (int i = 0; i < num_rows; i++) {
|
59
|
-
for (int j = 0; j < num_buckets; j++) {
|
60
|
-
count[i][j] = count[i][j] * factor;
|
61
|
-
}
|
62
|
-
}
|
63
|
-
}
|
data/vendor/MIDAS/edgehash.hpp
DELETED
@@ -1,25 +0,0 @@
|
|
1
|
-
#ifndef edgehash_hpp
|
2
|
-
#define edgehash_hpp
|
3
|
-
|
4
|
-
#include <vector>
|
5
|
-
using namespace std;
|
6
|
-
|
7
|
-
// Approximate per-edge counter table: num_rows independent hash rows, each
// with num_buckets counters. An edge (a, b) is counted in one bucket per row,
// and queries report the minimum of those buckets.
//
// Standard-library names are fully qualified so the class does not rely on a
// using-directive being in effect wherever this header is included.
class Edgehash {
public:
    // r: number of rows, b: buckets per row, m0: multiplier stored in `m`.
    Edgehash(int r, int b, int m0);
    ~Edgehash();

    // Adds `weight` to the edge's bucket in every row.
    void insert(int a, int b, double weight);
    // Minimum over rows of the edge's bucket value.
    double get_count(int a, int b);
    // Resets all counters to zero.
    void clear();
    // Scales all counters by `factor`.
    void lower(double factor);

    int num_rows;
    int num_buckets;
    int m; // multiplier used to fold (a, b) into a single key: a + m * b

private:
    std::vector<int> hash_a, hash_b;         // per-row hash coefficients
    std::vector<std::vector<double> > count; // count[row][bucket]
    // Bucket index of edge (a, b) in row i.
    int hash(int a, int b, int i);
};
|
24
|
-
|
25
|
-
#endif /* edgehash_hpp */
|
data/vendor/MIDAS/main.cpp
DELETED
@@ -1,127 +0,0 @@
|
|
1
|
-
#include <iostream>
|
2
|
-
#include <vector>
|
3
|
-
#include "anom.hpp"
|
4
|
-
#include "argparse.hpp"
|
5
|
-
using namespace std;
|
6
|
-
|
7
|
-
// Reads integer (source, destination, time) triples from `input_file` and
// appends them to src, dst and times.
//
// - Directed mode (undirected == false): records are parsed as "s,d,t".
// - Undirected mode: records are parsed as "s:d:t", and each record is stored
//   twice, once as (s, d, t) and once reversed as (d, s, t).
//   NOTE(review): the two modes expect different separators (',' vs ':').
//   That is preserved as-is here; confirm whether it is intentional.
//
// Reading stops at the first record that does not yield three integers.
// If the file cannot be opened, an error is printed and the process exits
// with a non-zero status.
void load_data(std::vector<int>& src, std::vector<int>& dst, std::vector<int>& times, std::string input_file, bool undirected)
{
    FILE* infile = fopen(input_file.c_str(), "r");
    if (infile == NULL) {
        std::cerr << "Could not read file " << input_file << "\n";
        // Previously exit(0), which reported success to the caller's shell.
        exit(1);
    }

    int s, d, t;
    for (;;) {
        const int fields = undirected ? fscanf(infile, "%d:%d:%d", &s, &d, &t)
                                      : fscanf(infile, "%d,%d,%d", &s, &d, &t);
        if (fields != 3) {
            break;
        }

        src.push_back(s);
        dst.push_back(d);
        times.push_back(t);

        if (undirected) {
            // Store the reverse direction too, with the same timestamp.
            src.push_back(d);
            dst.push_back(s);
            times.push_back(t);
        }
    }

    // The original never closed the input file.
    fclose(infile);
}
|
37
|
-
|
38
|
-
int main(int argc, const char* argv[])
|
39
|
-
{
|
40
|
-
argparse::ArgumentParser program("midas");
|
41
|
-
program.add_argument("-i", "--input")
|
42
|
-
.required()
|
43
|
-
.help("Input File ");
|
44
|
-
program.add_argument("-o", "--output")
|
45
|
-
.default_value(string("scores.txt"))
|
46
|
-
.help("Output File. Default is scores.txt");
|
47
|
-
program.add_argument("-r", "--rows")
|
48
|
-
.default_value(2)
|
49
|
-
.action([](const std::string& value) { return std::stoi(value); })
|
50
|
-
.help("Number of rows/hash functions. Default is 2");
|
51
|
-
program.add_argument("-b", "--buckets")
|
52
|
-
.default_value(769)
|
53
|
-
.action([](const std::string& value) { return std::stoi(value); })
|
54
|
-
.help("Number of buckets. Default is 769");
|
55
|
-
program.add_argument("-a", "--alpha")
|
56
|
-
.default_value(0.6)
|
57
|
-
.action([](const std::string& value) { return std::stod(value); })
|
58
|
-
.help("Alpha: Temporal Decay Factor. Default is 0.6");
|
59
|
-
program.add_argument("--norelations")
|
60
|
-
.default_value(false)
|
61
|
-
.implicit_value(true)
|
62
|
-
.help("To run Midas instead of Midas-R.");
|
63
|
-
program.add_argument("--undirected")
|
64
|
-
.default_value(false)
|
65
|
-
.implicit_value(true)
|
66
|
-
.help("If graph is undirected.");
|
67
|
-
try {
|
68
|
-
program.parse_args(argc, argv);
|
69
|
-
}
|
70
|
-
catch (const std::runtime_error& err) {
|
71
|
-
std::cout << err.what() << std::endl;
|
72
|
-
program.print_help();
|
73
|
-
exit(0);
|
74
|
-
}
|
75
|
-
|
76
|
-
string input_file = program.get<string>("-i");
|
77
|
-
string output_file = program.get<string>("-o");
|
78
|
-
int rows = program.get<int>("-r");
|
79
|
-
int buckets = program.get<int>("-b");
|
80
|
-
double alpha = program.get<double>("-a");
|
81
|
-
|
82
|
-
if (rows < 1) {
|
83
|
-
cerr << "Number of hash functions should be positive." << endl;
|
84
|
-
exit(0);
|
85
|
-
}
|
86
|
-
|
87
|
-
if (buckets < 2) {
|
88
|
-
cerr << "Number of buckets should be atleast 2" << endl;
|
89
|
-
exit(0);
|
90
|
-
}
|
91
|
-
|
92
|
-
if (alpha <= 0 || alpha >= 1) {
|
93
|
-
cerr << "Alpha: Temporal Decay Factor must be between 0 and 1." << endl;
|
94
|
-
exit(0);
|
95
|
-
}
|
96
|
-
|
97
|
-
vector<int> src, dst, times;
|
98
|
-
if (program["--undirected"] == true)
|
99
|
-
load_data(src, dst, times, input_file, true);
|
100
|
-
else
|
101
|
-
load_data(src, dst, times, input_file, false);
|
102
|
-
cout << "Finished Loading Data from " << input_file << endl;
|
103
|
-
|
104
|
-
if (program["--norelations"] == true) {
|
105
|
-
clock_t start_time1 = clock();
|
106
|
-
vector<double>* scores = midas(src, dst, times, rows, buckets);
|
107
|
-
cout << "Time taken: " << ((double)(clock() - start_time1)) / CLOCKS_PER_SEC << " s" << endl;
|
108
|
-
|
109
|
-
cout << "Writing Anomaly Scores to " << output_file << endl;
|
110
|
-
FILE* outfile = fopen(output_file.c_str(), "w");
|
111
|
-
for (int i = 0; i < scores->size(); i++) {
|
112
|
-
fprintf(outfile, "%f\n", scores->at(i));
|
113
|
-
}
|
114
|
-
}
|
115
|
-
else {
|
116
|
-
clock_t start_time2 = clock();
|
117
|
-
vector<double>* scores2 = midasR(src, dst, times, rows, buckets, alpha);
|
118
|
-
cout << "Time taken: " << ((double)(clock() - start_time2)) / CLOCKS_PER_SEC << " s" << endl;
|
119
|
-
cout << "Writing Anomaly Scores to " << output_file << endl;
|
120
|
-
FILE* outfile2 = fopen(output_file.c_str(), "w");
|
121
|
-
for (int i = 0; i < scores2->size(); i++) {
|
122
|
-
fprintf(outfile2, "%f\n", scores2->at(i));
|
123
|
-
}
|
124
|
-
}
|
125
|
-
|
126
|
-
return 0;
|
127
|
-
}
|
data/vendor/MIDAS/nodehash.cpp
DELETED
@@ -1,63 +0,0 @@
|
|
1
|
-
#define MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
|
2
|
-
#define MAX(X, Y) (((X) > (Y)) ? (X) : (Y))
|
3
|
-
|
4
|
-
#include <iostream>
|
5
|
-
#include "nodehash.hpp"
|
6
|
-
|
7
|
-
// Builds a node-count table with r rows (one hash function per row) and b
// buckets per row.
//
// Each row gets random hash coefficients drawn with rand(); all counters
// start at zero (via clear()).
Nodehash::Nodehash(int r, int b)
{
    num_rows = r;
    num_buckets = b;
    // `m` is a public member that nothing in this file reads or writes; give
    // it a defined value so it is never observed uninitialized.
    m = 0;
    hash_a.resize(num_rows);
    hash_b.resize(num_rows);
    for (int i = 0; i < r; i++) {
        if (num_buckets > 1) {
            // a is in [1, p-1]; b is in [0, p-1]
            hash_a[i] = rand() % (num_buckets - 1) + 1;
            hash_b[i] = rand() % num_buckets;
        } else {
            // With a single bucket, `rand() % (num_buckets - 1)` would be a
            // division by zero. Every key lands in bucket 0 regardless of the
            // coefficients, so use fixed ones instead of drawing them.
            // (Bucket counts below 1 remain invalid input for this class.)
            hash_a[i] = 1;
            hash_b[i] = 0;
        }
    }
    this->clear();
}
|
20
|
-
|
21
|
-
// Nothing to release by hand: the vector members clean up after themselves.
Nodehash::~Nodehash() = default;
|
24
|
-
|
25
|
-
int Nodehash::hash(int a, int i)
|
26
|
-
{
|
27
|
-
int resid = (a * hash_a[i] + hash_b[i]) % num_buckets;
|
28
|
-
return resid + (resid < 0 ? num_buckets : 0);
|
29
|
-
}
|
30
|
-
|
31
|
-
void Nodehash::insert(int a, double weight)
|
32
|
-
{
|
33
|
-
int bucket;
|
34
|
-
for (int i = 0; i < num_rows; i++) {
|
35
|
-
bucket = hash(a, i);
|
36
|
-
count[i][bucket] += weight;
|
37
|
-
}
|
38
|
-
}
|
39
|
-
|
40
|
-
double Nodehash::get_count(int a)
|
41
|
-
{
|
42
|
-
double min_count = numeric_limits<double>::max();
|
43
|
-
int bucket;
|
44
|
-
for (int i = 0; i < num_rows; i++) {
|
45
|
-
bucket = hash(a, i);
|
46
|
-
min_count = MIN(min_count, count[i][bucket]);
|
47
|
-
}
|
48
|
-
return min_count;
|
49
|
-
}
|
50
|
-
|
51
|
-
void Nodehash::clear()
|
52
|
-
{
|
53
|
-
count = vector<vector<double> >(num_rows, vector<double>(num_buckets, 0.0));
|
54
|
-
}
|
55
|
-
|
56
|
-
void Nodehash::lower(double factor)
|
57
|
-
{
|
58
|
-
for (int i = 0; i < num_rows; i++) {
|
59
|
-
for (int j = 0; j < num_buckets; j++) {
|
60
|
-
count[i][j] = count[i][j] * factor;
|
61
|
-
}
|
62
|
-
}
|
63
|
-
}
|
data/vendor/MIDAS/nodehash.hpp
DELETED
@@ -1,25 +0,0 @@
|
|
1
|
-
#ifndef nodehash_hpp
|
2
|
-
#define nodehash_hpp
|
3
|
-
|
4
|
-
#include <vector>
|
5
|
-
using namespace std;
|
6
|
-
|
7
|
-
// Approximate per-node counter table: num_rows independent hash rows, each
// with num_buckets counters. A node is counted in one bucket per row, and
// queries report the minimum of those buckets.
//
// Standard-library names are fully qualified so the class does not rely on a
// using-directive being in effect wherever this header is included.
class Nodehash {
public:
    // r: number of rows, b: buckets per row.
    Nodehash(int r, int b);
    ~Nodehash();

    // Adds `weight` to the node's bucket in every row.
    void insert(int a, double weight);
    // Minimum over rows of the node's bucket value.
    double get_count(int a);
    // Resets all counters to zero.
    void clear();
    // Scales all counters by `factor`.
    void lower(double factor);

    int num_rows;
    int num_buckets;
    // Kept for interface compatibility. NOTE(review): no Nodehash method in
    // the accompanying implementation appears to use it; defaulted so it
    // never holds an indeterminate value.
    int m = 0;

private:
    std::vector<int> hash_a, hash_b;         // per-row hash coefficients
    std::vector<std::vector<double> > count; // count[row][bucket]
    // Bucket index of node a in row i.
    int hash(int a, int i);
};
|
24
|
-
|
25
|
-
#endif /* nodehash_hpp */
|