effspm 0.3.0__cp310-cp310-macosx_11_0_arm64.whl → 0.3.3__cp310-cp310-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- effspm/_effspm.cpp +683 -2
- effspm/_effspm.cpython-310-darwin.so +0 -0
- effspm/btminer/src/load_inst.cpp +21 -11
- effspm/btminer/src/main.cpp +83 -0
- effspm/htminer/src/build_mdd.cpp +41 -66
- effspm/htminer/src/build_mdd.hpp +56 -49
- effspm/htminer/src/freq_miner.cpp +341 -307
- effspm/htminer/src/freq_miner.hpp +39 -40
- effspm/htminer/src/load_inst.cpp +287 -336
- effspm/htminer/src/load_inst.hpp +23 -6
- effspm/htminer/src/main.cpp +97 -0
- effspm/htminer/src/utility.cpp +38 -57
- effspm/htminer/src/utility.hpp +9 -64
- effspm/largebm/src/main.cpp +95 -0
- effspm/largehm/src/build_mdd.cpp +75 -110
- effspm/largehm/src/build_mdd.hpp +53 -73
- effspm/largehm/src/freq_miner.cpp +132 -173
- effspm/largehm/src/freq_miner.hpp +37 -60
- effspm/largehm/src/load_inst.cpp +136 -191
- effspm/largehm/src/load_inst.hpp +13 -50
- effspm/largehm/src/main.cpp +95 -0
- effspm/largehm/src/utility.cpp +46 -28
- effspm/largehm/src/utility.hpp +18 -16
- effspm/largepp/src/load_inst.cpp +5 -4
- effspm/largepp/src/main.cpp +108 -0
- effspm/load_inst.cpp +8 -8
- effspm/main.cpp +103 -0
- {effspm-0.3.0.dist-info → effspm-0.3.3.dist-info}/METADATA +1 -1
- effspm-0.3.3.dist-info/RECORD +60 -0
- effspm-0.3.0.dist-info/RECORD +0 -54
- {effspm-0.3.0.dist-info → effspm-0.3.3.dist-info}/WHEEL +0 -0
- {effspm-0.3.0.dist-info → effspm-0.3.3.dist-info}/licenses/LICENSE +0 -0
- {effspm-0.3.0.dist-info → effspm-0.3.3.dist-info}/top_level.txt +0 -0
effspm/largehm/src/load_inst.hpp
CHANGED
|
@@ -1,64 +1,27 @@
|
|
|
1
|
-
#
|
|
2
|
-
#define LARGEHM_LOAD_INST_HPP
|
|
1
|
+
#pragma once
|
|
3
2
|
|
|
4
|
-
#include <string>
|
|
5
3
|
#include <vector>
|
|
4
|
+
#include <string>
|
|
6
5
|
#include <fstream>
|
|
7
|
-
#include <
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
#include
|
|
6
|
+
#include <map>
|
|
7
|
+
#include <unordered_set>
|
|
8
|
+
#include <unordered_map>
|
|
9
|
+
#include <time.h>
|
|
11
10
|
|
|
12
11
|
namespace largehm {
|
|
13
12
|
|
|
14
|
-
|
|
15
|
-
// ─── Globals & Function Prototypes ───────────────────────────────────────────
|
|
16
|
-
//
|
|
17
|
-
|
|
18
|
-
// Output/folder:
|
|
19
|
-
extern std::string out_file;
|
|
20
|
-
extern std::string folder;
|
|
21
|
-
|
|
22
|
-
// Flags:
|
|
23
|
-
extern bool b_disp;
|
|
24
|
-
extern bool b_write;
|
|
25
|
-
extern bool use_dic;
|
|
26
|
-
extern bool use_list;
|
|
27
|
-
extern bool just_build;
|
|
28
|
-
extern bool pre_pro;
|
|
29
|
-
extern bool itmset_exists;
|
|
13
|
+
using namespace std;
|
|
30
14
|
|
|
31
|
-
|
|
32
|
-
extern unsigned int M;
|
|
33
|
-
extern unsigned int L;
|
|
34
|
-
extern unsigned int mlim;
|
|
35
|
-
extern unsigned int time_limit;
|
|
15
|
+
bool Load_instance(string& items_file, double thresh);
|
|
36
16
|
|
|
37
|
-
extern
|
|
38
|
-
extern unsigned long long int theta;
|
|
39
|
-
extern unsigned long long int E;
|
|
17
|
+
extern string out_file, folder;
|
|
40
18
|
|
|
41
|
-
|
|
42
|
-
extern clock_t start_time;
|
|
43
|
-
|
|
44
|
-
// In‐memory sequences (only if “in‐memory” mode):
|
|
45
|
-
extern std::vector<std::vector<int>> items;
|
|
46
|
-
|
|
47
|
-
// Preprocessing dictionary (maps original → compressed IDs):
|
|
48
|
-
extern std::vector<int> item_dic;
|
|
19
|
+
extern bool b_disp, b_write, use_dic, just_build, pre_pro, itmset_exists;
|
|
49
20
|
|
|
50
|
-
|
|
51
|
-
extern std::vector<Pattern> DFS;
|
|
52
|
-
extern std::vector<VPattern> VDFS;
|
|
21
|
+
extern unsigned int M, L, mlim, time_limit;
|
|
53
22
|
|
|
54
|
-
|
|
55
|
-
bool Load_items_pre(std::string &inst_name);
|
|
56
|
-
bool Load_items(std::string &inst_name);
|
|
57
|
-
bool Preprocess(std::string &inst, double thresh);
|
|
23
|
+
extern unsigned long long int N, theta, E;
|
|
58
24
|
|
|
59
|
-
|
|
60
|
-
bool Load_instance(std::string &items_file, double thresh);
|
|
25
|
+
extern clock_t start_time;
|
|
61
26
|
|
|
62
27
|
} // namespace largehm
|
|
63
|
-
|
|
64
|
-
#endif // LARGEHM_LOAD_INST_HPP
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
#include <iostream>
|
|
2
|
+
#include <time.h>
|
|
3
|
+
#include <string.h>
|
|
4
|
+
#include <string>
|
|
5
|
+
#include "load_inst.hpp"
|
|
6
|
+
#include "build_mdd.hpp"
|
|
7
|
+
#include "utility.hpp"
|
|
8
|
+
#include "freq_miner.hpp"
|
|
9
|
+
|
|
10
|
+
using namespace std;
|
|
11
|
+
|
|
12
|
+
// Output file name chosen by the -out option in main().
string out_file;

// Switches adjusted by the command-line parser in main().
bool b_disp = false;      // turned on by -out
bool b_write = false;     // turned on by -out <file> and -out +<file>
bool use_dic = false;     // turned on by -dic
bool just_build = false;  // turned on by -jbuild; main() then skips Freq_miner()
bool pre_pro = true;      // turned off by -npre

// Budget that main() compares give_time() against; replaced by -time.
unsigned int time_limit = 10 * 3600;

// clock() reading taken by main() right before the instance is loaded.
clock_t start_time;

// Prefix of the input path (folder + file + ".txt"); set by -folder.
string folder;
|
|
21
|
+
|
|
22
|
+
int main(int argc, char* argv[]) {
|
|
23
|
+
|
|
24
|
+
string VV, attr;
|
|
25
|
+
|
|
26
|
+
double thresh = 0;
|
|
27
|
+
for (int i = 1; i<argc; i++) {
|
|
28
|
+
if (argv[i][0] != '-' || isdigit(argv[i][1]))
|
|
29
|
+
continue;
|
|
30
|
+
else if (strcmp(argv[i], "-thr") == 0)
|
|
31
|
+
thresh = stod(argv[i + 1]);
|
|
32
|
+
else if (strcmp(argv[i], "-file") == 0)
|
|
33
|
+
VV = argv[i + 1];
|
|
34
|
+
else if (strcmp(argv[i], "-time") == 0)
|
|
35
|
+
time_limit = stoi(argv[i + 1]);
|
|
36
|
+
else if (strcmp(argv[i], "-jbuild") == 0)
|
|
37
|
+
just_build = 1;
|
|
38
|
+
else if (strcmp(argv[i], "-folder") == 0)
|
|
39
|
+
folder = argv[i + 1];
|
|
40
|
+
else if (strcmp(argv[i], "-npre") == 0)
|
|
41
|
+
pre_pro = 0;
|
|
42
|
+
else if (strcmp(argv[i], "-dic") == 0)
|
|
43
|
+
use_dic = 1;
|
|
44
|
+
else if (strcmp(argv[i], "-out") == 0) {
|
|
45
|
+
if (i + 1 == argc || argv[i + 1][0] == '-')
|
|
46
|
+
b_disp = 1;
|
|
47
|
+
else if (argv[i + 1][0] == '+') {
|
|
48
|
+
b_disp = 1;
|
|
49
|
+
b_write = 1;
|
|
50
|
+
if (strlen(argv[i + 1]) > 1) {
|
|
51
|
+
out_file = argv[i + 1];
|
|
52
|
+
out_file = out_file.substr(1, out_file.size() - 1);
|
|
53
|
+
}
|
|
54
|
+
else
|
|
55
|
+
out_file = VV;
|
|
56
|
+
}
|
|
57
|
+
else {
|
|
58
|
+
b_write = 1;
|
|
59
|
+
out_file = argv[i + 1];
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
else
|
|
64
|
+
cout << "Command " << argv[i] << " not recognized and skipped.\n";
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
cout << "\n********************** " << VV << "**********************\n";
|
|
70
|
+
|
|
71
|
+
string item_file = folder + VV + ".txt";
|
|
72
|
+
|
|
73
|
+
cout << "loading instances...\n";
|
|
74
|
+
|
|
75
|
+
start_time = clock();
|
|
76
|
+
|
|
77
|
+
if (!Load_instance(item_file, thresh)) {
|
|
78
|
+
cout << "Files invalid, exiting.\n";
|
|
79
|
+
cin.get();
|
|
80
|
+
return 0;
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
//kk = clock();
|
|
84
|
+
|
|
85
|
+
if (!just_build && give_time(clock() - start_time) < time_limit) {
|
|
86
|
+
Freq_miner();
|
|
87
|
+
if (give_time(clock() - start_time) >= time_limit)
|
|
88
|
+
cout << "TIME LIMIT REACHED\n";
|
|
89
|
+
cout << "Mining Complete\n\nFound a total of " << num_patt << " patterns\n";
|
|
90
|
+
cout << "\nTotal CPU time " << give_time(clock() - start_time) << " seconds\n\n";
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
return 0;
|
|
95
|
+
}
|
effspm/largehm/src/utility.cpp
CHANGED
|
@@ -2,37 +2,55 @@
|
|
|
2
2
|
#include "build_mdd.hpp"
|
|
3
3
|
#include "load_inst.hpp"
|
|
4
4
|
#include <iostream>
|
|
5
|
+
|
|
5
6
|
namespace largehm {
|
|
6
|
-
std::vector<std::vector<int>> collected;
|
|
7
|
-
bool check_parent(unsigned long long int cur_anct, unsigned long long int str_pnt, unsigned long long int start, vector<unsigned long long int>& strpnt_vec) {
|
|
8
|
-
|
|
9
|
-
vector<unsigned long long int> ancestors;
|
|
10
|
-
|
|
11
|
-
while (abs(Tree[cur_anct].itmset) > abs(Tree[str_pnt].itmset)) {
|
|
12
|
-
if (Tree[cur_anct].item > 0)
|
|
13
|
-
ancestors.push_back(cur_anct);
|
|
14
|
-
cur_anct = Tree[cur_anct].anct;
|
|
15
|
-
}
|
|
16
|
-
|
|
17
|
-
if (abs(Tree[cur_anct].itmset) == abs(Tree[str_pnt].itmset))
|
|
18
|
-
return 1;
|
|
19
|
-
else {
|
|
20
|
-
for (vector<unsigned long long int>::reverse_iterator it = ancestors.rbegin(); it != ancestors.rend(); ++it) {
|
|
21
|
-
for (unsigned int i = start; i < strpnt_vec.size(); ++i) {
|
|
22
|
-
if (strpnt_vec[i] == *it)
|
|
23
|
-
return 1;
|
|
24
|
-
}
|
|
25
|
-
}
|
|
26
|
-
}
|
|
27
|
-
|
|
28
|
-
return 0;
|
|
29
7
|
|
|
8
|
+
using namespace std;
|
|
9
|
+
|
|
10
|
+
// storage for mined patterns (each pattern = vector<int>)
|
|
11
|
+
std::vector<std::vector<int>> collectedPatterns;
|
|
12
|
+
|
|
13
|
+
bool check_parent(unsigned long long int cur_anct,
|
|
14
|
+
unsigned long long int str_pnt,
|
|
15
|
+
unsigned long long int start,
|
|
16
|
+
vector<unsigned long long int>& strpnt_vec) {
|
|
17
|
+
|
|
18
|
+
vector<unsigned long long int> ancestors;
|
|
19
|
+
|
|
20
|
+
while (abs(Tree[cur_anct].itmset) > abs(Tree[str_pnt].itmset)) {
|
|
21
|
+
if (Tree[cur_anct].item > 0)
|
|
22
|
+
ancestors.push_back(cur_anct);
|
|
23
|
+
cur_anct = Tree[cur_anct].anct;
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
if (abs(Tree[cur_anct].itmset) == abs(Tree[str_pnt].itmset))
|
|
27
|
+
return 1;
|
|
28
|
+
else {
|
|
29
|
+
for (vector<unsigned long long int>::reverse_iterator it = ancestors.rbegin();
|
|
30
|
+
it != ancestors.rend(); ++it) {
|
|
31
|
+
for (unsigned int i = start; i < strpnt_vec.size(); ++i) {
|
|
32
|
+
if (strpnt_vec[i] == *it)
|
|
33
|
+
return 1;
|
|
34
|
+
}
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
return 0;
|
|
30
39
|
}
|
|
31
40
|
|
|
41
|
+
/// Converts a clock_t tick count into seconds (ticks / CLOCKS_PER_SEC) as a float.
float give_time(clock_t kk) {
    return static_cast<float>(kk) / CLOCKS_PER_SEC;
}
|
|
32
45
|
|
|
46
|
+
// clear vector used to return patterns to Python
|
|
47
|
+
void ClearCollected() {
|
|
48
|
+
collectedPatterns.clear();
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
// return reference so Python wrapper can build list[list[int]]
|
|
52
|
+
const std::vector<std::vector<int>>& GetCollected() {
|
|
53
|
+
return collectedPatterns;
|
|
54
|
+
}
|
|
33
55
|
|
|
34
|
-
//
|
|
35
|
-
// float ll = ((float)kk) / CLOCKS_PER_SEC;
|
|
36
|
-
// return ll;
|
|
37
|
-
// }
|
|
38
|
-
}
|
|
56
|
+
} // namespace largehm
|
effspm/largehm/src/utility.hpp
CHANGED
|
@@ -6,24 +6,26 @@
|
|
|
6
6
|
#include "build_mdd.hpp"
|
|
7
7
|
|
|
8
8
|
namespace largehm {
|
|
9
|
-
using namespace std;
|
|
10
9
|
|
|
11
|
-
|
|
10
|
+
using std::vector;
|
|
11
|
+
using std::string;
|
|
12
12
|
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
collected.clear();
|
|
16
|
-
}
|
|
17
|
-
inline const std::vector<std::vector<int>>& GetCollected() {
|
|
18
|
-
return collected;
|
|
19
|
-
}
|
|
13
|
+
// time helper
|
|
14
|
+
float give_time(clock_t kk);
|
|
20
15
|
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
bool check_parent(unsigned long long int cur_anct, unsigned long long int str_pnt, unsigned long long int start, vector<unsigned long long int>& strpnt_vec);
|
|
16
|
+
// ancestor-check helper
|
|
17
|
+
bool check_parent(unsigned long long int cur_anct,
|
|
18
|
+
unsigned long long int str_pnt,
|
|
19
|
+
unsigned long long int start,
|
|
20
|
+
vector<unsigned long long int>& strpnt_vec);
|
|
27
21
|
|
|
22
|
+
// pattern collection for Python wrapper
|
|
23
|
+
extern std::vector<std::vector<int>> collectedPatterns;
|
|
28
24
|
|
|
29
|
-
|
|
25
|
+
// clear collected patterns between runs
|
|
26
|
+
void ClearCollected();
|
|
27
|
+
|
|
28
|
+
// get collected patterns after mining
|
|
29
|
+
const std::vector<std::vector<int>>& GetCollected();
|
|
30
|
+
|
|
31
|
+
} // namespace largehm
|
effspm/largepp/src/load_inst.cpp
CHANGED
|
@@ -50,10 +50,11 @@ bool Load_instance(string& items_file, double thresh)
|
|
|
50
50
|
return false;
|
|
51
51
|
else
|
|
52
52
|
theta = (thresh < 1.0) ? ceil(thresh * N) : thresh;
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
53
|
+
if (b_disp)
|
|
54
|
+
cout << "\nMDD Database built in " << give_time(clock() - kk) << " seconds\n\n";
|
|
55
|
+
if (b_disp)
|
|
56
|
+
cout << "Found " << N << " sequence, with max line len " << M
|
|
57
|
+
<< ", and " << L << " items, and " << E << " enteries\n";
|
|
57
58
|
|
|
58
59
|
// ───────────────────────────────────────────────────────────
|
|
59
60
|
// DEBUG snapshot of seeds right after loading
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
#include <iostream>
|
|
2
|
+
#include <time.h>
|
|
3
|
+
#include <string.h>
|
|
4
|
+
#include <string>
|
|
5
|
+
#include "load_inst.hpp"
|
|
6
|
+
#include "freq_miner.hpp"
|
|
7
|
+
#include "utility.hpp"
|
|
8
|
+
|
|
9
|
+
namespace largepp // ← BEGIN namespacing
|
|
10
|
+
{
|
|
11
|
+
using namespace std;
|
|
12
|
+
|
|
13
|
+
string out_file;
|
|
14
|
+
|
|
15
|
+
bool b_disp = 0, b_write = 0, use_dic = 0, just_build = 0, ovr_count = 0, pre_pro = 0;
|
|
16
|
+
|
|
17
|
+
unsigned int time_limit = 10 * 3600;
|
|
18
|
+
|
|
19
|
+
clock_t start_time;
|
|
20
|
+
|
|
21
|
+
int main(int argc, char* argv[]) {
|
|
22
|
+
|
|
23
|
+
double thresh = 0;
|
|
24
|
+
string VV, folder;
|
|
25
|
+
for (int i = 1; i < argc; ++i){
|
|
26
|
+
if (argv[i][0] !='-')
|
|
27
|
+
continue;
|
|
28
|
+
else if (strcmp(argv[i], "-thr") == 0)
|
|
29
|
+
thresh = stof(argv[i + 1]);
|
|
30
|
+
else if (strcmp(argv[i], "-file") == 0)
|
|
31
|
+
VV = argv[i + 1];
|
|
32
|
+
else if (strcmp(argv[i], "-folder") == 0)
|
|
33
|
+
folder = argv[i + 1];
|
|
34
|
+
else if (strcmp(argv[i], "-time") == 0)
|
|
35
|
+
time_limit = stoi(argv[i + 1]);
|
|
36
|
+
else if (strcmp(argv[i], "-jbuild") == 0)
|
|
37
|
+
just_build = 1;
|
|
38
|
+
else if (strcmp(argv[i], "-npre") == 0)
|
|
39
|
+
pre_pro = 0;
|
|
40
|
+
else if (strcmp(argv[i], "-ovrc") == 0)
|
|
41
|
+
ovr_count = 1;
|
|
42
|
+
else if (strcmp(argv[i], "-dic") == 0)
|
|
43
|
+
use_dic = 1;
|
|
44
|
+
else if (strcmp(argv[i], "-out") == 0){
|
|
45
|
+
if (i + 1 == argc || argv[i + 1][0] == '-')
|
|
46
|
+
b_disp = 1;
|
|
47
|
+
else if (argv[i + 1][0] == '+') {
|
|
48
|
+
b_disp = 1;
|
|
49
|
+
b_write = 1;
|
|
50
|
+
out_file = argv[i + 1];
|
|
51
|
+
out_file = out_file.substr(1,out_file.size()-1);
|
|
52
|
+
}
|
|
53
|
+
else {
|
|
54
|
+
b_write = 1;
|
|
55
|
+
out_file = argv[i + 1];
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
else
|
|
59
|
+
cout << "Command " << argv[i] << " not recognized and skipped.\n";
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
if (thresh == 0){
|
|
63
|
+
cout << "No Threshold given, using threshold deafult of 1%\n";
|
|
64
|
+
thresh = 0.01;
|
|
65
|
+
}
|
|
66
|
+
if (folder.back() != '/'){
|
|
67
|
+
folder += '/';
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
cout << "\n********************** " << VV << "**********************\n";
|
|
72
|
+
|
|
73
|
+
string item_file = folder + VV + ".txt";
|
|
74
|
+
//out_file = folder + VV + "_result.txt";
|
|
75
|
+
cout << "loading instances...\n";
|
|
76
|
+
|
|
77
|
+
start_time = clock();
|
|
78
|
+
|
|
79
|
+
if(!Load_instance(item_file, thresh)){
|
|
80
|
+
cout << "Files invalid, exiting.\n";
|
|
81
|
+
return 0;
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
cout << "Instances loaded\n";
|
|
85
|
+
|
|
86
|
+
if (!just_build && give_time(clock() - start_time) < time_limit) {
|
|
87
|
+
cout << "\nRunning mining algorithm...\n";
|
|
88
|
+
Freq_miner();
|
|
89
|
+
if (give_time(clock() - start_time) >= time_limit)
|
|
90
|
+
cout << "TIME LIMIT REACHED\n";
|
|
91
|
+
cout << "Mining Complete\n\nFound a total of " << num_patt << " patterns\n";
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
cout << "Total CPU time is: ";
|
|
95
|
+
cout << give_time(clock() - start_time) << "\n";
|
|
96
|
+
|
|
97
|
+
if (b_write){
|
|
98
|
+
ofstream file;
|
|
99
|
+
file.open(out_file, std::ios::app);
|
|
100
|
+
file << "\nMining completed in " << give_time(clock() - start_time) << " seconds\n";
|
|
101
|
+
//file << "Found a total of " << num_max_patt << " maximal patterns\n";
|
|
102
|
+
file.close();
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
return 0;
|
|
107
|
+
}
|
|
108
|
+
}
|
effspm/load_inst.cpp
CHANGED
|
@@ -25,8 +25,8 @@ bool Load_instance(string &items_file, double thresh) {
|
|
|
25
25
|
if (pre_pro) {
|
|
26
26
|
if(!Preprocess(items_file, thresh))
|
|
27
27
|
return 0;
|
|
28
|
-
|
|
29
|
-
|
|
28
|
+
if (b_disp)
|
|
29
|
+
cout << "\nPreprocess done in " << give_time(clock() - kk) << " seconds\n\n";
|
|
30
30
|
|
|
31
31
|
DFS.reserve(L);
|
|
32
32
|
for (int i = 0; i < L; ++i)
|
|
@@ -48,10 +48,10 @@ bool Load_instance(string &items_file, double thresh) {
|
|
|
48
48
|
else
|
|
49
49
|
theta = thresh;
|
|
50
50
|
}
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
51
|
+
if (b_disp)
|
|
52
|
+
cout << "\nMDD Database built in " << give_time(clock() - kk) << " seconds\n\n";
|
|
53
|
+
if (b_disp)
|
|
54
|
+
cout << "Found " << N << " sequence, with max line len " << M << ", and " << L << " items, and " << E << " enteries\n";
|
|
55
55
|
|
|
56
56
|
|
|
57
57
|
return 1;
|
|
@@ -108,8 +108,8 @@ bool Preprocess(string &inst, double thresh) {
|
|
|
108
108
|
if (freq[i] >= theta)
|
|
109
109
|
item_dic[i] = ++real_L;
|
|
110
110
|
}
|
|
111
|
-
|
|
112
|
-
|
|
111
|
+
if (b_disp)
|
|
112
|
+
cout << "Original number of items: " << L << " Reduced to: " << real_L << endl;
|
|
113
113
|
|
|
114
114
|
L = real_L;
|
|
115
115
|
N = 0;
|
effspm/main.cpp
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
#include <iostream>
|
|
2
|
+
#include <time.h>
|
|
3
|
+
#include <string.h>
|
|
4
|
+
#include <string>
|
|
5
|
+
#include "load_inst.hpp"
|
|
6
|
+
#include "freq_miner.hpp"
|
|
7
|
+
#include "utility.hpp"
|
|
8
|
+
|
|
9
|
+
using namespace std;
|
|
10
|
+
|
|
11
|
+
// Output file name chosen by the -out option in main().
string out_file;

// Switches adjusted by the command-line parser in main().
bool b_disp = false;    // turned on by -out
bool b_write = false;   // turned on by -out <file> and -out +<file>
bool use_dic = false;   // turned on by -dic
bool use_list = false;  // turned on by -uselist
bool pre_pro = false;   // turned on by -preproc

// Budget that main() compares give_time() against; replaced by -time.
unsigned int time_limit = 10 * 3600;

// clock() reading taken by main() right before the instance is loaded.
clock_t start_time;
|
|
18
|
+
|
|
19
|
+
int main(int argc, char* argv[]) {
|
|
20
|
+
|
|
21
|
+
double thresh = 0;
|
|
22
|
+
string VV, folder;
|
|
23
|
+
for (int i = 1; i < argc; ++i){
|
|
24
|
+
if (argv[i][0] !='-')
|
|
25
|
+
continue;
|
|
26
|
+
else if (strcmp(argv[i], "-thr") == 0)
|
|
27
|
+
thresh = stof(argv[i + 1]);
|
|
28
|
+
else if (strcmp(argv[i], "-file") == 0)
|
|
29
|
+
VV = argv[i + 1];
|
|
30
|
+
else if (strcmp(argv[i], "-folder") == 0)
|
|
31
|
+
folder = argv[i + 1];
|
|
32
|
+
else if (strcmp(argv[i], "-time") == 0)
|
|
33
|
+
time_limit = stoi(argv[i + 1]);
|
|
34
|
+
else if (strcmp(argv[i], "-uselist") == 0)
|
|
35
|
+
use_list = 1;
|
|
36
|
+
else if (strcmp(argv[i], "-preproc") == 0)
|
|
37
|
+
pre_pro = 1;
|
|
38
|
+
else if (strcmp(argv[i], "-dic") == 0)
|
|
39
|
+
use_dic = 1;
|
|
40
|
+
else if (strcmp(argv[i], "-out") == 0){
|
|
41
|
+
if (i + 1 == argc || argv[i + 1][0] == '-')
|
|
42
|
+
b_disp = 1;
|
|
43
|
+
else if (argv[i + 1][0] == '+') {
|
|
44
|
+
b_disp = 1;
|
|
45
|
+
b_write = 1;
|
|
46
|
+
out_file = argv[i + 1];
|
|
47
|
+
out_file = out_file.substr(1,out_file.size()-1);
|
|
48
|
+
}
|
|
49
|
+
else {
|
|
50
|
+
b_write = 1;
|
|
51
|
+
out_file = argv[i + 1];
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
else
|
|
55
|
+
cout << "Command " << argv[i] << " not recognized and skipped.\n";
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
if (thresh == 0){
|
|
59
|
+
cout << "No Threshold given, using threshold deafult of 1%\n";
|
|
60
|
+
thresh = 0.01;
|
|
61
|
+
}
|
|
62
|
+
if (folder.back() != '/'){
|
|
63
|
+
folder += '/';
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
cout << "\n********************** " << VV << "**********************\n";
|
|
68
|
+
|
|
69
|
+
string item_file = folder + VV + ".txt";
|
|
70
|
+
//out_file = folder + VV + "_result.txt";
|
|
71
|
+
cout << "loading instances...\n";
|
|
72
|
+
|
|
73
|
+
start_time = clock();
|
|
74
|
+
|
|
75
|
+
if(!Load_instance(item_file, thresh)){
|
|
76
|
+
cout << "Files invalid, exiting.\n";
|
|
77
|
+
return 0;
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
cout << "Instances loaded\n";
|
|
81
|
+
|
|
82
|
+
if (give_time(clock() - start_time) < time_limit) {
|
|
83
|
+
cout << "\nRunning mining algorithm...\n";
|
|
84
|
+
Freq_miner();
|
|
85
|
+
if (give_time(clock() - start_time) >= time_limit)
|
|
86
|
+
cout << "TIME LIMIT REACHED\n";
|
|
87
|
+
cout << "Mining Complete\n\nFound a total of " << num_patt << " patterns\n";
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
cout << "Total CPU time is: ";
|
|
91
|
+
cout << give_time(clock() - start_time) << "\n";
|
|
92
|
+
|
|
93
|
+
if (b_write){
|
|
94
|
+
ofstream file;
|
|
95
|
+
file.open(out_file, std::ios::app);
|
|
96
|
+
file << "\nMining completed in " << give_time(clock() - start_time) << " seconds\n";
|
|
97
|
+
//file << "Found a total of " << num_max_patt << " maximal patterns\n";
|
|
98
|
+
file.close();
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
return 0;
|
|
103
|
+
}
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
effspm/utility.hpp,sha256=Y_MQVk9AmJWjguKyuyk0LraTi_6VzZFg0StsmVOCkNc,744
|
|
2
|
+
effspm/_core.cpp,sha256=S6UsUcl0HQLMQUTy2tMJxcrbLGJo8tjkLskyHqESYyI,3675
|
|
3
|
+
effspm/load_inst.hpp,sha256=VF0o_Ur-mO5xhdWWn1KHrFLu9_bTbd8wcURCVF0A0dM,907
|
|
4
|
+
effspm/__init__.py,sha256=_BkZ8cFlB_l1haxTzxJMgFHhO6LEmF3pIY-nmPTPMj8,246
|
|
5
|
+
effspm/_effspm.cpython-310-darwin.so,sha256=yKRIaI44JpARZ4jpfUqryX_fYGSGUnzzc1XtKEH7gQM,547216
|
|
6
|
+
effspm/freq_miner.cpp,sha256=9K-nO_c-vVzbFlmh511vmvypA2fu1TXpiJhyx03owDU,4642
|
|
7
|
+
effspm/_effspm.cpp,sha256=NkxFvVVbR5CAsSUZO5Z_Z-UwFVCQRhy6_st8_cD-2Us,51027
|
|
8
|
+
effspm/load_inst.cpp,sha256=h9pjtvRCIrtqIAt8W1r8SxOb1YViW1pCR52tbxbPdh0,4591
|
|
9
|
+
effspm/freq_miner.hpp,sha256=XDhydwUFmute6slWwZcFuiBjGZs4Kv5-UvAmcI_IMwI,786
|
|
10
|
+
effspm/utility.cpp,sha256=luWwBNy7OVWRmam9gz2RjhtrRmx6c37oOobFJaDWqcA,1403
|
|
11
|
+
effspm/main.cpp,sha256=aMyPwuTn5dPIlLAZ_XfWL8Z6cXfQS7EaJ0tEQPqpA8I,2558
|
|
12
|
+
effspm/htminer/src/build_mdd.cpp,sha256=TcHZ9BhY_s9A4ZiH1rs1WW8T74x8efhCaLQ0KFKaY3c,3911
|
|
13
|
+
effspm/htminer/src/utility.hpp,sha256=q3a9YEkk3-ZOH2lT1z8OWjg3vUNiBjIhA9rvefcsm7U,470
|
|
14
|
+
effspm/htminer/src/load_inst.hpp,sha256=mrADPOTlVUTIw-utqk83uYBYyy-mzJV7cCQkGS-kkj4,807
|
|
15
|
+
effspm/htminer/src/freq_miner.cpp,sha256=lrUPd6Oi3ZsBc5-nF0TLxcD68mmus1YLuWpCp10Qji0,14038
|
|
16
|
+
effspm/htminer/src/load_inst.cpp,sha256=KigZtjHOEyB7EizQ8VdmL_PRGYsODKrVo947yk1y6Wg,9945
|
|
17
|
+
effspm/htminer/src/freq_miner.hpp,sha256=Vclu-KuqHBiRHIwZknpgiZZMV_-zrL-6I_SJY5hQWys,990
|
|
18
|
+
effspm/htminer/src/build_mdd.hpp,sha256=VP7w5JYZJVddDqRwG9Cy41RMTta7WZTVuDbFqu0JEfU,1152
|
|
19
|
+
effspm/htminer/src/utility.cpp,sha256=PWIyHOQBnU9XM1jsgKeadzmi9KjndTFhUoAogX2lEW8,1253
|
|
20
|
+
effspm/htminer/src/main.cpp,sha256=AQlwQNAZPZ9usrLFgffd9GSsAK9Xc5Z7wcRtDIXYGWI,2207
|
|
21
|
+
effspm/largepp/src/utility.hpp,sha256=i06elLVb7qWGwkalC1t03bfuQ7c06Eu3QFt3I92FnG8,736
|
|
22
|
+
effspm/largepp/src/load_inst.hpp,sha256=m-gGw92sDKqnT1DjkACZjl5iGjoHHONWSdermWvPB-w,907
|
|
23
|
+
effspm/largepp/src/pattern.hpp,sha256=t7O8N86Hi985gR4on0e8xdyskcJ_XLh-LpqCGPFB1Z4,639
|
|
24
|
+
effspm/largepp/src/freq_miner.cpp,sha256=G6Kg1m-4jqeJBQELNAdn4wpdio85nhEh9RgWaTVLmkk,7234
|
|
25
|
+
effspm/largepp/src/load_inst.cpp,sha256=BMCsSs7xeoQdog1dKlK3szl7RtO_Pg6Dy8Smi3RhVU0,8107
|
|
26
|
+
effspm/largepp/src/freq_miner.hpp,sha256=JS1Kgq5Pjqnr2WnCsB7YZdXXffEHDvGnp0HJsv_y4l0,399
|
|
27
|
+
effspm/largepp/src/utility.cpp,sha256=Hbn49pUWCf1ixWyhiMBxXCW2a7uttkSNwc5gPgA1vqA,954
|
|
28
|
+
effspm/largepp/src/main.cpp,sha256=2y_xGXSHo2wyrHJh86VrD1GMDHaEDMBqvUmt5Rb03oI,2696
|
|
29
|
+
effspm/largehm/src/build_mdd.cpp,sha256=xlTZUdNZBy5_Ms-EfRfY-rCmjkw3My2-2FcZbnOyhCo,4444
|
|
30
|
+
effspm/largehm/src/utility.hpp,sha256=y3qI5dVP-hjTycrnvoGz9PRJTUUB-dqlBbYk8wlBXr4,716
|
|
31
|
+
effspm/largehm/src/load_inst.hpp,sha256=HV_lfjGCvDI0ZKmz7DVlNKZ_xOd-37YJOpy06O7PP-8,500
|
|
32
|
+
effspm/largehm/src/freq_miner.cpp,sha256=aWLVUnq_bdz3RIeQaPsNWw8qw9sdUnUaQcCw57M6AWw,14053
|
|
33
|
+
effspm/largehm/src/load_inst.cpp,sha256=VfUFDGrJJZS2GYecM9RCLzF6iTlq9VHbNxpiTJPn0tY,8308
|
|
34
|
+
effspm/largehm/src/freq_miner.hpp,sha256=PjAFrLSrhZG8cWReHsv-EJnc0HHqry0mQE-g-Lff9tQ,897
|
|
35
|
+
effspm/largehm/src/build_mdd.hpp,sha256=ZalJUY80miPvy0bIMRKTT4SRFnK9fxD_HD3dVksTRnQ,1280
|
|
36
|
+
effspm/largehm/src/utility.cpp,sha256=l0itd9GvZ_YWqwD1s-hN00j67wvbLf7k2QajK94euVk,1495
|
|
37
|
+
effspm/largehm/src/main.cpp,sha256=I9vYzBtok-6z_GefCWH7mCfLYtebdxZYr0LoOjEeORs,2142
|
|
38
|
+
effspm/largebm/src/build_mdd.cpp,sha256=laELkRB3S1gF22GSvElcRIU8fYqDw0cOij34mxmuoFM,2584
|
|
39
|
+
effspm/largebm/src/utility.hpp,sha256=wAOWyxrpazA8uOK82aMOXBkYsM7flbdICggGHrikOCM,413
|
|
40
|
+
effspm/largebm/src/load_inst.hpp,sha256=DcvCmsRh5kD2iCBKHyhDsldIwNvC17Y03EG3HXQdjv8,1158
|
|
41
|
+
effspm/largebm/src/freq_miner.cpp,sha256=UBeEHqUq4PuE1DxW2X6QnbZrPjWTeGPjSKusX4qwIvo,10913
|
|
42
|
+
effspm/largebm/src/load_inst.cpp,sha256=yOHemF_S-OhhIYfM4r2KAqq7752cdidXWun99HBnfh0,6820
|
|
43
|
+
effspm/largebm/src/freq_miner.hpp,sha256=wxqwP1YuukL2gp3cdOi6EFvEWqEDhv_o1P7ESmeXc3Y,874
|
|
44
|
+
effspm/largebm/src/build_mdd.hpp,sha256=-rUlWW2qDc5xrkxD_5_EzXpqlL5ZsX0CFGgCZTgqd_c,859
|
|
45
|
+
effspm/largebm/src/utility.cpp,sha256=wr8PAW_dODVIeBSDPOmcdmguj8ovAIjm7Kh2nU_fPzo,1049
|
|
46
|
+
effspm/largebm/src/main.cpp,sha256=fcGqsLCmnCPOyrMglSMjG8yC0QoGXdmfH6G2Jm5jycU,2083
|
|
47
|
+
effspm/btminer/src/build_mdd.cpp,sha256=7J5KMNNB3DngB4Bkn1h-zMfAgRvRiXP4MEkPbd3C-ZE,2209
|
|
48
|
+
effspm/btminer/src/utility.hpp,sha256=5m34T2ATvYY_32Sz6h0Zro4kLolosPwASJF0TiDKhhI,312
|
|
49
|
+
effspm/btminer/src/load_inst.hpp,sha256=Fj72XjlR7ppGi76vc0PMZZAkj_-JklTVuIqzbsZcTAw,987
|
|
50
|
+
effspm/btminer/src/freq_miner.cpp,sha256=sTA-bkUZqgFU9tVtSsUUfaVj7BU8MiFZm3scwRsxMeg,9192
|
|
51
|
+
effspm/btminer/src/load_inst.cpp,sha256=fquloXEJlmSXYbwstgX_lAO5-ar0CS6Gx1cfHQRQbS0,8366
|
|
52
|
+
effspm/btminer/src/freq_miner.hpp,sha256=sS2CGdNT0zSGtSz0evZZlUkuiKyWlSvrR3-M2YXP7-Q,1055
|
|
53
|
+
effspm/btminer/src/build_mdd.hpp,sha256=p0pEcNZMD-WqV0UB1WtOn-Soe0105gEL071Ht5okgJM,627
|
|
54
|
+
effspm/btminer/src/utility.cpp,sha256=YmwdNPCUHFjydwrUAyBLEkFqUecyg7r4HbPdgoD-j3s,1194
|
|
55
|
+
effspm/btminer/src/main.cpp,sha256=Jh9M5nsZUL3i-iENn7Jh--TOY0F5dnu6CvqZ7udWU3A,2678
|
|
56
|
+
effspm-0.3.3.dist-info/RECORD,,
|
|
57
|
+
effspm-0.3.3.dist-info/WHEEL,sha256=wyHf6UDzyHyUK-aDYscyyyExpYI7SeEZ9xjyEiU4cnw,109
|
|
58
|
+
effspm-0.3.3.dist-info/top_level.txt,sha256=2O-AuI0nw0pDmJMo2jzM1wvV2rj48AmkjskkAnsuuQk,7
|
|
59
|
+
effspm-0.3.3.dist-info/METADATA,sha256=0V27Cms7UBjdK62_SOAdqiAhq0bOIByG6oUTTALZEn8,14227
|
|
60
|
+
effspm-0.3.3.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|