effspm 0.3.0__cp310-cp310-macosx_11_0_arm64.whl → 0.3.3__cp310-cp310-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- effspm/_effspm.cpp +683 -2
- effspm/_effspm.cpython-310-darwin.so +0 -0
- effspm/btminer/src/load_inst.cpp +21 -11
- effspm/btminer/src/main.cpp +83 -0
- effspm/htminer/src/build_mdd.cpp +41 -66
- effspm/htminer/src/build_mdd.hpp +56 -49
- effspm/htminer/src/freq_miner.cpp +341 -307
- effspm/htminer/src/freq_miner.hpp +39 -40
- effspm/htminer/src/load_inst.cpp +287 -336
- effspm/htminer/src/load_inst.hpp +23 -6
- effspm/htminer/src/main.cpp +97 -0
- effspm/htminer/src/utility.cpp +38 -57
- effspm/htminer/src/utility.hpp +9 -64
- effspm/largebm/src/main.cpp +95 -0
- effspm/largehm/src/build_mdd.cpp +75 -110
- effspm/largehm/src/build_mdd.hpp +53 -73
- effspm/largehm/src/freq_miner.cpp +132 -173
- effspm/largehm/src/freq_miner.hpp +37 -60
- effspm/largehm/src/load_inst.cpp +136 -191
- effspm/largehm/src/load_inst.hpp +13 -50
- effspm/largehm/src/main.cpp +95 -0
- effspm/largehm/src/utility.cpp +46 -28
- effspm/largehm/src/utility.hpp +18 -16
- effspm/largepp/src/load_inst.cpp +5 -4
- effspm/largepp/src/main.cpp +108 -0
- effspm/load_inst.cpp +8 -8
- effspm/main.cpp +103 -0
- {effspm-0.3.0.dist-info → effspm-0.3.3.dist-info}/METADATA +1 -1
- effspm-0.3.3.dist-info/RECORD +60 -0
- effspm-0.3.0.dist-info/RECORD +0 -54
- {effspm-0.3.0.dist-info → effspm-0.3.3.dist-info}/WHEEL +0 -0
- {effspm-0.3.0.dist-info → effspm-0.3.3.dist-info}/licenses/LICENSE +0 -0
- {effspm-0.3.0.dist-info → effspm-0.3.3.dist-info}/top_level.txt +0 -0
|
Binary file
|
effspm/btminer/src/load_inst.cpp
CHANGED
|
@@ -31,7 +31,7 @@ map<int, string> item_map_rev;
|
|
|
31
31
|
|
|
32
32
|
std::vector<int> freq;
|
|
33
33
|
std::vector<int> item_dic;
|
|
34
|
-
|
|
34
|
+
std::vector<std::vector<int>> items;
|
|
35
35
|
// ✅ REAL DEFINITION lives here:
|
|
36
36
|
std::vector<Pattern> DFS;
|
|
37
37
|
|
|
@@ -67,8 +67,8 @@ bool Load_instance(string &items_file, double thresh) {
|
|
|
67
67
|
if (pre_pro) {
|
|
68
68
|
if (!Preprocess(items_file, thresh))
|
|
69
69
|
return false;
|
|
70
|
-
|
|
71
|
-
|
|
70
|
+
if (b_disp)
|
|
71
|
+
cout << "\nPreprocess done in " << give_time(clock() - kk) << " seconds\n\n";
|
|
72
72
|
|
|
73
73
|
// build empty DFS of size L
|
|
74
74
|
DFS.clear();
|
|
@@ -88,12 +88,13 @@ bool Load_instance(string &items_file, double thresh) {
|
|
|
88
88
|
else
|
|
89
89
|
theta = static_cast<int>(thresh);
|
|
90
90
|
}
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
91
|
+
if (b_disp)
|
|
92
|
+
cout << "\nMDD Database built in " << give_time(clock() - kk) << " seconds\n\n";
|
|
93
|
+
if (b_disp)
|
|
94
|
+
cout << "Found " << N * N_mult
|
|
95
|
+
<< " sequence, with max line len " << M
|
|
96
|
+
<< ", and " << L << " items, and " << E << " enteries\n";
|
|
97
|
+
//cout << "Total MDD nodes: " << Tree.size() << endl;
|
|
97
98
|
|
|
98
99
|
return true;
|
|
99
100
|
}
|
|
@@ -102,6 +103,15 @@ bool Load_instance(string &items_file, double thresh) {
|
|
|
102
103
|
// preprocessing pass
|
|
103
104
|
// ---------------------------------------------------------------------
|
|
104
105
|
bool Preprocess(string &inst, double thresh) {
|
|
106
|
+
N = 0;
|
|
107
|
+
L = 0;
|
|
108
|
+
freq.clear();
|
|
109
|
+
item_dic.clear();
|
|
110
|
+
item_map.clear();
|
|
111
|
+
item_map_rev.clear();
|
|
112
|
+
// (E is usually for entries during Build_MDD, so we can leave it
|
|
113
|
+
// for the load phase; it’s already reset in the binding)
|
|
114
|
+
|
|
105
115
|
ifstream file(inst);
|
|
106
116
|
|
|
107
117
|
if (file.good()) {
|
|
@@ -147,8 +157,8 @@ bool Preprocess(string &inst, double thresh) {
|
|
|
147
157
|
if (freq[i] >= theta)
|
|
148
158
|
item_dic[i] = ++real_L;
|
|
149
159
|
}
|
|
150
|
-
|
|
151
|
-
|
|
160
|
+
if (b_disp)
|
|
161
|
+
cout << "Original number of items: " << L
|
|
152
162
|
<< " Reduced to: " << real_L << endl;
|
|
153
163
|
|
|
154
164
|
L = real_L;
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
#include <iostream>
|
|
2
|
+
#include <string.h>
|
|
3
|
+
#include "load_inst.hpp"
|
|
4
|
+
#include "freq_miner.hpp"
|
|
5
|
+
#include "utility.hpp"
|
|
6
|
+
#include "build_mdd.hpp"
|
|
7
|
+
|
|
8
|
+
namespace btminer {
|
|
9
|
+
// everything is already declared
|
|
10
|
+
}
|
|
11
|
+
|
|
12
|
+
int main(int argc, char* argv[]) {
|
|
13
|
+
using namespace btminer;
|
|
14
|
+
|
|
15
|
+
std::string VV, attr;
|
|
16
|
+
|
|
17
|
+
double thresh = 0;
|
|
18
|
+
for (int i = 1; i<argc; i++) {
|
|
19
|
+
if (argv[i][0] != '-' || isdigit(argv[i][1]))
|
|
20
|
+
continue;
|
|
21
|
+
else if (strcmp(argv[i], "-thr") == 0)
|
|
22
|
+
thresh = std::stod(argv[i + 1]);
|
|
23
|
+
else if (strcmp(argv[i], "-file") == 0)
|
|
24
|
+
VV = argv[i + 1];
|
|
25
|
+
else if (strcmp(argv[i], "-N_mult") == 0)
|
|
26
|
+
N_mult = std::stoi(argv[i + 1]);
|
|
27
|
+
else if (strcmp(argv[i], "-M_mult") == 0)
|
|
28
|
+
M_mult = std::stoi(argv[i + 1]);
|
|
29
|
+
else if (strcmp(argv[i], "-time") == 0)
|
|
30
|
+
time_limit = std::stoi(argv[i + 1]);
|
|
31
|
+
else if (strcmp(argv[i], "-jbuild") == 0)
|
|
32
|
+
just_build = 1;
|
|
33
|
+
else if (strcmp(argv[i], "-folder") == 0)
|
|
34
|
+
folder = argv[i + 1];
|
|
35
|
+
else if (strcmp(argv[i], "-npre") == 0)
|
|
36
|
+
pre_pro = 0;
|
|
37
|
+
else if (strcmp(argv[i], "-dic") == 0)
|
|
38
|
+
use_dic = 1;
|
|
39
|
+
else if (strcmp(argv[i], "-out") == 0) {
|
|
40
|
+
if (i + 1 == argc || argv[i + 1][0] == '-')
|
|
41
|
+
b_disp = 1;
|
|
42
|
+
else if (argv[i + 1][0] == '+') {
|
|
43
|
+
b_disp = 1;
|
|
44
|
+
b_write = 1;
|
|
45
|
+
if (strlen(argv[i + 1]) > 1) {
|
|
46
|
+
out_file = argv[i + 1];
|
|
47
|
+
out_file = out_file.substr(1, out_file.size() - 1);
|
|
48
|
+
}
|
|
49
|
+
else
|
|
50
|
+
out_file = VV;
|
|
51
|
+
}
|
|
52
|
+
else {
|
|
53
|
+
b_write = 1;
|
|
54
|
+
out_file = argv[i + 1];
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
else
|
|
58
|
+
std::cout << "Command " << argv[i] << " not recognized and skipped.\n";
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
std::cout << "\n********************** " << VV << " N_mult: " << N_mult << " M_mult: " << M_mult << "**********************\n";
|
|
62
|
+
|
|
63
|
+
std::string item_file = folder + VV + ".txt";
|
|
64
|
+
|
|
65
|
+
std::cout << "loading instances...\n";
|
|
66
|
+
|
|
67
|
+
start_time = clock();
|
|
68
|
+
|
|
69
|
+
if (!Load_instance(item_file, thresh)) {
|
|
70
|
+
std::cout << "Files invalid, exiting.\n";
|
|
71
|
+
return 0;
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
if (!just_build && give_time(clock() - start_time) < time_limit) {
|
|
75
|
+
Freq_miner();
|
|
76
|
+
if (give_time(clock() - start_time) >= time_limit)
|
|
77
|
+
std::cout << "TIME LIMIT REACHED\n";
|
|
78
|
+
std::cout << "Mining Complete\n\nFound a total of " << num_patt << " patterns\n";
|
|
79
|
+
std::cout << "\nTotal CPU time " << give_time(clock() - start_time) << " seconds\n\n";
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
return 0;
|
|
83
|
+
}
|
effspm/htminer/src/build_mdd.cpp
CHANGED
|
@@ -7,43 +7,32 @@
|
|
|
7
7
|
|
|
8
8
|
namespace htminer {
|
|
9
9
|
|
|
10
|
-
|
|
11
|
-
int Add_arc(int item, unsigned int last_arc, int& itmset, std::vector<unsigned int>& ancest_map);
|
|
12
|
-
void Add_vec(std::vector<int>& items_lim, std::vector<unsigned int>& ancest_map, unsigned int last_arc, int itmset);
|
|
10
|
+
using std::vector;
|
|
13
11
|
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
std::vector<VArc> VTree;
|
|
17
|
-
std::vector<CArc> CTree;
|
|
12
|
+
int Add_arc(int item, unsigned int last_arc, int& itmset, vector<unsigned int>& ancest_map);
|
|
13
|
+
void Add_vec(vector<int>& items_lim, vector<unsigned int>& ancest_map, unsigned int last_arc, int itmset);
|
|
18
14
|
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
15
|
+
vector<Arc> Tree;
|
|
16
|
+
vector<VArc> VTree;
|
|
17
|
+
vector<CArc> CTree;
|
|
22
18
|
|
|
23
|
-
|
|
24
|
-
int itmset = 0;
|
|
25
|
-
|
|
26
|
-
// 1) normal items
|
|
27
|
-
for (size_t idx = 0; idx < items.size(); ++idx) {
|
|
28
|
-
int curr_item = items[idx];
|
|
19
|
+
void Build_MDD(vector<int>& items, vector<int>& items_lim) {
|
|
29
20
|
|
|
30
|
-
|
|
21
|
+
vector<unsigned int> ancest_map(L, 0);
|
|
31
22
|
|
|
32
|
-
|
|
33
|
-
|
|
23
|
+
unsigned int last_arc = 0;
|
|
24
|
+
int itmset = 0;
|
|
25
|
+
for (vector<int>::iterator it = items.begin(); it != items.end(); ++it)
|
|
26
|
+
last_arc = Add_arc(*it, last_arc, itmset, ancest_map);
|
|
34
27
|
|
|
35
|
-
|
|
36
|
-
if (!items_lim.empty()) {
|
|
28
|
+
if (!items_lim.empty())
|
|
37
29
|
Add_vec(items_lim, ancest_map, last_arc, itmset);
|
|
38
|
-
}
|
|
39
30
|
}
|
|
40
31
|
|
|
41
|
-
int Add_arc(int item,
|
|
42
|
-
|
|
43
|
-
int& itmset,
|
|
44
|
-
std::vector<unsigned int>& ancest_map)
|
|
45
|
-
{
|
|
32
|
+
int Add_arc(int item, unsigned int last_arc, int& itmset, vector<unsigned int>& ancest_map) {
|
|
33
|
+
|
|
46
34
|
unsigned int anct = ancest_map[std::abs(item) - 1];
|
|
35
|
+
|
|
47
36
|
if (item < 0)
|
|
48
37
|
++itmset;
|
|
49
38
|
|
|
@@ -51,9 +40,8 @@ int Add_arc(int item,
|
|
|
51
40
|
|
|
52
41
|
if (last_sibl == 0) {
|
|
53
42
|
Tree.emplace_back(item, itmset, anct);
|
|
54
|
-
last_sibl =
|
|
43
|
+
last_sibl = (unsigned int)Tree.size() - 1;
|
|
55
44
|
Tree[last_arc].chld = last_sibl;
|
|
56
|
-
|
|
57
45
|
if (anct == 0)
|
|
58
46
|
DFS[std::abs(item) - 1].str_pnt.push_back(last_sibl);
|
|
59
47
|
}
|
|
@@ -61,8 +49,8 @@ int Add_arc(int item,
|
|
|
61
49
|
while (Tree[last_sibl].item != item) {
|
|
62
50
|
if (Tree[last_sibl].sibl == 0) {
|
|
63
51
|
Tree.emplace_back(item, itmset, anct);
|
|
64
|
-
Tree[last_sibl].sibl =
|
|
65
|
-
last_sibl
|
|
52
|
+
Tree[last_sibl].sibl = (unsigned int)Tree.size() - 1;
|
|
53
|
+
last_sibl = (unsigned int)Tree.size() - 1;
|
|
66
54
|
if (anct == 0)
|
|
67
55
|
DFS[std::abs(item) - 1].str_pnt.push_back(last_sibl);
|
|
68
56
|
break;
|
|
@@ -78,61 +66,48 @@ int Add_arc(int item,
|
|
|
78
66
|
|
|
79
67
|
ancest_map[std::abs(item) - 1] = last_sibl;
|
|
80
68
|
|
|
81
|
-
return
|
|
69
|
+
return (int)last_sibl;
|
|
82
70
|
}
|
|
83
71
|
|
|
84
|
-
void Add_vec(
|
|
85
|
-
std::vector<unsigned int>& ancest,
|
|
86
|
-
unsigned int last_arc,
|
|
87
|
-
int itmset)
|
|
88
|
-
{
|
|
89
|
-
items_lim.shrink_to_fit();
|
|
72
|
+
void Add_vec(vector<int>& items_lim, vector<unsigned int>& ancest, unsigned int last_arc, int itmset) {
|
|
90
73
|
|
|
91
|
-
|
|
74
|
+
items_lim.shrink_to_fit();
|
|
75
|
+
vector<bool> counted(L, 0);
|
|
92
76
|
|
|
93
77
|
if (Tree[last_arc].itmset > 0) {
|
|
94
|
-
ancest.push_back(0);
|
|
78
|
+
ancest.push_back(0); // last element of ancest is CArc child
|
|
95
79
|
ancest.shrink_to_fit();
|
|
96
|
-
|
|
97
|
-
for (size_t i = 0; i < items_lim.size(); ++i) {
|
|
80
|
+
for (int i = 0; i < (int)items_lim.size(); ++i) {
|
|
98
81
|
int cur_itm = std::abs(items_lim[i]);
|
|
99
|
-
|
|
100
|
-
++E; // ✅ count this limited-entry too
|
|
101
|
-
|
|
102
82
|
if (ancest[cur_itm - 1] == 0 && !counted[cur_itm - 1]) {
|
|
103
|
-
if (i + 1 <
|
|
104
|
-
VDFS[cur_itm - 1].str_pnt.push_back(-
|
|
105
|
-
VDFS[cur_itm - 1].seq_ID.push_back(
|
|
83
|
+
if (i + 1 < (int)items_lim.size()) {
|
|
84
|
+
VDFS[cur_itm - 1].str_pnt.push_back(-i - 1); // CTree positions: negative pointers
|
|
85
|
+
VDFS[cur_itm - 1].seq_ID.push_back((unsigned int)CTree.size());
|
|
106
86
|
}
|
|
107
87
|
++DFS[cur_itm - 1].freq;
|
|
108
|
-
counted[cur_itm - 1] =
|
|
88
|
+
counted[cur_itm - 1] = 1;
|
|
109
89
|
}
|
|
110
90
|
}
|
|
111
|
-
|
|
112
91
|
CTree.emplace_back(ancest, items_lim);
|
|
113
|
-
Tree[last_arc].chld
|
|
114
|
-
Tree[last_arc].itmset = -itmset;
|
|
92
|
+
Tree[last_arc].chld = (unsigned int)CTree.size() - 1;
|
|
93
|
+
Tree[last_arc].itmset = -itmset; // Tree→CTree edge is marked by negative itmset
|
|
115
94
|
}
|
|
116
95
|
else {
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
for (size_t i = 0; i < items_lim.size(); ++i) {
|
|
96
|
+
vector<unsigned int>& ancest_ref = CTree[Tree[last_arc].chld].ancest;
|
|
97
|
+
for (int i = 0; i < (int)items_lim.size(); ++i) {
|
|
120
98
|
int cur_itm = std::abs(items_lim[i]);
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
if (i + 1 < static_cast<int>(items_lim.size())) {
|
|
126
|
-
VDFS[cur_itm - 1].str_pnt.push_back(static_cast<unsigned int>(i) + 1);
|
|
127
|
-
VDFS[cur_itm - 1].seq_ID.push_back(static_cast<unsigned int>(VTree.size()));
|
|
99
|
+
if (!counted[cur_itm - 1] && ancest_ref[cur_itm - 1] == 0) {
|
|
100
|
+
if (i + 1 < (int)items_lim.size()) {
|
|
101
|
+
VDFS[cur_itm - 1].str_pnt.push_back(i + 1);
|
|
102
|
+
VDFS[cur_itm - 1].seq_ID.push_back((unsigned int)VTree.size());
|
|
128
103
|
}
|
|
129
104
|
++DFS[cur_itm - 1].freq;
|
|
130
|
-
counted[cur_itm - 1] =
|
|
105
|
+
counted[cur_itm - 1] = 1;
|
|
131
106
|
}
|
|
132
107
|
}
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
CTree
|
|
108
|
+
VTree.emplace_back(items_lim, CTree[Tree[last_arc].chld].ancest.back());
|
|
109
|
+
CTree[Tree[last_arc].chld].ancest.back() = (unsigned int)VTree.size();
|
|
110
|
+
// VTree siblings and CTree children are +1 of actual index to mark end
|
|
136
111
|
}
|
|
137
112
|
}
|
|
138
113
|
|
effspm/htminer/src/build_mdd.hpp
CHANGED
|
@@ -5,60 +5,67 @@
|
|
|
5
5
|
#include "load_inst.hpp"
|
|
6
6
|
|
|
7
7
|
namespace htminer {
|
|
8
|
-
void Build_MDD(std::vector<int>& items, std::vector<int>& items_lim);
|
|
9
8
|
|
|
10
|
-
|
|
11
|
-
public:
|
|
12
|
-
unsigned int chld;
|
|
13
|
-
unsigned int sibl;
|
|
14
|
-
unsigned int freq;
|
|
15
|
-
unsigned int anct;
|
|
16
|
-
int itmset;
|
|
17
|
-
int item;
|
|
9
|
+
using std::vector;
|
|
18
10
|
|
|
19
|
-
|
|
20
|
-
chld = 0;
|
|
21
|
-
sibl = 0;
|
|
22
|
-
freq = 0;
|
|
23
|
-
itmset = _itmset;
|
|
24
|
-
anct = _anc;
|
|
25
|
-
item = _itm;
|
|
26
|
-
}
|
|
11
|
+
void Build_MDD(vector<int>& items, vector<int>& items_lim);
|
|
27
12
|
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
13
|
+
// One node of the main prefix tree (`Tree`).
//
// chld / sibl are indexes of the first child and the next sibling; the
// builder treats the value 0 as "none". `item` is a signed item id (the
// builder bumps the itemset counter when it is negative), and a negative
// `itmset` is used by the builder to mark a Tree -> CTree edge.
// NOTE(review): `freq` is only zero-initialised here; its exact meaning is
// defined by the mining code, not by this header.
class Arc {
public:
    unsigned int chld = 0;
    unsigned int sibl = 0;
    unsigned int freq = 0;
    unsigned int anct = 0;
    int itmset = 0;
    int item = 0;

    // All-zero node.
    Arc() = default;

    // Creates a leaf for `_itm` in itemset `_itmset` whose ancestor link is
    // `_anc`. `_itm` is taken as a signed int so negative item ids are stored
    // unchanged instead of round-tripping through an unsigned parameter.
    Arc(int _itm, int _itmset, unsigned int _anc)
        : anct(_anc), itmset(_itmset), item(_itm) {}
};
|
|
49
40
|
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
41
|
+
// One entry of `VTree`: a stored item sequence plus a link to the next entry.
// NOTE(review): the builder comments say sibling links are stored as
// "index + 1" with 0 marking the end of the chain; confirm against the miner.
class VArc {
public:
    unsigned int sibl = 0;
    std::vector<int> seq;

    // Empty entry with no sibling.
    VArc() = default;

    // Takes ownership of `items` by swapping it into `seq`; on return the
    // caller's vector holds what `seq` held before (i.e. it is empty).
    VArc(std::vector<int>& items, unsigned int _sib) : sibl(_sib) {
        seq.swap(items);
    }
};
|
|
55
|
+
|
|
56
|
+
// One entry of `CTree`: an item sequence together with the ancestor map that
// was current when the entry was created. The builder appends one extra slot
// to the ancestor map before constructing the entry and uses that last
// element as the entry's child link.
class CArc {
public:
    std::vector<int> seq;
    std::vector<unsigned int> ancest;

    // Steals the contents of both arguments by swapping them into the new
    // entry; the caller's vectors are left holding the (empty) previous
    // contents of `ancest` and `seq`.
    CArc(std::vector<unsigned int>& _anc, std::vector<int>& items) {
        items.swap(seq);
        _anc.swap(ancest);
    }
};
|
|
66
|
+
|
|
67
|
+
extern vector<Arc> Tree;
|
|
68
|
+
extern vector<VArc> VTree;
|
|
69
|
+
extern vector<CArc> CTree;
|
|
70
|
+
|
|
71
|
+
} // namespace htminer
|