effspm 0.2.7__cp39-cp39-win_amd64.whl → 0.3.3__cp39-cp39-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- effspm/_effspm.cp39-win_amd64.pyd +0 -0
- effspm/_effspm.cpp +961 -210
- effspm/btminer/src/build_mdd.cpp +42 -17
- effspm/btminer/src/build_mdd.hpp +13 -19
- effspm/btminer/src/freq_miner.cpp +134 -49
- effspm/btminer/src/freq_miner.hpp +16 -0
- effspm/btminer/src/load_inst.cpp +211 -126
- effspm/btminer/src/load_inst.hpp +22 -4
- effspm/btminer/src/main.cpp +83 -0
- effspm/btminer/src/utility.cpp +26 -41
- effspm/btminer/src/utility.hpp +6 -30
- effspm/freq_miner.hpp +2 -1
- effspm/htminer/src/build_mdd.cpp +46 -124
- effspm/htminer/src/build_mdd.hpp +56 -49
- effspm/htminer/src/freq_miner.cpp +341 -307
- effspm/htminer/src/freq_miner.hpp +39 -40
- effspm/htminer/src/load_inst.cpp +287 -336
- effspm/htminer/src/load_inst.hpp +23 -6
- effspm/htminer/src/main.cpp +97 -0
- effspm/htminer/src/utility.cpp +38 -57
- effspm/htminer/src/utility.hpp +9 -64
- effspm/largebm/src/build_mdd.cpp +69 -110
- effspm/largebm/src/build_mdd.hpp +22 -37
- effspm/largebm/src/freq_miner.cpp +241 -291
- effspm/largebm/src/freq_miner.hpp +25 -36
- effspm/largebm/src/load_inst.cpp +20 -26
- effspm/largebm/src/load_inst.hpp +24 -34
- effspm/largebm/src/main.cpp +95 -0
- effspm/largebm/src/utility.cpp +11 -21
- effspm/largebm/src/utility.hpp +7 -10
- effspm/largehm/src/build_mdd.cpp +75 -110
- effspm/largehm/src/build_mdd.hpp +53 -73
- effspm/largehm/src/freq_miner.cpp +134 -191
- effspm/largehm/src/freq_miner.hpp +37 -60
- effspm/largehm/src/load_inst.cpp +137 -174
- effspm/largehm/src/load_inst.hpp +13 -50
- effspm/largehm/src/main.cpp +95 -0
- effspm/largehm/src/utility.cpp +46 -28
- effspm/largehm/src/utility.hpp +18 -16
- effspm/largepp/src/freq_miner.cpp +184 -156
- effspm/largepp/src/freq_miner.hpp +11 -36
- effspm/largepp/src/load_inst.cpp +32 -12
- effspm/largepp/src/load_inst.hpp +15 -9
- effspm/largepp/src/main.cpp +108 -0
- effspm/largepp/src/pattern.hpp +31 -0
- effspm/load_inst.cpp +8 -8
- effspm/load_inst.hpp +1 -1
- effspm/main.cpp +103 -0
- {effspm-0.2.7.dist-info → effspm-0.3.3.dist-info}/METADATA +1 -1
- effspm-0.3.3.dist-info/RECORD +60 -0
- effspm-0.2.7.dist-info/RECORD +0 -53
- {effspm-0.2.7.dist-info → effspm-0.3.3.dist-info}/WHEEL +0 -0
- {effspm-0.2.7.dist-info → effspm-0.3.3.dist-info}/licenses/LICENSE +0 -0
- {effspm-0.2.7.dist-info → effspm-0.3.3.dist-info}/top_level.txt +0 -0
effspm/btminer/src/load_inst.cpp
CHANGED
|
@@ -1,12 +1,11 @@
|
|
|
1
|
-
|
|
1
|
+
// effspm/btminer/src/load_inst.cpp
|
|
2
2
|
#include <iostream>
|
|
3
|
-
#include <sstream>
|
|
4
3
|
#include <fstream>
|
|
5
|
-
#include <
|
|
6
|
-
#include <ctime>
|
|
7
|
-
#include <map>
|
|
8
|
-
#include <vector>
|
|
4
|
+
#include <sstream>
|
|
9
5
|
#include <algorithm>
|
|
6
|
+
#include <math.h>
|
|
7
|
+
#include <time.h>
|
|
8
|
+
|
|
10
9
|
#include "load_inst.hpp"
|
|
11
10
|
#include "utility.hpp"
|
|
12
11
|
#include "build_mdd.hpp"
|
|
@@ -16,184 +15,270 @@ namespace btminer {
|
|
|
16
15
|
|
|
17
16
|
using namespace std;
|
|
18
17
|
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
18
|
+
// ---------------------------------------------------------------------
|
|
19
|
+
// global definitions (must match load_inst.hpp)
|
|
20
|
+
// ---------------------------------------------------------------------
|
|
21
|
+
int M = 0;
|
|
22
|
+
int N = 0;
|
|
23
|
+
int L = 0;
|
|
24
|
+
unsigned long long E = 0ULL; // matches header: extern unsigned long long E;
|
|
25
|
+
int num_nodes = 0;
|
|
26
|
+
int theta = 0;
|
|
27
|
+
int cur_node = 0;
|
|
28
|
+
|
|
29
|
+
map<string, int> item_map;
|
|
30
|
+
map<int, string> item_map_rev;
|
|
31
|
+
|
|
32
|
+
std::vector<int> freq;
|
|
33
|
+
std::vector<int> item_dic;
|
|
34
|
+
std::vector<std::vector<int>> items;
|
|
35
|
+
// ✅ REAL DEFINITION lives here:
|
|
36
|
+
std::vector<Pattern> DFS;
|
|
37
|
+
|
|
38
|
+
string out_file, folder;
|
|
39
|
+
bool b_disp = 0;
|
|
40
|
+
bool b_write = 0;
|
|
41
|
+
bool use_dic = 0;
|
|
42
|
+
bool just_build= 0;
|
|
43
|
+
bool pre_pro = 1;
|
|
44
|
+
|
|
45
|
+
int N_mult = 1;
|
|
46
|
+
int M_mult = 1;
|
|
47
|
+
int time_limit= 30 * 3600; // 30 hours, same as professor
|
|
48
|
+
|
|
49
|
+
clock_t start_time;
|
|
50
|
+
|
|
51
|
+
// ---------------------------------------------------------------------
|
|
52
|
+
// forward decls
|
|
53
|
+
// ---------------------------------------------------------------------
|
|
54
|
+
void Load_items_pre(string &inst_name);
|
|
55
|
+
bool Load_items(string &inst_name);
|
|
56
|
+
bool Preprocess(string &inst, double thresh);
|
|
57
|
+
|
|
58
|
+
// ---------------------------------------------------------------------
|
|
59
|
+
// main loader
|
|
60
|
+
// ---------------------------------------------------------------------
|
|
61
|
+
bool Load_instance(string &items_file, double thresh) {
|
|
35
62
|
clock_t kk = clock();
|
|
63
|
+
|
|
64
|
+
// root node for MDD
|
|
36
65
|
Tree.emplace_back(0, 0, 0);
|
|
37
66
|
|
|
38
67
|
if (pre_pro) {
|
|
39
68
|
if (!Preprocess(items_file, thresh))
|
|
40
69
|
return false;
|
|
70
|
+
if (b_disp)
|
|
71
|
+
cout << "\nPreprocess done in " << give_time(clock() - kk) << " seconds\n\n";
|
|
41
72
|
|
|
42
|
-
|
|
43
|
-
|
|
73
|
+
// build empty DFS of size L
|
|
74
|
+
DFS.clear();
|
|
44
75
|
DFS.reserve(L);
|
|
45
76
|
for (int i = 0; i < L; ++i)
|
|
46
77
|
DFS.emplace_back(-i - 1);
|
|
47
78
|
|
|
48
79
|
kk = clock();
|
|
49
80
|
Load_items_pre(items_file);
|
|
50
|
-
}
|
|
81
|
+
}
|
|
82
|
+
else if (!Load_items(items_file)) {
|
|
51
83
|
return false;
|
|
84
|
+
}
|
|
52
85
|
else {
|
|
53
|
-
|
|
86
|
+
if (thresh < 1)
|
|
87
|
+
theta = static_cast<int>(ceil(thresh * N * N_mult));
|
|
88
|
+
else
|
|
89
|
+
theta = static_cast<int>(thresh);
|
|
54
90
|
}
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
91
|
+
if (b_disp)
|
|
92
|
+
cout << "\nMDD Database built in " << give_time(clock() - kk) << " seconds\n\n";
|
|
93
|
+
if (b_disp)
|
|
94
|
+
cout << "Found " << N * N_mult
|
|
95
|
+
<< " sequence, with max line len " << M
|
|
96
|
+
<< ", and " << L << " items, and " << E << " enteries\n";
|
|
97
|
+
//cout << "Total MDD nodes: " << Tree.size() << endl;
|
|
59
98
|
|
|
60
99
|
return true;
|
|
61
100
|
}
|
|
62
101
|
|
|
63
|
-
|
|
102
|
+
// ---------------------------------------------------------------------
|
|
103
|
+
// preprocessing pass
|
|
104
|
+
// ---------------------------------------------------------------------
|
|
105
|
+
bool Preprocess(string &inst, double thresh) {
|
|
106
|
+
N = 0;
|
|
107
|
+
L = 0;
|
|
108
|
+
freq.clear();
|
|
109
|
+
item_dic.clear();
|
|
110
|
+
item_map.clear();
|
|
111
|
+
item_map_rev.clear();
|
|
112
|
+
// (E is usually for entries during Build_MDD, so we can leave it
|
|
113
|
+
// for the load phase; it’s already reset in the binding)
|
|
114
|
+
|
|
64
115
|
ifstream file(inst);
|
|
65
|
-
if (!file.good()) {
|
|
66
|
-
cout << "!!!!!! No such file exists: " << inst << " !!!!!!\n";
|
|
67
|
-
return false;
|
|
68
|
-
}
|
|
69
116
|
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
117
|
+
if (file.good()) {
|
|
118
|
+
string line;
|
|
119
|
+
while (getline(file, line) && give_time(clock() - start_time) < time_limit) {
|
|
120
|
+
++N;
|
|
121
|
+
vector<bool> counted(L, false);
|
|
122
|
+
|
|
123
|
+
istringstream word(line);
|
|
124
|
+
string itm;
|
|
125
|
+
while (word >> itm) {
|
|
126
|
+
int ditem = stoi(itm);
|
|
127
|
+
if (L < abs(ditem))
|
|
128
|
+
L = abs(ditem);
|
|
129
|
+
|
|
130
|
+
// extend freq / counted if L grew
|
|
131
|
+
while (static_cast<int>(freq.size()) < L) {
|
|
132
|
+
freq.push_back(0);
|
|
133
|
+
counted.push_back(false);
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
int idx = abs(ditem) - 1;
|
|
137
|
+
if (!counted[idx]) {
|
|
138
|
+
++freq[idx];
|
|
139
|
+
counted[idx] = true;
|
|
140
|
+
}
|
|
87
141
|
}
|
|
88
142
|
}
|
|
143
|
+
} else {
|
|
144
|
+
cout << "!!!!!! No such file exists: " << inst << " !!!!!!\n";
|
|
145
|
+
return false;
|
|
89
146
|
}
|
|
90
147
|
|
|
91
|
-
|
|
148
|
+
if (thresh < 1)
|
|
149
|
+
theta = static_cast<int>(ceil(thresh * N * N_mult));
|
|
150
|
+
else
|
|
151
|
+
theta = static_cast<int>(thresh);
|
|
92
152
|
|
|
153
|
+
// build item_dic with only frequent items
|
|
93
154
|
int real_L = 0;
|
|
94
155
|
item_dic = vector<int>(L, -1);
|
|
95
156
|
for (int i = 0; i < L; ++i) {
|
|
96
157
|
if (freq[i] >= theta)
|
|
97
158
|
item_dic[i] = ++real_L;
|
|
98
159
|
}
|
|
160
|
+
if (b_disp)
|
|
161
|
+
cout << "Original number of items: " << L
|
|
162
|
+
<< " Reduced to: " << real_L << endl;
|
|
99
163
|
|
|
100
|
-
cout << "Original number of items: " << L << " Reduced to: " << real_L << endl;
|
|
101
164
|
L = real_L;
|
|
102
|
-
N = 0;
|
|
165
|
+
N = 0; // will be recounted in Load_items_pre
|
|
166
|
+
|
|
103
167
|
return true;
|
|
104
168
|
}
|
|
105
169
|
|
|
106
|
-
|
|
170
|
+
// ---------------------------------------------------------------------
|
|
171
|
+
// load after preprocessing
|
|
172
|
+
// ---------------------------------------------------------------------
|
|
173
|
+
void Load_items_pre(string &inst_name) {
|
|
107
174
|
ifstream file(inst_name);
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
175
|
+
|
|
176
|
+
if (file.good()) {
|
|
177
|
+
string line;
|
|
178
|
+
while (getline(file, line) && give_time(clock() - start_time) < time_limit) {
|
|
179
|
+
istringstream word(line);
|
|
180
|
+
string itm;
|
|
181
|
+
vector<int> temp_vec;
|
|
182
|
+
bool sgn = false;
|
|
183
|
+
while (word >> itm) {
|
|
184
|
+
int ditem;
|
|
185
|
+
if (use_dic) {
|
|
186
|
+
auto it = item_map.find(itm);
|
|
187
|
+
if (it == item_map.end()) {
|
|
188
|
+
item_map[itm] = ++L;
|
|
189
|
+
item_map_rev[L] = itm;
|
|
190
|
+
ditem = L;
|
|
191
|
+
} else {
|
|
192
|
+
ditem = it->second;
|
|
193
|
+
}
|
|
124
194
|
} else {
|
|
125
|
-
ditem =
|
|
195
|
+
ditem = stoi(itm);
|
|
126
196
|
}
|
|
127
|
-
} else {
|
|
128
|
-
ditem = stoi(itm);
|
|
129
|
-
}
|
|
130
197
|
|
|
131
|
-
|
|
132
|
-
if (
|
|
133
|
-
sgn
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
198
|
+
// drop infrequent items
|
|
199
|
+
if (freq[abs(ditem) - 1] < theta) {
|
|
200
|
+
if (!sgn)
|
|
201
|
+
sgn = (ditem < 0);
|
|
202
|
+
continue;
|
|
203
|
+
} else {
|
|
204
|
+
if (ditem > 0)
|
|
205
|
+
ditem = item_dic[ditem - 1];
|
|
206
|
+
else
|
|
207
|
+
ditem = -item_dic[-ditem - 1];
|
|
208
|
+
}
|
|
138
209
|
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
210
|
+
if (sgn) {
|
|
211
|
+
if (ditem > 0)
|
|
212
|
+
ditem = -ditem;
|
|
213
|
+
sgn = false;
|
|
214
|
+
}
|
|
142
215
|
|
|
143
|
-
|
|
144
|
-
|
|
216
|
+
temp_vec.push_back(ditem);
|
|
217
|
+
}
|
|
218
|
+
|
|
219
|
+
if (temp_vec.empty())
|
|
220
|
+
continue;
|
|
145
221
|
|
|
146
|
-
|
|
222
|
+
++N;
|
|
147
223
|
|
|
148
|
-
|
|
149
|
-
|
|
224
|
+
if (static_cast<int>(temp_vec.size()) > M)
|
|
225
|
+
M = static_cast<int>(temp_vec.size());
|
|
150
226
|
|
|
151
|
-
|
|
152
|
-
|
|
227
|
+
// this increments E inside Build_MDD
|
|
228
|
+
Build_MDD(temp_vec);
|
|
229
|
+
}
|
|
153
230
|
}
|
|
154
231
|
}
|
|
155
232
|
|
|
156
|
-
|
|
233
|
+
// ---------------------------------------------------------------------
|
|
234
|
+
// load without preprocessing
|
|
235
|
+
// ---------------------------------------------------------------------
|
|
236
|
+
bool Load_items(string &inst_name) {
|
|
157
237
|
ifstream file(inst_name);
|
|
158
|
-
if (!file.good()) {
|
|
159
|
-
cout << "!!!!!! No such file exists: " << inst_name << " !!!!!!\n";
|
|
160
|
-
return false;
|
|
161
|
-
}
|
|
162
238
|
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
item_map
|
|
175
|
-
|
|
176
|
-
|
|
239
|
+
if (file.good()) {
|
|
240
|
+
string line;
|
|
241
|
+
while (getline(file, line) && give_time(clock() - start_time) < time_limit) {
|
|
242
|
+
++N;
|
|
243
|
+
istringstream word(line);
|
|
244
|
+
string itm;
|
|
245
|
+
vector<int> temp_vec;
|
|
246
|
+
while (word >> itm) {
|
|
247
|
+
int ditem;
|
|
248
|
+
if (use_dic) {
|
|
249
|
+
auto it = item_map.find(itm);
|
|
250
|
+
if (it == item_map.end()) {
|
|
251
|
+
item_map[itm] = ++L;
|
|
252
|
+
item_map_rev[L] = itm;
|
|
253
|
+
ditem = L;
|
|
254
|
+
} else {
|
|
255
|
+
ditem = it->second;
|
|
256
|
+
}
|
|
177
257
|
} else {
|
|
178
|
-
ditem =
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
DFS.emplace_back(-DFS.size() - 1);
|
|
258
|
+
ditem = stoi(itm);
|
|
259
|
+
if (L < abs(ditem)) {
|
|
260
|
+
L = abs(ditem);
|
|
261
|
+
// make sure DFS is large enough (unless just_build)
|
|
262
|
+
while (static_cast<int>(DFS.size()) < L && !just_build) {
|
|
263
|
+
DFS.reserve(L);
|
|
264
|
+
DFS.emplace_back(-((int)DFS.size()) - 1);
|
|
265
|
+
}
|
|
187
266
|
}
|
|
188
267
|
}
|
|
268
|
+
|
|
269
|
+
temp_vec.push_back(ditem);
|
|
189
270
|
}
|
|
190
|
-
temp_vec.push_back(ditem);
|
|
191
|
-
}
|
|
192
271
|
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
272
|
+
if (static_cast<int>(temp_vec.size()) > M)
|
|
273
|
+
M = static_cast<int>(temp_vec.size());
|
|
274
|
+
|
|
275
|
+
Build_MDD(temp_vec);
|
|
276
|
+
}
|
|
277
|
+
} else {
|
|
278
|
+
cout << "!!!!!! No such file exists: " << inst_name << " !!!!!!\n";
|
|
279
|
+
return false;
|
|
196
280
|
}
|
|
281
|
+
|
|
197
282
|
return true;
|
|
198
283
|
}
|
|
199
284
|
|
effspm/btminer/src/load_inst.hpp
CHANGED
|
@@ -10,16 +10,34 @@
|
|
|
10
10
|
|
|
11
11
|
namespace btminer {
|
|
12
12
|
|
|
13
|
-
|
|
13
|
+
using std::string;
|
|
14
|
+
using std::vector;
|
|
15
|
+
using std::map;
|
|
16
|
+
using std::unordered_map;
|
|
17
|
+
using std::unordered_set;
|
|
14
18
|
|
|
15
|
-
|
|
19
|
+
bool Load_instance(string& items_file, double thresh);
|
|
20
|
+
|
|
21
|
+
extern string out_file, folder;
|
|
16
22
|
|
|
17
23
|
extern bool b_disp, b_write, use_dic, just_build, pre_pro;
|
|
18
24
|
|
|
19
|
-
extern int
|
|
25
|
+
extern int N, M, L, theta, num_nodes, M_mult, N_mult, time_limit, cur_node;
|
|
26
|
+
extern unsigned long long E; // total number of entries (we need this for _effspm.cpp)
|
|
27
|
+
|
|
28
|
+
extern std::clock_t start_time;
|
|
29
|
+
|
|
30
|
+
// these 2 are for dictionary mode
|
|
31
|
+
extern map<string,int> item_map;
|
|
32
|
+
extern map<int,string> item_map_rev;
|
|
20
33
|
|
|
21
|
-
extern
|
|
34
|
+
extern vector<int> freq;
|
|
35
|
+
extern vector<int> item_dic;
|
|
22
36
|
|
|
37
|
+
// expose items so _effspm.cpp can fall back to seeding (it expects btminer::items)
|
|
38
|
+
extern vector<vector<int>> items;
|
|
23
39
|
|
|
40
|
+
class Pattern;
|
|
41
|
+
extern vector<Pattern> DFS;
|
|
24
42
|
|
|
25
43
|
} // namespace btminer
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
#include <iostream>
|
|
2
|
+
#include <string.h>
|
|
3
|
+
#include "load_inst.hpp"
|
|
4
|
+
#include "freq_miner.hpp"
|
|
5
|
+
#include "utility.hpp"
|
|
6
|
+
#include "build_mdd.hpp"
|
|
7
|
+
|
|
8
|
+
namespace btminer {
|
|
9
|
+
// everything is already declared
|
|
10
|
+
}
|
|
11
|
+
|
|
12
|
+
int main(int argc, char* argv[]) {
|
|
13
|
+
using namespace btminer;
|
|
14
|
+
|
|
15
|
+
std::string VV, attr;
|
|
16
|
+
|
|
17
|
+
double thresh = 0;
|
|
18
|
+
for (int i = 1; i<argc; i++) {
|
|
19
|
+
if (argv[i][0] != '-' || isdigit(argv[i][1]))
|
|
20
|
+
continue;
|
|
21
|
+
else if (strcmp(argv[i], "-thr") == 0)
|
|
22
|
+
thresh = std::stod(argv[i + 1]);
|
|
23
|
+
else if (strcmp(argv[i], "-file") == 0)
|
|
24
|
+
VV = argv[i + 1];
|
|
25
|
+
else if (strcmp(argv[i], "-N_mult") == 0)
|
|
26
|
+
N_mult = std::stoi(argv[i + 1]);
|
|
27
|
+
else if (strcmp(argv[i], "-M_mult") == 0)
|
|
28
|
+
M_mult = std::stoi(argv[i + 1]);
|
|
29
|
+
else if (strcmp(argv[i], "-time") == 0)
|
|
30
|
+
time_limit = std::stoi(argv[i + 1]);
|
|
31
|
+
else if (strcmp(argv[i], "-jbuild") == 0)
|
|
32
|
+
just_build = 1;
|
|
33
|
+
else if (strcmp(argv[i], "-folder") == 0)
|
|
34
|
+
folder = argv[i + 1];
|
|
35
|
+
else if (strcmp(argv[i], "-npre") == 0)
|
|
36
|
+
pre_pro = 0;
|
|
37
|
+
else if (strcmp(argv[i], "-dic") == 0)
|
|
38
|
+
use_dic = 1;
|
|
39
|
+
else if (strcmp(argv[i], "-out") == 0) {
|
|
40
|
+
if (i + 1 == argc || argv[i + 1][0] == '-')
|
|
41
|
+
b_disp = 1;
|
|
42
|
+
else if (argv[i + 1][0] == '+') {
|
|
43
|
+
b_disp = 1;
|
|
44
|
+
b_write = 1;
|
|
45
|
+
if (strlen(argv[i + 1]) > 1) {
|
|
46
|
+
out_file = argv[i + 1];
|
|
47
|
+
out_file = out_file.substr(1, out_file.size() - 1);
|
|
48
|
+
}
|
|
49
|
+
else
|
|
50
|
+
out_file = VV;
|
|
51
|
+
}
|
|
52
|
+
else {
|
|
53
|
+
b_write = 1;
|
|
54
|
+
out_file = argv[i + 1];
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
else
|
|
58
|
+
std::cout << "Command " << argv[i] << " not recognized and skipped.\n";
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
std::cout << "\n********************** " << VV << " N_mult: " << N_mult << " M_mult: " << M_mult << "**********************\n";
|
|
62
|
+
|
|
63
|
+
std::string item_file = folder + VV + ".txt";
|
|
64
|
+
|
|
65
|
+
std::cout << "loading instances...\n";
|
|
66
|
+
|
|
67
|
+
start_time = clock();
|
|
68
|
+
|
|
69
|
+
if (!Load_instance(item_file, thresh)) {
|
|
70
|
+
std::cout << "Files invalid, exiting.\n";
|
|
71
|
+
return 0;
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
if (!just_build && give_time(clock() - start_time) < time_limit) {
|
|
75
|
+
Freq_miner();
|
|
76
|
+
if (give_time(clock() - start_time) >= time_limit)
|
|
77
|
+
std::cout << "TIME LIMIT REACHED\n";
|
|
78
|
+
std::cout << "Mining Complete\n\nFound a total of " << num_patt << " patterns\n";
|
|
79
|
+
std::cout << "\nTotal CPU time " << give_time(clock() - start_time) << " seconds\n\n";
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
return 0;
|
|
83
|
+
}
|
effspm/btminer/src/utility.cpp
CHANGED
|
@@ -5,61 +5,46 @@
|
|
|
5
5
|
|
|
6
6
|
namespace btminer {
|
|
7
7
|
|
|
8
|
-
|
|
9
|
-
bool use_dic = false;
|
|
10
|
-
std::vector<std::vector<int>> items;
|
|
11
|
-
bool use_list = false;
|
|
12
|
-
bool just_build = false;
|
|
13
|
-
int E = 0, M = 0, N = 0, L = 0, theta = 0;
|
|
14
|
-
std::vector<Pattern> DFS;
|
|
15
|
-
clock_t start_time = 0;
|
|
16
|
-
bool b_disp = false, b_write = false;
|
|
17
|
-
std::string out_file;
|
|
18
|
-
|
|
19
|
-
bool pre_pro = true;
|
|
20
|
-
int N_mult = 1, M_mult = 1;
|
|
21
|
-
int time_limit = 30 * 3600;
|
|
22
|
-
|
|
23
|
-
// buffer of mined patterns returned to Python
|
|
24
|
-
std::vector<std::vector<int>> collected;
|
|
25
|
-
|
|
26
|
-
void ClearCollected() { collected.clear(); }
|
|
27
|
-
const std::vector<std::vector<int>>& GetCollected() { return collected; }
|
|
28
|
-
|
|
29
|
-
// ─── Utility functions ───────────────────────────────────────────
|
|
30
|
-
int find_ID(std::vector<int>& vec, int itm)
|
|
31
|
-
{
|
|
8
|
+
int find_ID(vector<int>& vec, int itm) {
|
|
32
9
|
int plc = 0;
|
|
33
|
-
while (plc < static_cast<int>(vec.size()) && vec[plc] != itm)
|
|
34
|
-
|
|
10
|
+
while (plc < static_cast<int>(vec.size()) && vec[plc] != itm)
|
|
11
|
+
++plc;
|
|
12
|
+
|
|
13
|
+
if (plc == static_cast<int>(vec.size()))
|
|
14
|
+
return -1;
|
|
15
|
+
else
|
|
16
|
+
return plc;
|
|
35
17
|
}
|
|
36
18
|
|
|
37
|
-
bool check_parent(int cur_arc, int str_pnt, int start,
|
|
38
|
-
|
|
39
|
-
{
|
|
19
|
+
bool check_parent(int cur_arc, int str_pnt, int start, std::vector<int>& strpnt_vec) {
|
|
20
|
+
|
|
40
21
|
std::vector<int> ancestors;
|
|
22
|
+
|
|
41
23
|
int cur_anct = Tree[cur_arc].anct;
|
|
42
24
|
|
|
43
25
|
while (Tree[cur_anct].itmset > Tree[str_pnt].itmset) {
|
|
44
|
-
if (Tree[cur_anct].item > 0)
|
|
26
|
+
if (Tree[cur_anct].item > 0)
|
|
27
|
+
ancestors.push_back(cur_anct);
|
|
45
28
|
cur_anct = Tree[cur_anct].anct;
|
|
46
29
|
}
|
|
47
|
-
if (Tree[cur_anct].itmset == Tree[str_pnt].itmset) return true;
|
|
48
30
|
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
31
|
+
if (Tree[cur_anct].itmset == Tree[str_pnt].itmset)
|
|
32
|
+
return true;
|
|
33
|
+
else {
|
|
34
|
+
for (auto it = ancestors.rbegin(); it != ancestors.rend(); ++it) {
|
|
35
|
+
for (int i = start; i < static_cast<int>(strpnt_vec.size()); ++i) {
|
|
36
|
+
if (strpnt_vec[i] == *it)
|
|
37
|
+
return true;
|
|
38
|
+
}
|
|
39
|
+
}
|
|
40
|
+
}
|
|
52
41
|
|
|
53
42
|
return false;
|
|
54
43
|
}
|
|
55
44
|
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
if (vec[i] == pnt) return true;
|
|
60
|
-
return false;
|
|
45
|
+
float give_time(clock_t kk) {
|
|
46
|
+
float ll = static_cast<float>(kk) / CLOCKS_PER_SEC;
|
|
47
|
+
return ll;
|
|
61
48
|
}
|
|
62
49
|
|
|
63
|
-
double give_time(clock_t kk) { return double(kk) / CLOCKS_PER_SEC; }
|
|
64
|
-
|
|
65
50
|
} // namespace btminer
|
effspm/btminer/src/utility.hpp
CHANGED
|
@@ -1,40 +1,16 @@
|
|
|
1
1
|
#pragma once
|
|
2
2
|
|
|
3
3
|
#include <vector>
|
|
4
|
-
#include <
|
|
4
|
+
#include <time.h>
|
|
5
5
|
#include <string>
|
|
6
6
|
#include "build_mdd.hpp"
|
|
7
|
-
#include "freq_miner.hpp"
|
|
8
|
-
#include "load_inst.hpp"
|
|
9
7
|
|
|
10
8
|
namespace btminer {
|
|
11
9
|
|
|
12
|
-
|
|
13
|
-
bool find_pnt(Arc* pnt, std::vector<Arc*>& vec, int pos);
|
|
14
|
-
int find_ID(std::vector<int>& vec, int itm);
|
|
15
|
-
double give_time(clock_t kk);
|
|
16
|
-
bool check_parent(int cur_arc, int str_pnt, int start, std::vector<int>& strpnt_vec);
|
|
10
|
+
using std::vector;
|
|
17
11
|
|
|
18
|
-
|
|
12
|
+
int find_ID(vector<int>& vec, int itm);
|
|
13
|
+
float give_time(clock_t kk);
|
|
14
|
+
bool check_parent(int cur_arc, int str_pnt, int start, vector<int>& strpnt_vec);
|
|
19
15
|
|
|
20
|
-
|
|
21
|
-
extern bool just_build;
|
|
22
|
-
extern int E, M, N, L, theta;
|
|
23
|
-
extern std::vector<Pattern> DFS;
|
|
24
|
-
extern clock_t start_time;
|
|
25
|
-
extern bool b_disp, b_write;
|
|
26
|
-
extern std::string out_file;
|
|
27
|
-
extern bool pre_pro;
|
|
28
|
-
extern int N_mult, M_mult;
|
|
29
|
-
extern int time_limit;
|
|
30
|
-
extern std::vector<std::vector<int>> items;
|
|
31
|
-
|
|
32
|
-
extern std::vector<std::vector<int>> collected;
|
|
33
|
-
|
|
34
|
-
void ClearCollected();
|
|
35
|
-
const std::vector<std::vector<int>>& GetCollected();
|
|
36
|
-
|
|
37
|
-
}
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
// namespace btminer
|
|
16
|
+
} // namespace btminer
|
effspm/freq_miner.hpp
CHANGED