effspm 0.2.7__cp312-cp312-win_amd64.whl → 0.3.3__cp312-cp312-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- effspm/_effspm.cp312-win_amd64.pyd +0 -0
- effspm/_effspm.cpp +961 -210
- effspm/btminer/src/build_mdd.cpp +42 -17
- effspm/btminer/src/build_mdd.hpp +13 -19
- effspm/btminer/src/freq_miner.cpp +134 -49
- effspm/btminer/src/freq_miner.hpp +16 -0
- effspm/btminer/src/load_inst.cpp +211 -126
- effspm/btminer/src/load_inst.hpp +22 -4
- effspm/btminer/src/main.cpp +83 -0
- effspm/btminer/src/utility.cpp +26 -41
- effspm/btminer/src/utility.hpp +6 -30
- effspm/freq_miner.hpp +2 -1
- effspm/htminer/src/build_mdd.cpp +46 -124
- effspm/htminer/src/build_mdd.hpp +56 -49
- effspm/htminer/src/freq_miner.cpp +341 -307
- effspm/htminer/src/freq_miner.hpp +39 -40
- effspm/htminer/src/load_inst.cpp +287 -336
- effspm/htminer/src/load_inst.hpp +23 -6
- effspm/htminer/src/main.cpp +97 -0
- effspm/htminer/src/utility.cpp +38 -57
- effspm/htminer/src/utility.hpp +9 -64
- effspm/largebm/src/build_mdd.cpp +69 -110
- effspm/largebm/src/build_mdd.hpp +22 -37
- effspm/largebm/src/freq_miner.cpp +241 -291
- effspm/largebm/src/freq_miner.hpp +25 -36
- effspm/largebm/src/load_inst.cpp +20 -26
- effspm/largebm/src/load_inst.hpp +24 -34
- effspm/largebm/src/main.cpp +95 -0
- effspm/largebm/src/utility.cpp +11 -21
- effspm/largebm/src/utility.hpp +7 -10
- effspm/largehm/src/build_mdd.cpp +75 -110
- effspm/largehm/src/build_mdd.hpp +53 -73
- effspm/largehm/src/freq_miner.cpp +134 -191
- effspm/largehm/src/freq_miner.hpp +37 -60
- effspm/largehm/src/load_inst.cpp +137 -174
- effspm/largehm/src/load_inst.hpp +13 -50
- effspm/largehm/src/main.cpp +95 -0
- effspm/largehm/src/utility.cpp +46 -28
- effspm/largehm/src/utility.hpp +18 -16
- effspm/largepp/src/freq_miner.cpp +184 -156
- effspm/largepp/src/freq_miner.hpp +11 -36
- effspm/largepp/src/load_inst.cpp +32 -12
- effspm/largepp/src/load_inst.hpp +15 -9
- effspm/largepp/src/main.cpp +108 -0
- effspm/largepp/src/pattern.hpp +31 -0
- effspm/load_inst.cpp +8 -8
- effspm/load_inst.hpp +1 -1
- effspm/main.cpp +103 -0
- {effspm-0.2.7.dist-info → effspm-0.3.3.dist-info}/METADATA +1 -1
- effspm-0.3.3.dist-info/RECORD +60 -0
- effspm-0.2.7.dist-info/RECORD +0 -53
- {effspm-0.2.7.dist-info → effspm-0.3.3.dist-info}/WHEEL +0 -0
- {effspm-0.2.7.dist-info → effspm-0.3.3.dist-info}/licenses/LICENSE +0 -0
- {effspm-0.2.7.dist-info → effspm-0.3.3.dist-info}/top_level.txt +0 -0
effspm/largehm/src/load_inst.cpp
CHANGED
|
@@ -1,131 +1,106 @@
|
|
|
1
1
|
#include <iostream>
|
|
2
2
|
#include <sstream>
|
|
3
3
|
#include <algorithm>
|
|
4
|
-
#include <
|
|
5
|
-
#include <cmath>
|
|
6
|
-
#include <ctime>
|
|
7
|
-
|
|
4
|
+
#include <math.h>
|
|
8
5
|
#include "load_inst.hpp"
|
|
9
6
|
#include "utility.hpp"
|
|
10
7
|
#include "build_mdd.hpp"
|
|
11
8
|
#include "freq_miner.hpp"
|
|
12
9
|
|
|
13
10
|
namespace largehm {
|
|
14
|
-
using namespace std;
|
|
15
11
|
|
|
16
|
-
|
|
17
|
-
string folder;
|
|
12
|
+
using namespace std;
|
|
18
13
|
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
bool use_dic = false;
|
|
22
|
-
bool use_list = false;
|
|
23
|
-
bool just_build = false;
|
|
24
|
-
bool pre_pro = false;
|
|
25
|
-
bool itmset_exists = false;
|
|
14
|
+
unsigned int M = 0, L = 0, mlim;
|
|
15
|
+
unsigned long long int N = 0, theta, E = 0;
|
|
26
16
|
|
|
27
|
-
|
|
28
|
-
unsigned int L = 0;
|
|
29
|
-
unsigned int mlim = 0;
|
|
30
|
-
unsigned int time_limit = 0;
|
|
17
|
+
bool itmset_exists = 0;
|
|
31
18
|
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
19
|
+
vector<int> item_dic;
|
|
20
|
+
vector<Pattern> DFS;
|
|
21
|
+
vector<VPattern> VDFS;
|
|
35
22
|
|
|
36
|
-
|
|
23
|
+
string out_file, folder;
|
|
37
24
|
|
|
38
|
-
|
|
25
|
+
bool b_disp = 0;
|
|
26
|
+
bool b_write = 0;
|
|
27
|
+
bool use_dic = 0;
|
|
28
|
+
bool just_build = 0;
|
|
29
|
+
bool pre_pro = 1;
|
|
39
30
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
vector<VPattern> VDFS;
|
|
31
|
+
unsigned int time_limit = 10 * 3600;
|
|
32
|
+
clock_t start_time;
|
|
43
33
|
|
|
34
|
+
void Load_items_pre(string &inst_name);
|
|
35
|
+
bool Load_items(string &inst_name);
|
|
36
|
+
bool Preprocess(string& inst, double thresh);
|
|
44
37
|
|
|
45
38
|
/*
 * Loads the sequence database stored in `items_file`.
 *
 * An entry is first appended to Tree. What happens next depends on the
 * global `pre_pro` flag:
 *   - pre_pro set:   Preprocess() scans the file, then DFS is filled with
 *                    one entry per item (ids -1 .. -L) and Load_items_pre()
 *                    performs the actual load.
 *   - pre_pro clear: Load_items() performs the load directly and `theta`
 *                    is derived here from `thresh` (values below 1 are
 *                    treated as a fraction of N, rounded up; otherwise the
 *                    value is used as given).
 *
 * When `b_disp` is set, timing and summary lines are written to stdout.
 *
 * Returns false if Preprocess() or Load_items() reports failure,
 * true otherwise.
 */
bool Load_instance(string& items_file, double thresh) {

    clock_t phase_start = clock();
    Tree.emplace_back(0, 0, 0);

    if (!pre_pro) {
        // Single-pass load: no item filtering beforehand.
        if (!Load_items(items_file))
            return false;

        if (thresh < 1)
            theta = ceil(thresh * N);
        else
            theta = thresh;
    }
    else {
        if (!Preprocess(items_file, thresh))
            return false;

        if (b_disp)
            cout << "\nPreprocess done in " << give_time(clock() - phase_start) << " seconds\n\n";

        // One DFS entry per item known after preprocessing: -1, -2, ..., -L.
        DFS.reserve(L);
        const int item_count = (int)L;
        for (int made = 0; made < item_count; ++made)
            DFS.emplace_back(-(made + 1));

        // Restart the clock so the build time below excludes preprocessing.
        phase_start = clock();
        Load_items_pre(items_file);
    }

    if (b_disp) {
        cout << "\nMDD Database built in " << give_time(clock() - phase_start) << " seconds\n\n";
        cout << "Found " << N << " sequence, with max line len " << M
             << ", and " << L << " items, and " << E << " enteries\n";
    }

    return true;
}
|
|
108
84
|
|
|
109
|
-
|
|
110
85
|
bool Preprocess(string &inst, double thresh) {
|
|
111
|
-
vector<unsigned long long int> MN(100, 0);
|
|
112
|
-
vector<vector<bool>> ML(100, vector<bool>(1000000, false));
|
|
113
86
|
|
|
87
|
+
vector<unsigned long long int> MN(100, 0);
|
|
88
|
+
vector<vector<bool>> ML(100, vector<bool>(1000000, 0));
|
|
114
89
|
ifstream file(inst);
|
|
90
|
+
|
|
91
|
+
vector<unsigned long long int> freq(1000000);
|
|
92
|
+
vector<unsigned long long int> counted(1000000, 0);
|
|
93
|
+
|
|
115
94
|
if (!file.good()) {
|
|
116
|
-
cout << "!!!!!! No such file exists: " << inst << " !!!!!!\n";
|
|
117
|
-
return
|
|
95
|
+
// cout << "!!!!!! No such file exists: " << inst << " !!!!!!\n";
|
|
96
|
+
return 0;
|
|
118
97
|
}
|
|
119
98
|
|
|
120
|
-
vector<unsigned long long int> freq(1000000, 0ULL);
|
|
121
|
-
vector<unsigned long long int> counted(1000000, 0ULL);
|
|
122
|
-
|
|
123
99
|
string line;
|
|
124
100
|
int ditem;
|
|
125
101
|
while (getline(file, line) && give_time(clock() - start_time) < time_limit) {
|
|
126
102
|
++N;
|
|
127
|
-
if (N % 10000000 == 0)
|
|
128
|
-
cout << "N: " << N << endl;
|
|
103
|
+
// if (N % 10000000 == 0) cout << "N: " << N << endl;
|
|
129
104
|
|
|
130
105
|
istringstream word(line);
|
|
131
106
|
string itm;
|
|
@@ -135,107 +110,114 @@ bool Preprocess(string &inst, double thresh) {
|
|
|
135
110
|
ditem = stoi(itm);
|
|
136
111
|
|
|
137
112
|
if (ditem > 0)
|
|
138
|
-
itmset_exists =
|
|
113
|
+
itmset_exists = 1;
|
|
139
114
|
else
|
|
140
|
-
ditem
|
|
115
|
+
ditem *= -1;
|
|
141
116
|
|
|
142
117
|
if (size_m < (int)MN.size()) {
|
|
143
118
|
++MN[size_m - 1];
|
|
144
|
-
if (
|
|
145
|
-
ML[size_m - 1].
|
|
119
|
+
if (ML[size_m - 1].size() < (size_t)ditem) {
|
|
120
|
+
ML[size_m - 1].reserve(ditem);
|
|
121
|
+
while (ML[size_m - 1].size() < (size_t)ditem)
|
|
122
|
+
ML[size_m - 1].push_back(0);
|
|
146
123
|
}
|
|
147
|
-
ML[size_m - 1][ditem - 1] =
|
|
124
|
+
ML[size_m - 1][ditem - 1] = 1;
|
|
148
125
|
}
|
|
149
126
|
|
|
150
|
-
if (L <
|
|
151
|
-
L =
|
|
152
|
-
}
|
|
127
|
+
if (L < (unsigned int)ditem)
|
|
128
|
+
L = ditem;
|
|
153
129
|
|
|
154
|
-
if (
|
|
155
|
-
freq.
|
|
156
|
-
counted.
|
|
130
|
+
if (freq.size() < L) {
|
|
131
|
+
freq.reserve(L);
|
|
132
|
+
counted.reserve(L);
|
|
133
|
+
while (freq.size() < L) {
|
|
134
|
+
freq.push_back(0);
|
|
135
|
+
counted.push_back(0);
|
|
136
|
+
}
|
|
157
137
|
}
|
|
138
|
+
|
|
158
139
|
if (counted[ditem - 1] != N) {
|
|
159
140
|
++freq[ditem - 1];
|
|
160
141
|
counted[ditem - 1] = N;
|
|
161
142
|
}
|
|
143
|
+
|
|
144
|
+
++E; // count entries
|
|
162
145
|
}
|
|
163
146
|
if (size_m > (int)M)
|
|
164
147
|
M = size_m;
|
|
165
148
|
}
|
|
166
149
|
|
|
167
|
-
if (thresh < 1
|
|
168
|
-
theta =
|
|
169
|
-
|
|
170
|
-
theta =
|
|
171
|
-
}
|
|
150
|
+
if (thresh < 1)
|
|
151
|
+
theta = ceil(thresh * N);
|
|
152
|
+
else
|
|
153
|
+
theta = thresh;
|
|
172
154
|
|
|
173
155
|
int real_L = 0;
|
|
174
|
-
item_dic
|
|
175
|
-
vector<bool> item_in(L,
|
|
156
|
+
item_dic = vector<int>(L, -1);
|
|
157
|
+
vector<bool> item_in(L, 0);
|
|
176
158
|
for (int i = 0; i < (int)L; ++i) {
|
|
177
159
|
if (freq[i] >= theta) {
|
|
178
160
|
item_dic[i] = ++real_L;
|
|
179
|
-
item_in[i]
|
|
161
|
+
item_in[i] = 1;
|
|
180
162
|
}
|
|
181
163
|
}
|
|
182
164
|
|
|
183
|
-
|
|
165
|
+
// ❌ COMMENTED: extra stats
|
|
166
|
+
// cout << "Original number of items: " << L
|
|
167
|
+
// << " Reduced to: " << real_L << endl;
|
|
184
168
|
|
|
185
169
|
unsigned long long int LpM = 1;
|
|
186
170
|
mlim = M;
|
|
187
171
|
int orgmlim = 0;
|
|
188
172
|
int ulim = min(1 + real_L / 4, 10);
|
|
189
173
|
unsigned long long int ml;
|
|
190
|
-
|
|
174
|
+
int coef = 1 + 1 * itmset_exists;
|
|
191
175
|
for (int i = 0; i + ulim < (int)MN.size() && i + ulim < (int)M; ++i) {
|
|
192
176
|
ml = 0;
|
|
193
177
|
for (int j = 0; j < (int)L; ++j) {
|
|
194
178
|
if (ML[i][j] && item_in[j])
|
|
195
179
|
++ml;
|
|
196
180
|
}
|
|
197
|
-
LpM *= ml *
|
|
198
|
-
cout << ml << " " << LpM << " " << MN[i] << endl;
|
|
181
|
+
LpM *= ml * coef;
|
|
182
|
+
// cout << ml << " " << LpM << " " << MN[i] << endl;
|
|
199
183
|
if (LpM * ulim > MN[i]) {
|
|
200
184
|
orgmlim = i;
|
|
201
185
|
while (i + ulim - 1 < (int)MN.size() && i + ulim - 1 < (int)M) {
|
|
202
|
-
cout <<
|
|
203
|
-
|
|
204
|
-
if (
|
|
205
|
-
|
|
186
|
+
// cout << MN[i - 1] - MN[i + ulim - 1]
|
|
187
|
+
// << " " << MN[i + ulim - 1] << endl;
|
|
188
|
+
if (MN[i - 1] - MN[i + ulim - 1] < MN[i + ulim - 1] &&
|
|
189
|
+
MN[i + ulim - 1] < 600000000) {
|
|
206
190
|
mlim = i - 1;
|
|
207
191
|
break;
|
|
208
192
|
}
|
|
209
|
-
|
|
193
|
+
i += 1;
|
|
210
194
|
}
|
|
211
195
|
break;
|
|
212
196
|
}
|
|
213
197
|
}
|
|
214
198
|
|
|
215
|
-
cout << "M is: " << M << " Mlim is: " << mlim
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
<< round((log(N) - log(6)) / log(real_L)) << endl;
|
|
199
|
+
// cout << "M is: " << M << " Mlim is: " << mlim
|
|
200
|
+
// << " ulim is: " << ulim
|
|
201
|
+
// << " original mlim is: " << orgmlim
|
|
202
|
+
// << " guess is: " << round((log(N) - log(6)) / log(real_L)) << endl;
|
|
220
203
|
|
|
221
|
-
if (mlim <
|
|
204
|
+
if (mlim < M) {
|
|
222
205
|
for (int i = 0; i < real_L; ++i)
|
|
223
206
|
VDFS.emplace_back(i);
|
|
224
207
|
}
|
|
225
208
|
|
|
226
|
-
L =
|
|
209
|
+
L = real_L;
|
|
227
210
|
N = 0;
|
|
228
211
|
M = 0;
|
|
229
|
-
|
|
212
|
+
|
|
213
|
+
return 1;
|
|
230
214
|
}
|
|
231
215
|
|
|
216
|
+
void Load_items_pre(string &inst_name) {
|
|
232
217
|
|
|
233
|
-
bool Load_items_pre(string &inst_name) {
|
|
234
218
|
ifstream file(inst_name);
|
|
235
|
-
if (!file.good())
|
|
236
|
-
|
|
237
|
-
return false;
|
|
238
|
-
}
|
|
219
|
+
if (!file.good())
|
|
220
|
+
return;
|
|
239
221
|
|
|
240
222
|
string line;
|
|
241
223
|
int ditem;
|
|
@@ -244,12 +226,11 @@ bool Load_items_pre(string &inst_name) {
|
|
|
244
226
|
string itm;
|
|
245
227
|
vector<int> temp_vec;
|
|
246
228
|
vector<int> temp_lim;
|
|
247
|
-
bool sgn =
|
|
248
|
-
|
|
249
|
-
// L is final from Preprocess
|
|
229
|
+
bool sgn = 0;
|
|
250
230
|
while (word >> itm) {
|
|
251
231
|
ditem = stoi(itm);
|
|
252
|
-
|
|
232
|
+
|
|
233
|
+
if (item_dic[abs(ditem) - 1] == -1) {
|
|
253
234
|
if (!sgn)
|
|
254
235
|
sgn = (ditem < 0);
|
|
255
236
|
continue;
|
|
@@ -259,99 +240,81 @@ bool Load_items_pre(string &inst_name) {
|
|
|
259
240
|
else
|
|
260
241
|
ditem = -item_dic[-ditem - 1];
|
|
261
242
|
}
|
|
243
|
+
|
|
262
244
|
if (sgn) {
|
|
263
245
|
if (ditem > 0)
|
|
264
246
|
ditem = -ditem;
|
|
265
|
-
sgn =
|
|
247
|
+
sgn = 0;
|
|
266
248
|
}
|
|
267
|
-
|
|
249
|
+
|
|
250
|
+
if (temp_vec.size() <= mlim)
|
|
268
251
|
temp_vec.push_back(ditem);
|
|
269
252
|
else
|
|
270
253
|
temp_lim.push_back(ditem);
|
|
254
|
+
|
|
255
|
+
++E;
|
|
271
256
|
}
|
|
272
257
|
|
|
273
258
|
if (temp_vec.empty())
|
|
274
259
|
continue;
|
|
275
260
|
|
|
276
261
|
++N;
|
|
277
|
-
if (N % 10000000 == 0)
|
|
278
|
-
cout << N << endl;
|
|
262
|
+
// if (N % 10000000 == 0) cout << N << endl;
|
|
279
263
|
|
|
280
|
-
if (temp_vec.size() + temp_lim.size() >
|
|
281
|
-
M =
|
|
282
|
-
|
|
283
|
-
// ─── Ensure DFS/VDFS size before Build_MDD ───
|
|
284
|
-
while (DFS.size() < L)
|
|
285
|
-
DFS.emplace_back(-static_cast<int>(DFS.size()) - 1);
|
|
286
|
-
while (VDFS.size() < L)
|
|
287
|
-
VDFS.emplace_back(static_cast<int>(VDFS.size()));
|
|
288
|
-
// ──────────────────────────────────────────────
|
|
264
|
+
if (temp_vec.size() + temp_lim.size() > M)
|
|
265
|
+
M = temp_vec.size() + temp_lim.size();
|
|
289
266
|
|
|
290
267
|
Build_MDD(temp_vec, temp_lim);
|
|
291
268
|
}
|
|
292
|
-
|
|
293
|
-
return true;
|
|
294
269
|
}
|
|
295
270
|
|
|
296
|
-
|
|
297
271
|
/*
 * Loads the sequence database in `inst_name` without a preprocessing pass.
 *
 * Every line of the file is one sequence of whitespace-separated integer
 * tokens. For each line the function:
 *   - increments N (the line counter);
 *   - sets `itmset_exists` as soon as a positive token is seen;
 *   - raises L to the largest absolute token value seen so far and grows
 *     DFS so that it holds one entry per item (ids -1 .. -L);
 *   - places the first `mlim` tokens in `temp_vec` and the remainder in
 *     `temp_lim`, counting every token in E;
 *   - raises M to the longest line (in tokens) seen so far;
 *   - hands both vectors to Build_MDD().
 *
 * Reading stops early once the elapsed time since `start_time` reaches
 * `time_limit`.
 *
 * Returns false if the file cannot be opened, true otherwise.
 * Note: stoi() throws if a token is not a valid integer; that is not
 * handled here.
 */
bool Load_items(string &inst_name) {

    ifstream file(inst_name);
    if (!file.good()) {
        // cout << "!!!!!! No such file exists: " << inst_name << " !!!!!!\n";
        return false;
    }

    string line;
    while (getline(file, line) && give_time(clock() - start_time) < time_limit) {
        ++N;

        istringstream word(line);
        string itm;
        vector<int> temp_vec;
        vector<int> temp_lim;
        while (word >> itm) {
            const int ditem = stoi(itm);
            if (ditem > 0)
                itmset_exists = 1;

            const unsigned int item_id = (unsigned int)abs(ditem);
            if (L < item_id) {
                L = item_id;
                // Reserve once, then append the missing ids. The cast keeps
                // the negation in signed arithmetic instead of negating an
                // unsigned size_t.
                DFS.reserve(L);
                while (DFS.size() < L)
                    DFS.emplace_back(-static_cast<int>(DFS.size()) - 1);
            }

            if (temp_vec.size() < mlim)
                temp_vec.push_back(ditem);
            else
                temp_lim.push_back(ditem);

            ++E;
        }

        // M tracks the full line length, i.e. both parts of the split
        // sequence. Storing only temp_vec.size() under-reports it.
        const size_t seq_len = temp_vec.size() + temp_lim.size();
        if (seq_len > M)
            M = static_cast<unsigned int>(seq_len);

        Build_MDD(temp_vec, temp_lim);
    }

    return true;
}
|
|
356
319
|
|
|
357
320
|
} // namespace largehm
|
effspm/largehm/src/load_inst.hpp
CHANGED
|
@@ -1,64 +1,27 @@
|
|
|
1
|
-
#
|
|
2
|
-
#define LARGEHM_LOAD_INST_HPP
|
|
1
|
+
#pragma once
|
|
3
2
|
|
|
4
|
-
#include <string>
|
|
5
3
|
#include <vector>
|
|
4
|
+
#include <string>
|
|
6
5
|
#include <fstream>
|
|
7
|
-
#include <
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
#include
|
|
6
|
+
#include <map>
|
|
7
|
+
#include <unordered_set>
|
|
8
|
+
#include <unordered_map>
|
|
9
|
+
#include <time.h>
|
|
11
10
|
|
|
12
11
|
namespace largehm {
|
|
13
12
|
|
|
14
|
-
|
|
15
|
-
// ─── Globals & Function Prototypes ───────────────────────────────────────────
|
|
16
|
-
//
|
|
17
|
-
|
|
18
|
-
// Output/folder:
|
|
19
|
-
extern std::string out_file;
|
|
20
|
-
extern std::string folder;
|
|
21
|
-
|
|
22
|
-
// Flags:
|
|
23
|
-
extern bool b_disp;
|
|
24
|
-
extern bool b_write;
|
|
25
|
-
extern bool use_dic;
|
|
26
|
-
extern bool use_list;
|
|
27
|
-
extern bool just_build;
|
|
28
|
-
extern bool pre_pro;
|
|
29
|
-
extern bool itmset_exists;
|
|
13
|
+
using namespace std;
|
|
30
14
|
|
|
31
|
-
|
|
32
|
-
extern unsigned int M;
|
|
33
|
-
extern unsigned int L;
|
|
34
|
-
extern unsigned int mlim;
|
|
35
|
-
extern unsigned int time_limit;
|
|
15
|
+
bool Load_instance(string& items_file, double thresh);
|
|
36
16
|
|
|
37
|
-
extern
|
|
38
|
-
extern unsigned long long int theta;
|
|
39
|
-
extern unsigned long long int E;
|
|
17
|
+
extern string out_file, folder;
|
|
40
18
|
|
|
41
|
-
|
|
42
|
-
extern clock_t start_time;
|
|
43
|
-
|
|
44
|
-
// In‐memory sequences (only if “in‐memory” mode):
|
|
45
|
-
extern std::vector<std::vector<int>> items;
|
|
46
|
-
|
|
47
|
-
// Preprocessing dictionary (maps original → compressed IDs):
|
|
48
|
-
extern std::vector<int> item_dic;
|
|
19
|
+
extern bool b_disp, b_write, use_dic, just_build, pre_pro, itmset_exists;
|
|
49
20
|
|
|
50
|
-
|
|
51
|
-
extern std::vector<Pattern> DFS;
|
|
52
|
-
extern std::vector<VPattern> VDFS;
|
|
21
|
+
extern unsigned int M, L, mlim, time_limit;
|
|
53
22
|
|
|
54
|
-
|
|
55
|
-
bool Load_items_pre(std::string &inst_name);
|
|
56
|
-
bool Load_items(std::string &inst_name);
|
|
57
|
-
bool Preprocess(std::string &inst, double thresh);
|
|
23
|
+
extern unsigned long long int N, theta, E;
|
|
58
24
|
|
|
59
|
-
|
|
60
|
-
bool Load_instance(std::string &items_file, double thresh);
|
|
25
|
+
extern clock_t start_time;
|
|
61
26
|
|
|
62
27
|
} // namespace largehm
|
|
63
|
-
|
|
64
|
-
#endif // LARGEHM_LOAD_INST_HPP
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
#include <iostream>
|
|
2
|
+
#include <time.h>
|
|
3
|
+
#include <string.h>
|
|
4
|
+
#include <string>
|
|
5
|
+
#include "load_inst.hpp"
|
|
6
|
+
#include "build_mdd.hpp"
|
|
7
|
+
#include "utility.hpp"
|
|
8
|
+
#include "freq_miner.hpp"
|
|
9
|
+
|
|
10
|
+
using namespace std;
|
|
11
|
+
|
|
12
|
+
string out_file;
|
|
13
|
+
|
|
14
|
+
bool b_disp = 0, b_write = 0, use_dic = 0, just_build = 0, pre_pro = 1;
|
|
15
|
+
|
|
16
|
+
unsigned int time_limit = 10 * 3600;
|
|
17
|
+
|
|
18
|
+
clock_t start_time;
|
|
19
|
+
|
|
20
|
+
string folder;
|
|
21
|
+
|
|
22
|
+
int main(int argc, char* argv[]) {
|
|
23
|
+
|
|
24
|
+
string VV, attr;
|
|
25
|
+
|
|
26
|
+
double thresh = 0;
|
|
27
|
+
for (int i = 1; i<argc; i++) {
|
|
28
|
+
if (argv[i][0] != '-' || isdigit(argv[i][1]))
|
|
29
|
+
continue;
|
|
30
|
+
else if (strcmp(argv[i], "-thr") == 0)
|
|
31
|
+
thresh = stod(argv[i + 1]);
|
|
32
|
+
else if (strcmp(argv[i], "-file") == 0)
|
|
33
|
+
VV = argv[i + 1];
|
|
34
|
+
else if (strcmp(argv[i], "-time") == 0)
|
|
35
|
+
time_limit = stoi(argv[i + 1]);
|
|
36
|
+
else if (strcmp(argv[i], "-jbuild") == 0)
|
|
37
|
+
just_build = 1;
|
|
38
|
+
else if (strcmp(argv[i], "-folder") == 0)
|
|
39
|
+
folder = argv[i + 1];
|
|
40
|
+
else if (strcmp(argv[i], "-npre") == 0)
|
|
41
|
+
pre_pro = 0;
|
|
42
|
+
else if (strcmp(argv[i], "-dic") == 0)
|
|
43
|
+
use_dic = 1;
|
|
44
|
+
else if (strcmp(argv[i], "-out") == 0) {
|
|
45
|
+
if (i + 1 == argc || argv[i + 1][0] == '-')
|
|
46
|
+
b_disp = 1;
|
|
47
|
+
else if (argv[i + 1][0] == '+') {
|
|
48
|
+
b_disp = 1;
|
|
49
|
+
b_write = 1;
|
|
50
|
+
if (strlen(argv[i + 1]) > 1) {
|
|
51
|
+
out_file = argv[i + 1];
|
|
52
|
+
out_file = out_file.substr(1, out_file.size() - 1);
|
|
53
|
+
}
|
|
54
|
+
else
|
|
55
|
+
out_file = VV;
|
|
56
|
+
}
|
|
57
|
+
else {
|
|
58
|
+
b_write = 1;
|
|
59
|
+
out_file = argv[i + 1];
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
else
|
|
64
|
+
cout << "Command " << argv[i] << " not recognized and skipped.\n";
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
cout << "\n********************** " << VV << "**********************\n";
|
|
70
|
+
|
|
71
|
+
string item_file = folder + VV + ".txt";
|
|
72
|
+
|
|
73
|
+
cout << "loading instances...\n";
|
|
74
|
+
|
|
75
|
+
start_time = clock();
|
|
76
|
+
|
|
77
|
+
if (!Load_instance(item_file, thresh)) {
|
|
78
|
+
cout << "Files invalid, exiting.\n";
|
|
79
|
+
cin.get();
|
|
80
|
+
return 0;
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
//kk = clock();
|
|
84
|
+
|
|
85
|
+
if (!just_build && give_time(clock() - start_time) < time_limit) {
|
|
86
|
+
Freq_miner();
|
|
87
|
+
if (give_time(clock() - start_time) >= time_limit)
|
|
88
|
+
cout << "TIME LIMIT REACHED\n";
|
|
89
|
+
cout << "Mining Complete\n\nFound a total of " << num_patt << " patterns\n";
|
|
90
|
+
cout << "\nTotal CPU time " << give_time(clock() - start_time) << " seconds\n\n";
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
return 0;
|
|
95
|
+
}
|