effspm 0.2.7__cp312-cp312-win_amd64.whl → 0.3.3__cp312-cp312-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- effspm/_effspm.cp312-win_amd64.pyd +0 -0
- effspm/_effspm.cpp +961 -210
- effspm/btminer/src/build_mdd.cpp +42 -17
- effspm/btminer/src/build_mdd.hpp +13 -19
- effspm/btminer/src/freq_miner.cpp +134 -49
- effspm/btminer/src/freq_miner.hpp +16 -0
- effspm/btminer/src/load_inst.cpp +211 -126
- effspm/btminer/src/load_inst.hpp +22 -4
- effspm/btminer/src/main.cpp +83 -0
- effspm/btminer/src/utility.cpp +26 -41
- effspm/btminer/src/utility.hpp +6 -30
- effspm/freq_miner.hpp +2 -1
- effspm/htminer/src/build_mdd.cpp +46 -124
- effspm/htminer/src/build_mdd.hpp +56 -49
- effspm/htminer/src/freq_miner.cpp +341 -307
- effspm/htminer/src/freq_miner.hpp +39 -40
- effspm/htminer/src/load_inst.cpp +287 -336
- effspm/htminer/src/load_inst.hpp +23 -6
- effspm/htminer/src/main.cpp +97 -0
- effspm/htminer/src/utility.cpp +38 -57
- effspm/htminer/src/utility.hpp +9 -64
- effspm/largebm/src/build_mdd.cpp +69 -110
- effspm/largebm/src/build_mdd.hpp +22 -37
- effspm/largebm/src/freq_miner.cpp +241 -291
- effspm/largebm/src/freq_miner.hpp +25 -36
- effspm/largebm/src/load_inst.cpp +20 -26
- effspm/largebm/src/load_inst.hpp +24 -34
- effspm/largebm/src/main.cpp +95 -0
- effspm/largebm/src/utility.cpp +11 -21
- effspm/largebm/src/utility.hpp +7 -10
- effspm/largehm/src/build_mdd.cpp +75 -110
- effspm/largehm/src/build_mdd.hpp +53 -73
- effspm/largehm/src/freq_miner.cpp +134 -191
- effspm/largehm/src/freq_miner.hpp +37 -60
- effspm/largehm/src/load_inst.cpp +137 -174
- effspm/largehm/src/load_inst.hpp +13 -50
- effspm/largehm/src/main.cpp +95 -0
- effspm/largehm/src/utility.cpp +46 -28
- effspm/largehm/src/utility.hpp +18 -16
- effspm/largepp/src/freq_miner.cpp +184 -156
- effspm/largepp/src/freq_miner.hpp +11 -36
- effspm/largepp/src/load_inst.cpp +32 -12
- effspm/largepp/src/load_inst.hpp +15 -9
- effspm/largepp/src/main.cpp +108 -0
- effspm/largepp/src/pattern.hpp +31 -0
- effspm/load_inst.cpp +8 -8
- effspm/load_inst.hpp +1 -1
- effspm/main.cpp +103 -0
- {effspm-0.2.7.dist-info → effspm-0.3.3.dist-info}/METADATA +1 -1
- effspm-0.3.3.dist-info/RECORD +60 -0
- effspm-0.2.7.dist-info/RECORD +0 -53
- {effspm-0.2.7.dist-info → effspm-0.3.3.dist-info}/WHEEL +0 -0
- {effspm-0.2.7.dist-info → effspm-0.3.3.dist-info}/licenses/LICENSE +0 -0
- {effspm-0.2.7.dist-info → effspm-0.3.3.dist-info}/top_level.txt +0 -0
|
@@ -1,129 +1,89 @@
|
|
|
1
|
-
|
|
2
|
-
#include <cstdint>
|
|
3
|
-
#include <vector>
|
|
4
|
-
|
|
5
1
|
#include <iostream>
|
|
6
2
|
#include <time.h>
|
|
7
|
-
// for std::vector
|
|
8
|
-
#include <cmath> // for std::ceil
|
|
9
|
-
|
|
10
3
|
#include "freq_miner.hpp"
|
|
11
4
|
#include "build_mdd.hpp"
|
|
12
5
|
#include "utility.hpp"
|
|
13
|
-
|
|
6
|
+
|
|
14
7
|
namespace largehm {
|
|
15
8
|
|
|
16
|
-
|
|
9
|
+
using namespace std;
|
|
10
|
+
|
|
11
|
+
void Out_patt(vector<int>& seq, unsigned int freq);
|
|
17
12
|
void Extend_patt(Pattern& _patt);
|
|
18
|
-
void Mine_vec(unsigned long long int seq_ID,
|
|
19
|
-
int
|
|
20
|
-
int
|
|
21
|
-
|
|
22
|
-
std::vector<int>& items,
|
|
13
|
+
void Mine_vec(unsigned long long int seq_ID, int pos, int num_found,
|
|
14
|
+
vector<unsigned long long int>& ancest,
|
|
15
|
+
vector<int>& items,
|
|
23
16
|
unsigned long long int inod,
|
|
24
17
|
int sgn);
|
|
25
18
|
|
|
26
19
|
unsigned long long int num_patt = 0;
|
|
27
20
|
|
|
28
|
-
|
|
29
|
-
|
|
21
|
+
vector<bool> ilist;
|
|
22
|
+
vector<bool> slist;
|
|
30
23
|
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
24
|
+
vector<Pattern> pot_patt;
|
|
25
|
+
vector<VPattern> pot_vpatt;
|
|
26
|
+
vector<unsigned long long int> last_strpnt;
|
|
27
|
+
vector<unsigned long long int> ancest_base;
|
|
28
|
+
vector<int> DFS_numfound;
|
|
34
29
|
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
Pattern _patt;
|
|
30
|
+
Pattern _patt;
|
|
38
31
|
VPattern _vpatt;
|
|
39
32
|
|
|
40
33
|
int itmset_size;
|
|
41
34
|
int last_neg;
|
|
42
|
-
|
|
43
35
|
bool ilist_nempty;
|
|
44
36
|
|
|
45
|
-
|
|
46
37
|
void Freq_miner() {
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
// ─── Make sure DFS and VDFS are at least size L ─────────────────────────────
|
|
50
|
-
if (DFS.size() < static_cast<size_t>(L)) {
|
|
51
|
-
size_t old = DFS.size();
|
|
52
|
-
DFS.resize(static_cast<size_t>(L));
|
|
53
|
-
for (size_t i = old; i < DFS.size(); ++i) {
|
|
54
|
-
DFS[i] = Pattern(-static_cast<int>(i) - 1);
|
|
55
|
-
}
|
|
56
|
-
}
|
|
57
|
-
if (VDFS.size() < static_cast<size_t>(L)) {
|
|
58
|
-
size_t old = VDFS.size();
|
|
59
|
-
VDFS.resize(static_cast<size_t>(L));
|
|
60
|
-
for (size_t i = old; i < VDFS.size(); ++i) {
|
|
61
|
-
VDFS[i] = VPattern(static_cast<int>(i));
|
|
62
|
-
}
|
|
63
|
-
}
|
|
64
|
-
// ─────────────────────────────────────────────────────────────────────────────
|
|
65
38
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
// << ", Tree[0].sibl=" << Tree[0].sibl
|
|
70
|
-
// << ", Tree[0].freq=" << Tree[0].freq;
|
|
71
|
-
}
|
|
72
|
-
// std::cout << ", DFS.size()=" << DFS.size()
|
|
73
|
-
// << ", theta=" << theta
|
|
74
|
-
// << ", M=" << M
|
|
75
|
-
// << ", E=" << E
|
|
76
|
-
// << std::endl;
|
|
77
|
-
|
|
78
|
-
std::vector<int> tmp_list;
|
|
79
|
-
for (int i = 0; i < static_cast<int>(L); ++i) {
|
|
39
|
+
vector<int> list;
|
|
40
|
+
|
|
41
|
+
for (int i = 0; i < (int)L; ++i) {
|
|
80
42
|
if (DFS[i].freq >= theta) {
|
|
81
|
-
|
|
82
|
-
if (itmset_exists)
|
|
83
|
-
|
|
84
|
-
}
|
|
43
|
+
list.push_back(-i - 1);
|
|
44
|
+
if (itmset_exists)
|
|
45
|
+
list.push_back(i + 1);
|
|
85
46
|
}
|
|
86
47
|
}
|
|
87
48
|
|
|
88
|
-
for (int i = 0; i <
|
|
89
|
-
DFS[i].list =
|
|
90
|
-
}
|
|
49
|
+
for (int i = 0; i < (int)DFS.size(); ++i)
|
|
50
|
+
DFS[i].list = list;
|
|
91
51
|
|
|
92
|
-
while (!DFS.empty() && give_time(
|
|
93
|
-
if (DFS.back().freq >= theta)
|
|
52
|
+
while (!DFS.empty() && give_time(clock() - start_time) < time_limit) {
|
|
53
|
+
if (DFS.back().freq >= theta)
|
|
94
54
|
Extend_patt(DFS.back());
|
|
95
|
-
}
|
|
96
55
|
else {
|
|
97
56
|
DFS.pop_back();
|
|
98
|
-
if (!VDFS.empty() && VDFS.back().ass_patt ==
|
|
57
|
+
if (!VDFS.empty() && VDFS.back().ass_patt == DFS.size())
|
|
99
58
|
VDFS.pop_back();
|
|
100
|
-
}
|
|
101
59
|
}
|
|
102
60
|
}
|
|
103
61
|
}
|
|
104
62
|
|
|
105
|
-
|
|
106
63
|
void Extend_patt(Pattern& _pattern) {
|
|
64
|
+
|
|
107
65
|
swap(_patt, _pattern);
|
|
108
66
|
DFS.pop_back();
|
|
109
67
|
|
|
110
|
-
slist =
|
|
111
|
-
ilist_nempty =
|
|
68
|
+
slist = vector<bool>(L, 0);
|
|
69
|
+
ilist_nempty = 0;
|
|
112
70
|
|
|
113
71
|
if (itmset_exists) {
|
|
114
|
-
ilist =
|
|
115
|
-
for (
|
|
72
|
+
ilist = vector<bool>(L, 0);
|
|
73
|
+
for (vector<int>::iterator it = _patt.list.begin();
|
|
74
|
+
it != _patt.list.end(); ++it) {
|
|
116
75
|
if (*it < 0)
|
|
117
|
-
slist[-(*it) - 1] =
|
|
76
|
+
slist[-(*it) - 1] = 1;
|
|
118
77
|
else {
|
|
119
|
-
ilist[(*it) - 1] =
|
|
120
|
-
ilist_nempty =
|
|
78
|
+
ilist[(*it) - 1] = 1;
|
|
79
|
+
ilist_nempty = 1;
|
|
121
80
|
}
|
|
122
81
|
}
|
|
123
82
|
}
|
|
124
83
|
else {
|
|
125
|
-
for (
|
|
126
|
-
|
|
84
|
+
for (vector<int>::iterator it = _patt.list.begin();
|
|
85
|
+
it != _patt.list.end(); ++it)
|
|
86
|
+
slist[-(*it) - 1] = 1;
|
|
127
87
|
}
|
|
128
88
|
|
|
129
89
|
last_neg = _patt.seq.size() - 1;
|
|
@@ -131,39 +91,27 @@ void Extend_patt(Pattern& _pattern) {
|
|
|
131
91
|
--last_neg;
|
|
132
92
|
itmset_size = _patt.seq.size() - last_neg;
|
|
133
93
|
|
|
134
|
-
pot_patt =
|
|
94
|
+
pot_patt = vector<Pattern>(L + L * ilist_nempty);
|
|
135
95
|
if (!CTree.empty())
|
|
136
|
-
pot_vpatt =
|
|
96
|
+
pot_vpatt = vector<VPattern>(L + L * ilist_nempty);
|
|
137
97
|
|
|
138
|
-
last_strpnt =
|
|
98
|
+
last_strpnt = vector<unsigned long long int>(L, 0);
|
|
139
99
|
|
|
140
|
-
if (!VDFS.empty() && VDFS.back().ass_patt ==
|
|
100
|
+
if (!VDFS.empty() && VDFS.back().ass_patt == DFS.size()) {
|
|
141
101
|
swap(_vpatt, VDFS.back());
|
|
142
102
|
VDFS.pop_back();
|
|
143
103
|
for (unsigned long long int pnt = 0; pnt < _vpatt.str_pnt.size(); ++pnt) {
|
|
144
|
-
if (_vpatt.str_pnt[pnt] < 0)
|
|
145
|
-
Mine_vec(_vpatt.seq_ID[pnt],
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
0,
|
|
151
|
-
-1);
|
|
152
|
-
}
|
|
153
|
-
else {
|
|
154
|
-
Mine_vec(_vpatt.seq_ID[pnt],
|
|
155
|
-
_vpatt.str_pnt[pnt],
|
|
156
|
-
-1,
|
|
157
|
-
ancest_base,
|
|
158
|
-
VTree[_vpatt.seq_ID[pnt]].seq,
|
|
159
|
-
0,
|
|
160
|
-
1);
|
|
161
|
-
}
|
|
104
|
+
if (_vpatt.str_pnt[pnt] < 0)
|
|
105
|
+
Mine_vec(_vpatt.seq_ID[pnt], -_vpatt.str_pnt[pnt], -1,
|
|
106
|
+
ancest_base, CTree[_vpatt.seq_ID[pnt]].seq, 0, -1);
|
|
107
|
+
else
|
|
108
|
+
Mine_vec(_vpatt.seq_ID[pnt], _vpatt.str_pnt[pnt], -1,
|
|
109
|
+
ancest_base, VTree[_vpatt.seq_ID[pnt]].seq, 0, 1);
|
|
162
110
|
}
|
|
163
111
|
}
|
|
164
112
|
|
|
165
|
-
|
|
166
|
-
|
|
113
|
+
vector<unsigned long long int> DFS_itm;
|
|
114
|
+
vector<unsigned long long int> DFS_seq;
|
|
167
115
|
if (ilist_nempty)
|
|
168
116
|
DFS_numfound.clear();
|
|
169
117
|
|
|
@@ -173,23 +121,13 @@ void Extend_patt(Pattern& _pattern) {
|
|
|
173
121
|
unsigned long long int cur_sibl = DFS_itm.back();
|
|
174
122
|
DFS_itm.pop_back();
|
|
175
123
|
if (Tree[cur_sibl].itmset < 0) {
|
|
176
|
-
unsigned int carc = Tree[cur_sibl].chld;
|
|
177
|
-
Mine_vec(carc,
|
|
178
|
-
|
|
179
|
-
-1,
|
|
180
|
-
CTree[carc].ancest,
|
|
181
|
-
CTree[carc].seq,
|
|
182
|
-
_patt.str_pnt[pnt],
|
|
183
|
-
-1);
|
|
124
|
+
unsigned long long int carc = Tree[cur_sibl].chld;
|
|
125
|
+
Mine_vec(carc, 0, -1, CTree[carc].ancest,
|
|
126
|
+
CTree[carc].seq, _patt.str_pnt[pnt], -1);
|
|
184
127
|
cur_sibl = CTree[carc].ancest.back();
|
|
185
128
|
while (cur_sibl != 0) {
|
|
186
|
-
Mine_vec(cur_sibl - 1,
|
|
187
|
-
|
|
188
|
-
-1,
|
|
189
|
-
CTree[carc].ancest,
|
|
190
|
-
VTree[cur_sibl - 1].seq,
|
|
191
|
-
_patt.str_pnt[pnt],
|
|
192
|
-
1);
|
|
129
|
+
Mine_vec(cur_sibl - 1, 0, -1, CTree[carc].ancest,
|
|
130
|
+
VTree[cur_sibl - 1].seq, _patt.str_pnt[pnt], 1);
|
|
193
131
|
cur_sibl = VTree[cur_sibl - 1].sibl;
|
|
194
132
|
}
|
|
195
133
|
continue;
|
|
@@ -232,7 +170,7 @@ void Extend_patt(Pattern& _pattern) {
|
|
|
232
170
|
last_strpnt[i] = pot_patt[i + L].str_pnt.size();
|
|
233
171
|
}
|
|
234
172
|
}
|
|
235
|
-
while(!DFS_seq.empty()) {
|
|
173
|
+
while (!DFS_seq.empty()) {
|
|
236
174
|
unsigned long long int cur_sibl = DFS_seq.back();
|
|
237
175
|
DFS_seq.pop_back();
|
|
238
176
|
int num_found = 0;
|
|
@@ -242,22 +180,12 @@ void Extend_patt(Pattern& _pattern) {
|
|
|
242
180
|
}
|
|
243
181
|
if (Tree[cur_sibl].itmset < 0) {
|
|
244
182
|
unsigned int carc = Tree[cur_sibl].chld;
|
|
245
|
-
Mine_vec(carc,
|
|
246
|
-
|
|
247
|
-
num_found,
|
|
248
|
-
CTree[carc].ancest,
|
|
249
|
-
CTree[carc].seq,
|
|
250
|
-
_patt.str_pnt[pnt],
|
|
251
|
-
-1);
|
|
183
|
+
Mine_vec(carc, 0, num_found, CTree[carc].ancest,
|
|
184
|
+
CTree[carc].seq, _patt.str_pnt[pnt], -1);
|
|
252
185
|
cur_sibl = CTree[carc].ancest.back();
|
|
253
186
|
while (cur_sibl != 0) {
|
|
254
|
-
Mine_vec(cur_sibl - 1,
|
|
255
|
-
|
|
256
|
-
num_found,
|
|
257
|
-
CTree[carc].ancest,
|
|
258
|
-
VTree[cur_sibl - 1].seq,
|
|
259
|
-
_patt.str_pnt[pnt],
|
|
260
|
-
1);
|
|
187
|
+
Mine_vec(cur_sibl - 1, 0, num_found, CTree[carc].ancest,
|
|
188
|
+
VTree[cur_sibl - 1].seq, _patt.str_pnt[pnt], 1);
|
|
261
189
|
cur_sibl = VTree[cur_sibl - 1].sibl;
|
|
262
190
|
}
|
|
263
191
|
continue;
|
|
@@ -268,19 +196,18 @@ void Extend_patt(Pattern& _pattern) {
|
|
|
268
196
|
if (cur_itm > 0) {
|
|
269
197
|
if (num_found == itmset_size &&
|
|
270
198
|
ilist[cur_itm - 1] &&
|
|
271
|
-
(
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
{
|
|
199
|
+
(abs(Tree[Tree[cur_sibl].anct].itmset) <
|
|
200
|
+
abs(Tree[_patt.str_pnt[pnt]].itmset) ||
|
|
201
|
+
!check_parent(Tree[cur_sibl].anct, _patt.str_pnt[pnt],
|
|
202
|
+
last_strpnt[cur_itm - 1],
|
|
203
|
+
pot_patt[cur_itm + L - 1].str_pnt))) {
|
|
277
204
|
pot_patt[cur_itm + L - 1].freq += Tree[cur_sibl].freq;
|
|
278
205
|
if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0)
|
|
279
206
|
pot_patt[cur_itm + L - 1].str_pnt.push_back(cur_sibl);
|
|
280
207
|
}
|
|
281
208
|
if (slist[cur_itm - 1] &&
|
|
282
|
-
|
|
283
|
-
|
|
209
|
+
abs(Tree[Tree[cur_sibl].anct].itmset) <=
|
|
210
|
+
abs(Tree[_patt.str_pnt[pnt]].itmset)) {
|
|
284
211
|
pot_patt[cur_itm - 1].freq += Tree[cur_sibl].freq;
|
|
285
212
|
if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0)
|
|
286
213
|
pot_patt[cur_itm - 1].str_pnt.push_back(cur_sibl);
|
|
@@ -288,8 +215,9 @@ void Extend_patt(Pattern& _pattern) {
|
|
|
288
215
|
if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0) {
|
|
289
216
|
DFS_seq.push_back(cur_sibl);
|
|
290
217
|
if (ilist_nempty) {
|
|
291
|
-
if (num_found < itmset_size
|
|
292
|
-
|
|
218
|
+
if (num_found < itmset_size &&
|
|
219
|
+
cur_itm ==
|
|
220
|
+
abs(_patt.seq[last_neg + num_found]))
|
|
293
221
|
DFS_numfound.push_back(num_found + 1);
|
|
294
222
|
else
|
|
295
223
|
DFS_numfound.push_back(num_found);
|
|
@@ -299,8 +227,8 @@ void Extend_patt(Pattern& _pattern) {
|
|
|
299
227
|
else {
|
|
300
228
|
cur_itm = -cur_itm;
|
|
301
229
|
if (slist[cur_itm - 1] &&
|
|
302
|
-
|
|
303
|
-
|
|
230
|
+
abs(Tree[Tree[cur_sibl].anct].itmset) <=
|
|
231
|
+
abs(Tree[_patt.str_pnt[pnt]].itmset)) {
|
|
304
232
|
pot_patt[cur_itm - 1].freq += Tree[cur_sibl].freq;
|
|
305
233
|
if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0)
|
|
306
234
|
pot_patt[cur_itm - 1].str_pnt.push_back(cur_sibl);
|
|
@@ -320,126 +248,141 @@ void Extend_patt(Pattern& _pattern) {
|
|
|
320
248
|
}
|
|
321
249
|
}
|
|
322
250
|
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
for (
|
|
326
|
-
|
|
251
|
+
vector<int> ilistp;
|
|
252
|
+
vector<int> slistp;
|
|
253
|
+
for (vector<int>::iterator it = _patt.list.begin();
|
|
254
|
+
it != _patt.list.end(); ++it) {
|
|
255
|
+
if (*it > 0 && pot_patt[(*it) + L - 1].freq >= theta)
|
|
327
256
|
ilistp.push_back(*it);
|
|
328
257
|
else if (*it < 0 && pot_patt[-(*it) - 1].freq >= theta) {
|
|
329
|
-
if (itmset_exists)
|
|
258
|
+
if (itmset_exists)
|
|
330
259
|
slistp.push_back(-(*it));
|
|
331
260
|
ilistp.push_back(*it);
|
|
332
261
|
slistp.push_back(*it);
|
|
333
262
|
}
|
|
334
|
-
}
|
|
263
|
+
}
|
|
335
264
|
|
|
336
|
-
|
|
265
|
+
for (vector<int>::iterator it = ilistp.begin(); it != ilistp.end(); ++it) {
|
|
337
266
|
int p;
|
|
338
|
-
if (*it < 0)
|
|
267
|
+
if (*it < 0)
|
|
339
268
|
p = -(*it) - 1;
|
|
340
269
|
else
|
|
341
270
|
p = (*it) - 1 + L;
|
|
271
|
+
|
|
342
272
|
pot_patt[p].str_pnt.shrink_to_fit();
|
|
343
273
|
DFS.push_back(pot_patt[p]);
|
|
274
|
+
|
|
275
|
+
// Build the full pattern sequence
|
|
344
276
|
DFS.back().seq = _patt.seq;
|
|
345
277
|
DFS.back().seq.push_back(*it);
|
|
278
|
+
|
|
279
|
+
// Update candidate list
|
|
346
280
|
if (*it < 0)
|
|
347
281
|
DFS.back().list = slistp;
|
|
348
282
|
else
|
|
349
283
|
DFS.back().list = ilistp;
|
|
284
|
+
|
|
285
|
+
// Attach VPatterns if needed
|
|
350
286
|
if (!CTree.empty() && !pot_vpatt[p].str_pnt.empty()) {
|
|
351
|
-
pot_vpatt[p].ass_patt =
|
|
287
|
+
pot_vpatt[p].ass_patt = DFS.size() - 1;
|
|
352
288
|
VDFS.push_back(pot_vpatt[p]);
|
|
353
289
|
}
|
|
354
|
-
|
|
290
|
+
|
|
291
|
+
// ✅ Always record the pattern for Python, independent of b_disp / b_write.
|
|
292
|
+
collectedPatterns.push_back(DFS.back().seq);
|
|
293
|
+
|
|
294
|
+
// Original output behavior (only if requested)
|
|
295
|
+
if (b_disp || b_write)
|
|
355
296
|
Out_patt(DFS.back().seq, DFS.back().freq);
|
|
297
|
+
|
|
356
298
|
++num_patt;
|
|
357
299
|
}
|
|
300
|
+
|
|
358
301
|
}
|
|
359
302
|
|
|
303
|
+
void Mine_vec(unsigned long long int seq_ID, int pos, int num_found,
|
|
304
|
+
vector<unsigned long long int>& ancest,
|
|
305
|
+
vector<int>& items,
|
|
306
|
+
unsigned long long int pnt,
|
|
307
|
+
int sgn) {
|
|
360
308
|
|
|
361
|
-
|
|
362
|
-
int pos,
|
|
363
|
-
int num_found,
|
|
364
|
-
std::vector<std::uint64_t>& ancest,
|
|
365
|
-
std::vector<int>& items,
|
|
366
|
-
std::uint64_t pnt,
|
|
367
|
-
int sgn)
|
|
368
|
-
{
|
|
369
|
-
std::vector<bool> found(L + L * (ilist_nempty ? 1 : 0), false);
|
|
309
|
+
vector<bool> found(L + L * ilist_nempty, 0);
|
|
370
310
|
|
|
371
311
|
if (num_found == -1) {
|
|
372
|
-
while (pos <
|
|
312
|
+
while (pos < (int)items.size() && items[pos] > 0) {
|
|
373
313
|
int cur_itm = items[pos];
|
|
374
314
|
if (ilist[cur_itm - 1] && !found[cur_itm + L - 1]) {
|
|
375
|
-
if (pos + 1 <
|
|
315
|
+
if (pos + 1 < (int)items.size()) {
|
|
376
316
|
pot_vpatt[cur_itm + L - 1].seq_ID.push_back(seq_ID);
|
|
377
317
|
pot_vpatt[cur_itm + L - 1].str_pnt.push_back(sgn * (pos + 1));
|
|
378
318
|
}
|
|
379
319
|
++pot_patt[cur_itm + L - 1].freq;
|
|
380
|
-
found[cur_itm + L - 1] =
|
|
320
|
+
found[cur_itm + L - 1] = 1;
|
|
381
321
|
}
|
|
382
322
|
++pos;
|
|
383
323
|
}
|
|
384
324
|
}
|
|
385
325
|
|
|
386
326
|
for (unsigned int k = pos; k < items.size(); ++k) {
|
|
387
|
-
int cur_itm =
|
|
388
|
-
if (items[k] < 0)
|
|
327
|
+
int cur_itm = abs(items[k]);
|
|
328
|
+
if (items[k] < 0)
|
|
389
329
|
num_found = 0;
|
|
390
330
|
if (slist[cur_itm - 1] && !found[cur_itm - 1]) {
|
|
391
|
-
if (ancest.empty() ||
|
|
392
|
-
|
|
331
|
+
if (ancest.empty() ||
|
|
332
|
+
abs(Tree[ancest[cur_itm - 1]].itmset) <= abs(Tree[pnt].itmset)) {
|
|
333
|
+
if (k + 1 < (int)items.size()) {
|
|
393
334
|
pot_vpatt[cur_itm - 1].seq_ID.push_back(seq_ID);
|
|
394
335
|
pot_vpatt[cur_itm - 1].str_pnt.push_back(sgn * (k + 1));
|
|
395
336
|
}
|
|
396
337
|
++pot_patt[cur_itm - 1].freq;
|
|
397
338
|
}
|
|
398
|
-
found[cur_itm - 1] =
|
|
339
|
+
found[cur_itm - 1] = 1;
|
|
399
340
|
}
|
|
400
341
|
if (num_found == itmset_size) {
|
|
401
342
|
if (ilist[cur_itm - 1] && !found[cur_itm + L - 1]) {
|
|
402
|
-
if (ancest.empty() ||
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
343
|
+
if (ancest.empty() ||
|
|
344
|
+
abs(Tree[ancest[cur_itm - 1]].itmset) < abs(Tree[pnt].itmset) ||
|
|
345
|
+
!check_parent(ancest[cur_itm - 1], pnt,
|
|
346
|
+
last_strpnt[cur_itm - 1],
|
|
347
|
+
pot_patt[cur_itm + L - 1].str_pnt)) {
|
|
348
|
+
if (k + 1 < (int)items.size()) {
|
|
407
349
|
pot_vpatt[cur_itm + L - 1].seq_ID.push_back(seq_ID);
|
|
408
350
|
pot_vpatt[cur_itm + L - 1].str_pnt.push_back(sgn * (k + 1));
|
|
409
351
|
}
|
|
410
352
|
++pot_patt[cur_itm + L - 1].freq;
|
|
411
353
|
}
|
|
412
|
-
found[cur_itm + L - 1] =
|
|
354
|
+
found[cur_itm + L - 1] = 1;
|
|
413
355
|
}
|
|
414
356
|
}
|
|
415
|
-
else if (cur_itm ==
|
|
357
|
+
else if (cur_itm == abs(_patt.seq[last_neg + num_found]))
|
|
416
358
|
++num_found;
|
|
417
|
-
}
|
|
418
359
|
}
|
|
419
360
|
}
|
|
420
361
|
|
|
362
|
+
void Out_patt(vector<int>& seq, unsigned int freq) {
|
|
421
363
|
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
std::ofstream file_o;
|
|
425
|
-
if (b_write)
|
|
364
|
+
ofstream file_o;
|
|
365
|
+
if (b_write)
|
|
426
366
|
file_o.open(out_file, std::ios::app);
|
|
427
367
|
|
|
428
|
-
for (int ii = 0; ii <
|
|
368
|
+
for (int ii = 0; ii < (int)seq.size(); ii++) {
|
|
429
369
|
if (b_disp)
|
|
430
|
-
|
|
431
|
-
if (b_write)
|
|
370
|
+
cout << seq[ii] << " ";
|
|
371
|
+
if (b_write)
|
|
432
372
|
file_o << seq[ii] << " ";
|
|
433
373
|
}
|
|
434
374
|
if (b_disp)
|
|
435
|
-
|
|
375
|
+
cout << endl;
|
|
376
|
+
if (b_write)
|
|
377
|
+
file_o << endl;
|
|
378
|
+
|
|
379
|
+
if (b_disp)
|
|
380
|
+
cout << "************** Freq: " << freq << endl;
|
|
436
381
|
if (b_write) {
|
|
437
|
-
file_o <<
|
|
438
|
-
file_o << "************** Freq: " << freq << std::endl;
|
|
382
|
+
file_o << "************** Freq: " << freq << endl;
|
|
439
383
|
file_o.close();
|
|
440
384
|
}
|
|
441
|
-
if (b_disp)
|
|
442
|
-
std::cout << "************** Freq: " << freq << std::endl;
|
|
443
385
|
}
|
|
444
386
|
|
|
387
|
+
|
|
445
388
|
} // namespace largehm
|
|
@@ -1,77 +1,54 @@
|
|
|
1
|
-
#
|
|
2
|
-
|
|
3
|
-
#include
|
|
4
|
-
#include
|
|
5
|
-
|
|
6
|
-
#include <fstream>
|
|
7
|
-
#include <ctime> // for clock_t
|
|
8
|
-
extern std::vector<std::uint64_t> ancest_base;
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#include "load_inst.hpp"
|
|
4
|
+
#include "build_mdd.hpp"
|
|
5
|
+
|
|
9
6
|
namespace largehm {
|
|
10
7
|
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
8
|
+
using namespace std;
|
|
9
|
+
|
|
10
|
+
void Freq_miner();
|
|
14
11
|
|
|
15
12
|
class Pattern {
|
|
16
13
|
public:
|
|
17
|
-
|
|
18
|
-
unsigned int
|
|
19
|
-
|
|
20
|
-
|
|
14
|
+
vector<int> seq;
|
|
15
|
+
vector<unsigned long long int> str_pnt;
|
|
16
|
+
vector<int> list;
|
|
17
|
+
unsigned long long int freq;
|
|
18
|
+
|
|
19
|
+
Pattern(vector<int>& _seq, int item) {
|
|
20
|
+
seq.swap(_seq);
|
|
21
|
+
seq.push_back(item);
|
|
22
|
+
freq = 0;
|
|
23
|
+
}
|
|
21
24
|
|
|
22
|
-
Pattern(int
|
|
23
|
-
|
|
24
|
-
|
|
25
|
+
Pattern(int item) {
|
|
26
|
+
seq.push_back(item);
|
|
27
|
+
freq = 0;
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
Pattern() {
|
|
31
|
+
freq = 0;
|
|
25
32
|
}
|
|
26
33
|
};
|
|
27
34
|
|
|
28
35
|
class VPattern {
|
|
29
36
|
public:
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
int
|
|
37
|
+
unsigned long long int ass_patt;
|
|
38
|
+
vector<int> str_pnt;
|
|
39
|
+
vector<unsigned long long int> seq_ID;
|
|
33
40
|
|
|
34
|
-
VPattern(
|
|
35
|
-
|
|
41
|
+
VPattern(unsigned long long int _patt) {
|
|
42
|
+
ass_patt = _patt;
|
|
43
|
+
}
|
|
36
44
|
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
extern std::vector<VPattern> VDFS;
|
|
45
|
+
VPattern() {
|
|
46
|
+
ass_patt = 0;
|
|
47
|
+
}
|
|
48
|
+
};
|
|
42
49
|
|
|
43
50
|
extern unsigned long long int num_patt;
|
|
44
|
-
|
|
45
|
-
extern
|
|
46
|
-
extern std::vector<bool> slist;
|
|
47
|
-
|
|
48
|
-
extern std::vector<Pattern> pot_patt;
|
|
49
|
-
extern std::vector<VPattern> pot_vpatt;
|
|
50
|
-
extern std::vector<unsigned long long int> last_strpnt;
|
|
51
|
-
|
|
52
|
-
extern std::vector<int> DFS_numfound;
|
|
53
|
-
|
|
54
|
-
extern Pattern _patt;
|
|
55
|
-
extern VPattern _vpatt;
|
|
56
|
-
|
|
57
|
-
extern int itmset_size;
|
|
58
|
-
extern int last_neg;
|
|
59
|
-
extern bool ilist_nempty;
|
|
60
|
-
|
|
61
|
-
//
|
|
62
|
-
// ─── Function Prototypes ─────────────────────────────────────────────────────
|
|
63
|
-
//
|
|
64
|
-
void Freq_miner();
|
|
65
|
-
void Extend_patt(Pattern& _patt);
|
|
66
|
-
void Mine_vec(std::uint64_t seq_ID,
|
|
67
|
-
int pos,
|
|
68
|
-
int num_found,
|
|
69
|
-
std::vector<std::uint64_t>& ancest,
|
|
70
|
-
std::vector<int>& items,
|
|
71
|
-
std::uint64_t pnt,
|
|
72
|
-
int sgn);
|
|
73
|
-
void Out_patt(std::vector<int>& seq, unsigned int freq);
|
|
51
|
+
extern vector<Pattern> DFS;
|
|
52
|
+
extern vector<VPattern> VDFS;
|
|
74
53
|
|
|
75
54
|
} // namespace largehm
|
|
76
|
-
|
|
77
|
-
#endif // LARGEHM_FREQ_MINER_HPP
|