effspm 0.2.7__cp310-cp310-macosx_11_0_arm64.whl → 0.3.0__cp310-cp310-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- effspm/_effspm.cpp +310 -240
- effspm/_effspm.cpython-310-darwin.so +0 -0
- effspm/btminer/src/build_mdd.cpp +42 -17
- effspm/btminer/src/build_mdd.hpp +13 -19
- effspm/btminer/src/freq_miner.cpp +134 -49
- effspm/btminer/src/freq_miner.hpp +16 -0
- effspm/btminer/src/load_inst.cpp +196 -121
- effspm/btminer/src/load_inst.hpp +22 -4
- effspm/btminer/src/utility.cpp +26 -41
- effspm/btminer/src/utility.hpp +6 -30
- effspm/freq_miner.hpp +2 -1
- effspm/htminer/src/build_mdd.cpp +33 -86
- effspm/largebm/src/build_mdd.cpp +69 -110
- effspm/largebm/src/build_mdd.hpp +22 -37
- effspm/largebm/src/freq_miner.cpp +241 -291
- effspm/largebm/src/freq_miner.hpp +25 -36
- effspm/largebm/src/load_inst.cpp +20 -26
- effspm/largebm/src/load_inst.hpp +24 -34
- effspm/largebm/src/utility.cpp +11 -21
- effspm/largebm/src/utility.hpp +7 -10
- effspm/largehm/src/freq_miner.cpp +62 -78
- effspm/largehm/src/load_inst.cpp +79 -61
- effspm/largepp/src/freq_miner.cpp +184 -156
- effspm/largepp/src/freq_miner.hpp +11 -36
- effspm/largepp/src/load_inst.cpp +27 -8
- effspm/largepp/src/load_inst.hpp +15 -9
- effspm/largepp/src/pattern.hpp +31 -0
- effspm/load_inst.hpp +1 -1
- {effspm-0.2.7.dist-info → effspm-0.3.0.dist-info}/METADATA +1 -1
- effspm-0.3.0.dist-info/RECORD +54 -0
- effspm-0.2.7.dist-info/RECORD +0 -53
- {effspm-0.2.7.dist-info → effspm-0.3.0.dist-info}/WHEEL +0 -0
- {effspm-0.2.7.dist-info → effspm-0.3.0.dist-info}/licenses/LICENSE +0 -0
- {effspm-0.2.7.dist-info → effspm-0.3.0.dist-info}/top_level.txt +0 -0
|
@@ -1,26 +1,33 @@
|
|
|
1
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
2
|
+
// NEW CHANGE (2025-10-24):
|
|
3
|
+
// - Always call Out_patt(...) so patterns are collected regardless of verbosity.
|
|
4
|
+
// - Printing/writing remains guarded inside Out_patt by b_disp/b_write.
|
|
5
|
+
// - This fixes LargeHT returning 0 patterns when verbose=False.
|
|
6
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
1
7
|
|
|
2
8
|
#include <cstdint>
|
|
3
9
|
#include <vector>
|
|
4
10
|
|
|
5
11
|
#include <iostream>
|
|
6
12
|
#include <time.h>
|
|
7
|
-
// for std::vector
|
|
8
13
|
#include <cmath> // for std::ceil
|
|
9
14
|
|
|
10
15
|
#include "freq_miner.hpp"
|
|
11
16
|
#include "build_mdd.hpp"
|
|
12
17
|
#include "utility.hpp"
|
|
18
|
+
|
|
13
19
|
std::vector<std::uint64_t> ancest_base;
|
|
20
|
+
|
|
14
21
|
namespace largehm {
|
|
15
22
|
|
|
16
23
|
void Out_patt(std::vector<int>& seq, unsigned int freq);
|
|
17
24
|
void Extend_patt(Pattern& _patt);
|
|
18
|
-
void Mine_vec(
|
|
25
|
+
void Mine_vec(std::uint64_t seq_ID,
|
|
19
26
|
int pos,
|
|
20
27
|
int num_found,
|
|
21
|
-
|
|
28
|
+
std::vector<std::uint64_t>& ancest,
|
|
22
29
|
std::vector<int>& items,
|
|
23
|
-
|
|
30
|
+
std::uint64_t pnt,
|
|
24
31
|
int sgn);
|
|
25
32
|
|
|
26
33
|
unsigned long long int num_patt = 0;
|
|
@@ -28,25 +35,22 @@ unsigned long long int num_patt = 0;
|
|
|
28
35
|
std::vector<bool> ilist;
|
|
29
36
|
std::vector<bool> slist;
|
|
30
37
|
|
|
31
|
-
std::vector<Pattern>
|
|
38
|
+
std::vector<Pattern> pot_patt;
|
|
32
39
|
std::vector<VPattern> pot_vpatt;
|
|
33
40
|
std::vector<unsigned long long int> last_strpnt;
|
|
34
41
|
|
|
35
42
|
std::vector<int> DFS_numfound;
|
|
36
43
|
|
|
37
|
-
Pattern
|
|
44
|
+
Pattern _patt;
|
|
38
45
|
VPattern _vpatt;
|
|
39
46
|
|
|
40
|
-
int
|
|
41
|
-
int
|
|
47
|
+
int itmset_size;
|
|
48
|
+
int last_neg;
|
|
42
49
|
|
|
43
50
|
bool ilist_nempty;
|
|
44
51
|
|
|
45
|
-
|
|
46
52
|
void Freq_miner() {
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
// ─── Make sure DFS and VDFS are at least size L ─────────────────────────────
|
|
53
|
+
// Ensure DFS and VDFS are at least size L
|
|
50
54
|
if (DFS.size() < static_cast<size_t>(L)) {
|
|
51
55
|
size_t old = DFS.size();
|
|
52
56
|
DFS.resize(static_cast<size_t>(L));
|
|
@@ -61,19 +65,6 @@ void Freq_miner() {
|
|
|
61
65
|
VDFS[i] = VPattern(static_cast<int>(i));
|
|
62
66
|
}
|
|
63
67
|
}
|
|
64
|
-
// ─────────────────────────────────────────────────────────────────────────────
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
if (!Tree.empty()) {
|
|
68
|
-
// std::cout << ", Tree[0].chld=" << Tree[0].chld
|
|
69
|
-
// << ", Tree[0].sibl=" << Tree[0].sibl
|
|
70
|
-
// << ", Tree[0].freq=" << Tree[0].freq;
|
|
71
|
-
}
|
|
72
|
-
// std::cout << ", DFS.size()=" << DFS.size()
|
|
73
|
-
// << ", theta=" << theta
|
|
74
|
-
// << ", M=" << M
|
|
75
|
-
// << ", E=" << E
|
|
76
|
-
// << std::endl;
|
|
77
68
|
|
|
78
69
|
std::vector<int> tmp_list;
|
|
79
70
|
for (int i = 0; i < static_cast<int>(L); ++i) {
|
|
@@ -84,7 +75,6 @@ void Freq_miner() {
|
|
|
84
75
|
}
|
|
85
76
|
}
|
|
86
77
|
}
|
|
87
|
-
|
|
88
78
|
for (int i = 0; i < static_cast<int>(DFS.size()); ++i) {
|
|
89
79
|
DFS[i].list = tmp_list;
|
|
90
80
|
}
|
|
@@ -92,8 +82,7 @@ void Freq_miner() {
|
|
|
92
82
|
while (!DFS.empty() && give_time(std::clock() - start_time) < time_limit) {
|
|
93
83
|
if (DFS.back().freq >= theta) {
|
|
94
84
|
Extend_patt(DFS.back());
|
|
95
|
-
}
|
|
96
|
-
else {
|
|
85
|
+
} else {
|
|
97
86
|
DFS.pop_back();
|
|
98
87
|
if (!VDFS.empty() && VDFS.back().ass_patt == static_cast<int>(DFS.size())) {
|
|
99
88
|
VDFS.pop_back();
|
|
@@ -102,7 +91,6 @@ void Freq_miner() {
|
|
|
102
91
|
}
|
|
103
92
|
}
|
|
104
93
|
|
|
105
|
-
|
|
106
94
|
void Extend_patt(Pattern& _pattern) {
|
|
107
95
|
swap(_patt, _pattern);
|
|
108
96
|
DFS.pop_back();
|
|
@@ -120,8 +108,7 @@ void Extend_patt(Pattern& _pattern) {
|
|
|
120
108
|
ilist_nempty = true;
|
|
121
109
|
}
|
|
122
110
|
}
|
|
123
|
-
}
|
|
124
|
-
else {
|
|
111
|
+
} else {
|
|
125
112
|
for (auto it = _patt.list.begin(); it != _patt.list.end(); ++it)
|
|
126
113
|
slist[-(*it) - 1] = true;
|
|
127
114
|
}
|
|
@@ -149,8 +136,7 @@ void Extend_patt(Pattern& _pattern) {
|
|
|
149
136
|
CTree[_vpatt.seq_ID[pnt]].seq,
|
|
150
137
|
0,
|
|
151
138
|
-1);
|
|
152
|
-
}
|
|
153
|
-
else {
|
|
139
|
+
} else {
|
|
154
140
|
Mine_vec(_vpatt.seq_ID[pnt],
|
|
155
141
|
_vpatt.str_pnt[pnt],
|
|
156
142
|
-1,
|
|
@@ -174,18 +160,14 @@ void Extend_patt(Pattern& _pattern) {
|
|
|
174
160
|
DFS_itm.pop_back();
|
|
175
161
|
if (Tree[cur_sibl].itmset < 0) {
|
|
176
162
|
unsigned int carc = Tree[cur_sibl].chld;
|
|
177
|
-
Mine_vec(carc,
|
|
178
|
-
0,
|
|
179
|
-
-1,
|
|
163
|
+
Mine_vec(carc, 0, -1,
|
|
180
164
|
CTree[carc].ancest,
|
|
181
165
|
CTree[carc].seq,
|
|
182
166
|
_patt.str_pnt[pnt],
|
|
183
167
|
-1);
|
|
184
168
|
cur_sibl = CTree[carc].ancest.back();
|
|
185
169
|
while (cur_sibl != 0) {
|
|
186
|
-
Mine_vec(cur_sibl - 1,
|
|
187
|
-
0,
|
|
188
|
-
-1,
|
|
170
|
+
Mine_vec(cur_sibl - 1, 0, -1,
|
|
189
171
|
CTree[carc].ancest,
|
|
190
172
|
VTree[cur_sibl - 1].seq,
|
|
191
173
|
_patt.str_pnt[pnt],
|
|
@@ -213,8 +195,7 @@ void Extend_patt(Pattern& _pattern) {
|
|
|
213
195
|
DFS_numfound.push_back(0);
|
|
214
196
|
}
|
|
215
197
|
}
|
|
216
|
-
}
|
|
217
|
-
else {
|
|
198
|
+
} else {
|
|
218
199
|
if (ilist[cur_itm - 1]) {
|
|
219
200
|
pot_patt[cur_itm + L - 1].freq += Tree[cur_sibl].freq;
|
|
220
201
|
if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0)
|
|
@@ -226,13 +207,15 @@ void Extend_patt(Pattern& _pattern) {
|
|
|
226
207
|
cur_sibl = Tree[cur_sibl].sibl;
|
|
227
208
|
}
|
|
228
209
|
}
|
|
210
|
+
|
|
229
211
|
if (ilist_nempty) {
|
|
230
212
|
for (int i = 0; i < (int)L; ++i) {
|
|
231
213
|
if (ilist[i])
|
|
232
214
|
last_strpnt[i] = pot_patt[i + L].str_pnt.size();
|
|
233
215
|
}
|
|
234
216
|
}
|
|
235
|
-
|
|
217
|
+
|
|
218
|
+
while (!DFS_seq.empty()) {
|
|
236
219
|
unsigned long long int cur_sibl = DFS_seq.back();
|
|
237
220
|
DFS_seq.pop_back();
|
|
238
221
|
int num_found = 0;
|
|
@@ -242,18 +225,14 @@ void Extend_patt(Pattern& _pattern) {
|
|
|
242
225
|
}
|
|
243
226
|
if (Tree[cur_sibl].itmset < 0) {
|
|
244
227
|
unsigned int carc = Tree[cur_sibl].chld;
|
|
245
|
-
Mine_vec(carc,
|
|
246
|
-
0,
|
|
247
|
-
num_found,
|
|
228
|
+
Mine_vec(carc, 0, num_found,
|
|
248
229
|
CTree[carc].ancest,
|
|
249
230
|
CTree[carc].seq,
|
|
250
231
|
_patt.str_pnt[pnt],
|
|
251
232
|
-1);
|
|
252
233
|
cur_sibl = CTree[carc].ancest.back();
|
|
253
234
|
while (cur_sibl != 0) {
|
|
254
|
-
Mine_vec(cur_sibl - 1,
|
|
255
|
-
0,
|
|
256
|
-
num_found,
|
|
235
|
+
Mine_vec(cur_sibl - 1, 0, num_found,
|
|
257
236
|
CTree[carc].ancest,
|
|
258
237
|
VTree[cur_sibl - 1].seq,
|
|
259
238
|
_patt.str_pnt[pnt],
|
|
@@ -268,19 +247,17 @@ void Extend_patt(Pattern& _pattern) {
|
|
|
268
247
|
if (cur_itm > 0) {
|
|
269
248
|
if (num_found == itmset_size &&
|
|
270
249
|
ilist[cur_itm - 1] &&
|
|
271
|
-
(std::abs(Tree[Tree[cur_sibl].anct].itmset) < std::abs(Tree[_patt.str_pnt[pnt]].itmset)
|
|
250
|
+
(std::abs(Tree[Tree[cur_sibl].anct].itmset) < std::abs(Tree[_patt.str_pnt[pnt]].itmset)
|
|
272
251
|
|| !check_parent(Tree[cur_sibl].anct,
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
{
|
|
252
|
+
_patt.str_pnt[pnt],
|
|
253
|
+
last_strpnt[cur_itm - 1],
|
|
254
|
+
pot_patt[cur_itm + L - 1].str_pnt))) {
|
|
277
255
|
pot_patt[cur_itm + L - 1].freq += Tree[cur_sibl].freq;
|
|
278
256
|
if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0)
|
|
279
257
|
pot_patt[cur_itm + L - 1].str_pnt.push_back(cur_sibl);
|
|
280
258
|
}
|
|
281
259
|
if (slist[cur_itm - 1] &&
|
|
282
|
-
std::abs(Tree[Tree[cur_sibl].anct].itmset) <= std::abs(Tree[_patt.str_pnt[pnt]].itmset))
|
|
283
|
-
{
|
|
260
|
+
std::abs(Tree[Tree[cur_sibl].anct].itmset) <= std::abs(Tree[_patt.str_pnt[pnt]].itmset)) {
|
|
284
261
|
pot_patt[cur_itm - 1].freq += Tree[cur_sibl].freq;
|
|
285
262
|
if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0)
|
|
286
263
|
pot_patt[cur_itm - 1].str_pnt.push_back(cur_sibl);
|
|
@@ -288,19 +265,17 @@ void Extend_patt(Pattern& _pattern) {
|
|
|
288
265
|
if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0) {
|
|
289
266
|
DFS_seq.push_back(cur_sibl);
|
|
290
267
|
if (ilist_nempty) {
|
|
291
|
-
if (num_found < itmset_size
|
|
292
|
-
|
|
268
|
+
if (num_found < itmset_size &&
|
|
269
|
+
cur_itm == std::abs(_patt.seq[last_neg + num_found]))
|
|
293
270
|
DFS_numfound.push_back(num_found + 1);
|
|
294
271
|
else
|
|
295
272
|
DFS_numfound.push_back(num_found);
|
|
296
273
|
}
|
|
297
274
|
}
|
|
298
|
-
}
|
|
299
|
-
else {
|
|
275
|
+
} else {
|
|
300
276
|
cur_itm = -cur_itm;
|
|
301
277
|
if (slist[cur_itm - 1] &&
|
|
302
|
-
std::abs(Tree[Tree[cur_sibl].anct].itmset) <= std::abs(Tree[_patt.str_pnt[pnt]].itmset))
|
|
303
|
-
{
|
|
278
|
+
std::abs(Tree[Tree[cur_sibl].anct].itmset) <= std::abs(Tree[_patt.str_pnt[pnt]].itmset)) {
|
|
304
279
|
pot_patt[cur_itm - 1].freq += Tree[cur_sibl].freq;
|
|
305
280
|
if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0)
|
|
306
281
|
pot_patt[cur_itm - 1].str_pnt.push_back(cur_sibl);
|
|
@@ -323,22 +298,23 @@ void Extend_patt(Pattern& _pattern) {
|
|
|
323
298
|
std::vector<int> ilistp;
|
|
324
299
|
std::vector<int> slistp;
|
|
325
300
|
for (auto it = _patt.list.begin(); it != _patt.list.end(); ++it) {
|
|
326
|
-
if (*it > 0 && pot_patt[(*it) + L - 1].freq >= theta)
|
|
301
|
+
if (*it > 0 && pot_patt[(*it) + L - 1].freq >= theta)
|
|
327
302
|
ilistp.push_back(*it);
|
|
328
303
|
else if (*it < 0 && pot_patt[-(*it) - 1].freq >= theta) {
|
|
329
|
-
if (itmset_exists)
|
|
304
|
+
if (itmset_exists)
|
|
330
305
|
slistp.push_back(-(*it));
|
|
331
306
|
ilistp.push_back(*it);
|
|
332
307
|
slistp.push_back(*it);
|
|
333
308
|
}
|
|
334
|
-
}
|
|
309
|
+
}
|
|
335
310
|
|
|
336
311
|
for (auto it = ilistp.begin(); it != ilistp.end(); ++it) {
|
|
337
312
|
int p;
|
|
338
|
-
if (*it < 0)
|
|
313
|
+
if (*it < 0)
|
|
339
314
|
p = -(*it) - 1;
|
|
340
315
|
else
|
|
341
316
|
p = (*it) - 1 + L;
|
|
317
|
+
|
|
342
318
|
pot_patt[p].str_pnt.shrink_to_fit();
|
|
343
319
|
DFS.push_back(pot_patt[p]);
|
|
344
320
|
DFS.back().seq = _patt.seq;
|
|
@@ -347,17 +323,23 @@ void Extend_patt(Pattern& _pattern) {
|
|
|
347
323
|
DFS.back().list = slistp;
|
|
348
324
|
else
|
|
349
325
|
DFS.back().list = ilistp;
|
|
326
|
+
|
|
350
327
|
if (!CTree.empty() && !pot_vpatt[p].str_pnt.empty()) {
|
|
351
328
|
pot_vpatt[p].ass_patt = static_cast<int>(DFS.size()) - 1;
|
|
352
329
|
VDFS.push_back(pot_vpatt[p]);
|
|
353
330
|
}
|
|
354
|
-
|
|
355
|
-
|
|
331
|
+
|
|
332
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
333
|
+
// NEW CHANGE: Call Out_patt ALWAYS to populate `collected` even when
|
|
334
|
+
// verbose is false and we are not writing to a file.
|
|
335
|
+
// Out_patt itself guards printing/writing with b_disp/b_write.
|
|
336
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
337
|
+
Out_patt(DFS.back().seq, DFS.back().freq);
|
|
338
|
+
|
|
356
339
|
++num_patt;
|
|
357
340
|
}
|
|
358
341
|
}
|
|
359
342
|
|
|
360
|
-
|
|
361
343
|
void Mine_vec(std::uint64_t seq_ID,
|
|
362
344
|
int pos,
|
|
363
345
|
int num_found,
|
|
@@ -377,7 +359,7 @@ void Mine_vec(std::uint64_t seq_ID,
|
|
|
377
359
|
pot_vpatt[cur_itm + L - 1].str_pnt.push_back(sgn * (pos + 1));
|
|
378
360
|
}
|
|
379
361
|
++pot_patt[cur_itm + L - 1].freq;
|
|
380
|
-
found[cur_itm + L - 1] = true;
|
|
362
|
+
found[cur_itm + L - 1] = true;
|
|
381
363
|
}
|
|
382
364
|
++pos;
|
|
383
365
|
}
|
|
@@ -385,8 +367,9 @@ void Mine_vec(std::uint64_t seq_ID,
|
|
|
385
367
|
|
|
386
368
|
for (unsigned int k = pos; k < items.size(); ++k) {
|
|
387
369
|
int cur_itm = std::abs(items[k]);
|
|
388
|
-
if (items[k] < 0)
|
|
370
|
+
if (items[k] < 0)
|
|
389
371
|
num_found = 0;
|
|
372
|
+
|
|
390
373
|
if (slist[cur_itm - 1] && !found[cur_itm - 1]) {
|
|
391
374
|
if (ancest.empty() || std::abs(Tree[ancest[cur_itm - 1]].itmset) <= std::abs(Tree[pnt].itmset)) {
|
|
392
375
|
if (k + 1 < static_cast<int>(items.size())) {
|
|
@@ -397,11 +380,12 @@ void Mine_vec(std::uint64_t seq_ID,
|
|
|
397
380
|
}
|
|
398
381
|
found[cur_itm - 1] = true;
|
|
399
382
|
}
|
|
383
|
+
|
|
400
384
|
if (num_found == itmset_size) {
|
|
401
385
|
if (ilist[cur_itm - 1] && !found[cur_itm + L - 1]) {
|
|
402
|
-
if (ancest.empty() ||
|
|
403
|
-
std::abs(Tree[ancest[cur_itm - 1]].itmset) < std::abs(Tree[pnt].itmset)
|
|
404
|
-
|
|
386
|
+
if (ancest.empty() ||
|
|
387
|
+
std::abs(Tree[ancest[cur_itm - 1]].itmset) < std::abs(Tree[pnt].itmset) ||
|
|
388
|
+
!check_parent(ancest[cur_itm - 1], pnt, last_strpnt[cur_itm - 1], pot_patt[cur_itm + L - 1].str_pnt))
|
|
405
389
|
{
|
|
406
390
|
if (k + 1 < static_cast<int>(items.size())) {
|
|
407
391
|
pot_vpatt[cur_itm + L - 1].seq_ID.push_back(seq_ID);
|
|
@@ -411,24 +395,24 @@ void Mine_vec(std::uint64_t seq_ID,
|
|
|
411
395
|
}
|
|
412
396
|
found[cur_itm + L - 1] = true;
|
|
413
397
|
}
|
|
414
|
-
}
|
|
415
|
-
else if (cur_itm == std::abs(_patt.seq[last_neg + num_found])) {
|
|
398
|
+
} else if (cur_itm == std::abs(_patt.seq[last_neg + num_found])) {
|
|
416
399
|
++num_found;
|
|
417
400
|
}
|
|
418
401
|
}
|
|
419
402
|
}
|
|
420
403
|
|
|
421
|
-
|
|
422
404
|
void Out_patt(std::vector<int>& seq, unsigned int freq) {
|
|
405
|
+
// Always collect:
|
|
423
406
|
largehm::collected.push_back(seq);
|
|
407
|
+
|
|
424
408
|
std::ofstream file_o;
|
|
425
|
-
if (b_write)
|
|
409
|
+
if (b_write)
|
|
426
410
|
file_o.open(out_file, std::ios::app);
|
|
427
411
|
|
|
428
412
|
for (int ii = 0; ii < static_cast<int>(seq.size()); ii++) {
|
|
429
413
|
if (b_disp)
|
|
430
414
|
std::cout << seq[ii] << " ";
|
|
431
|
-
if (b_write)
|
|
415
|
+
if (b_write)
|
|
432
416
|
file_o << seq[ii] << " ";
|
|
433
417
|
}
|
|
434
418
|
if (b_disp)
|