effspm 0.3.0__cp310-cp310-macosx_11_0_arm64.whl → 0.3.3__cp310-cp310-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- effspm/_effspm.cpp +683 -2
- effspm/_effspm.cpython-310-darwin.so +0 -0
- effspm/btminer/src/load_inst.cpp +21 -11
- effspm/btminer/src/main.cpp +83 -0
- effspm/htminer/src/build_mdd.cpp +41 -66
- effspm/htminer/src/build_mdd.hpp +56 -49
- effspm/htminer/src/freq_miner.cpp +341 -307
- effspm/htminer/src/freq_miner.hpp +39 -40
- effspm/htminer/src/load_inst.cpp +287 -336
- effspm/htminer/src/load_inst.hpp +23 -6
- effspm/htminer/src/main.cpp +97 -0
- effspm/htminer/src/utility.cpp +38 -57
- effspm/htminer/src/utility.hpp +9 -64
- effspm/largebm/src/main.cpp +95 -0
- effspm/largehm/src/build_mdd.cpp +75 -110
- effspm/largehm/src/build_mdd.hpp +53 -73
- effspm/largehm/src/freq_miner.cpp +132 -173
- effspm/largehm/src/freq_miner.hpp +37 -60
- effspm/largehm/src/load_inst.cpp +136 -191
- effspm/largehm/src/load_inst.hpp +13 -50
- effspm/largehm/src/main.cpp +95 -0
- effspm/largehm/src/utility.cpp +46 -28
- effspm/largehm/src/utility.hpp +18 -16
- effspm/largepp/src/load_inst.cpp +5 -4
- effspm/largepp/src/main.cpp +108 -0
- effspm/load_inst.cpp +8 -8
- effspm/main.cpp +103 -0
- {effspm-0.3.0.dist-info → effspm-0.3.3.dist-info}/METADATA +1 -1
- effspm-0.3.3.dist-info/RECORD +60 -0
- effspm-0.3.0.dist-info/RECORD +0 -54
- {effspm-0.3.0.dist-info → effspm-0.3.3.dist-info}/WHEEL +0 -0
- {effspm-0.3.0.dist-info → effspm-0.3.3.dist-info}/licenses/LICENSE +0 -0
- {effspm-0.3.0.dist-info → effspm-0.3.3.dist-info}/top_level.txt +0 -0
effspm/largehm/src/build_mdd.hpp
CHANGED
|
@@ -1,93 +1,73 @@
|
|
|
1
|
-
#
|
|
2
|
-
#define LARGEHM_BUILD_MDD_HPP
|
|
1
|
+
#pragma once
|
|
3
2
|
|
|
4
3
|
#include <vector>
|
|
5
|
-
#include <
|
|
6
|
-
#include
|
|
7
|
-
#include <cstdint> // for uint64_t
|
|
8
|
-
|
|
9
|
-
#include "load_inst.hpp" // defines L, DFS, VDFS, Tree, etc.
|
|
10
|
-
#include "freq_miner.hpp" // for Pattern, VPattern
|
|
11
|
-
#include "utility.hpp" // if you need check_parent or collected
|
|
4
|
+
#include <cmath>
|
|
5
|
+
#include "load_inst.hpp"
|
|
12
6
|
|
|
13
7
|
namespace largehm {
|
|
14
8
|
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
//
|
|
37
|
-
|
|
38
|
-
int Add_arc(int item,
|
|
39
|
-
std::uint64_t last_arc,
|
|
40
|
-
int& itmset,
|
|
41
|
-
std::unordered_map<int, std::uint64_t>& ancest_map);
|
|
42
|
-
|
|
43
|
-
void Add_vec(std::vector<int>& items_lim,
|
|
44
|
-
std::unordered_map<int, std::uint64_t>& ancest_map,
|
|
45
|
-
std::uint64_t last_arc,
|
|
46
|
-
int itmset);
|
|
47
|
-
|
|
48
|
-
//
|
|
49
|
-
// ─── Struct Definitions ───────────────────────────────────────────────────────
|
|
50
|
-
//
|
|
51
|
-
|
|
52
|
-
struct Arc {
|
|
53
|
-
int item;
|
|
54
|
-
int itmset;
|
|
55
|
-
std::uint64_t anct;
|
|
56
|
-
std::uint64_t chld;
|
|
57
|
-
std::uint64_t sibl;
|
|
58
|
-
unsigned long long freq;
|
|
9
|
+
using namespace std;
|
|
10
|
+
|
|
11
|
+
void Build_MDD(vector<int>& items, vector<int>& items_lim);
|
|
12
|
+
|
|
13
|
+
class Arc {
|
|
14
|
+
public:
|
|
15
|
+
unsigned long long int chld;
|
|
16
|
+
unsigned long long int sibl;
|
|
17
|
+
unsigned long long int freq;
|
|
18
|
+
unsigned long long int anct;
|
|
19
|
+
int itmset;
|
|
20
|
+
int item;
|
|
21
|
+
|
|
22
|
+
Arc(unsigned int _itm, int _itmset, unsigned long long int _anc) {
|
|
23
|
+
chld = 0;
|
|
24
|
+
sibl = 0;
|
|
25
|
+
freq = 0;
|
|
26
|
+
itmset = _itmset;
|
|
27
|
+
anct = _anc;
|
|
28
|
+
item = _itm;
|
|
29
|
+
}
|
|
59
30
|
|
|
60
|
-
Arc(
|
|
61
|
-
|
|
62
|
-
|
|
31
|
+
Arc() {
|
|
32
|
+
chld = 0;
|
|
33
|
+
sibl = 0;
|
|
34
|
+
freq = 0;
|
|
35
|
+
anct = 0;
|
|
36
|
+
itmset = 0;
|
|
37
|
+
item = 0;
|
|
38
|
+
}
|
|
63
39
|
};
|
|
64
40
|
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
41
|
+
class VArc {
|
|
42
|
+
public:
|
|
43
|
+
unsigned long long int sibl;
|
|
44
|
+
vector<int> seq;
|
|
69
45
|
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
{
|
|
46
|
+
VArc(vector<int>& items, unsigned long long int _sib) {
|
|
47
|
+
sibl = _sib;
|
|
73
48
|
seq.swap(items);
|
|
74
49
|
}
|
|
50
|
+
|
|
51
|
+
VArc() {
|
|
52
|
+
sibl = 0;
|
|
53
|
+
}
|
|
75
54
|
};
|
|
76
55
|
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
unsigned long long
|
|
56
|
+
class CArc {
|
|
57
|
+
public:
|
|
58
|
+
vector<int> seq;
|
|
59
|
+
vector<unsigned long long int> ancest;
|
|
81
60
|
|
|
82
|
-
|
|
83
|
-
std::vector<int>& items)
|
|
84
|
-
: ancest(), seq(), freq(0u)
|
|
85
|
-
{
|
|
61
|
+
CArc(vector<unsigned long long int>& _anc, vector<int>& items) {
|
|
86
62
|
ancest.swap(_anc);
|
|
87
63
|
seq.swap(items);
|
|
88
64
|
}
|
|
65
|
+
|
|
66
|
+
CArc() = default;
|
|
89
67
|
};
|
|
90
68
|
|
|
91
|
-
|
|
69
|
+
extern vector<Arc> Tree;
|
|
70
|
+
extern vector<VArc> VTree;
|
|
71
|
+
extern vector<CArc> CTree;
|
|
92
72
|
|
|
93
|
-
|
|
73
|
+
} // namespace largehm
|
|
@@ -1,116 +1,89 @@
|
|
|
1
|
-
// ─────────────────────────────────────────────────────────────────────────────
|
|
2
|
-
// NEW CHANGE (2025-10-24):
|
|
3
|
-
// - Always call Out_patt(...) so patterns are collected regardless of verbosity.
|
|
4
|
-
// - Printing/writing remains guarded inside Out_patt by b_disp/b_write.
|
|
5
|
-
// - This fixes LargeHT returning 0 patterns when verbose=False.
|
|
6
|
-
// ─────────────────────────────────────────────────────────────────────────────
|
|
7
|
-
|
|
8
|
-
#include <cstdint>
|
|
9
|
-
#include <vector>
|
|
10
|
-
|
|
11
1
|
#include <iostream>
|
|
12
2
|
#include <time.h>
|
|
13
|
-
#include <cmath> // for std::ceil
|
|
14
|
-
|
|
15
3
|
#include "freq_miner.hpp"
|
|
16
4
|
#include "build_mdd.hpp"
|
|
17
5
|
#include "utility.hpp"
|
|
18
6
|
|
|
19
|
-
std::vector<std::uint64_t> ancest_base;
|
|
20
|
-
|
|
21
7
|
namespace largehm {
|
|
22
8
|
|
|
23
|
-
|
|
9
|
+
using namespace std;
|
|
10
|
+
|
|
11
|
+
void Out_patt(vector<int>& seq, unsigned int freq);
|
|
24
12
|
void Extend_patt(Pattern& _patt);
|
|
25
|
-
void Mine_vec(
|
|
26
|
-
int
|
|
27
|
-
int
|
|
28
|
-
|
|
29
|
-
std::vector<int>& items,
|
|
30
|
-
std::uint64_t pnt,
|
|
13
|
+
void Mine_vec(unsigned long long int seq_ID, int pos, int num_found,
|
|
14
|
+
vector<unsigned long long int>& ancest,
|
|
15
|
+
vector<int>& items,
|
|
16
|
+
unsigned long long int inod,
|
|
31
17
|
int sgn);
|
|
32
18
|
|
|
33
19
|
unsigned long long int num_patt = 0;
|
|
34
20
|
|
|
35
|
-
|
|
36
|
-
|
|
21
|
+
vector<bool> ilist;
|
|
22
|
+
vector<bool> slist;
|
|
37
23
|
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
24
|
+
vector<Pattern> pot_patt;
|
|
25
|
+
vector<VPattern> pot_vpatt;
|
|
26
|
+
vector<unsigned long long int> last_strpnt;
|
|
27
|
+
vector<unsigned long long int> ancest_base;
|
|
28
|
+
vector<int> DFS_numfound;
|
|
43
29
|
|
|
44
30
|
Pattern _patt;
|
|
45
31
|
VPattern _vpatt;
|
|
46
32
|
|
|
47
|
-
int
|
|
48
|
-
int
|
|
49
|
-
|
|
33
|
+
int itmset_size;
|
|
34
|
+
int last_neg;
|
|
50
35
|
bool ilist_nempty;
|
|
51
36
|
|
|
52
37
|
void Freq_miner() {
|
|
53
|
-
// Ensure DFS and VDFS are at least size L
|
|
54
|
-
if (DFS.size() < static_cast<size_t>(L)) {
|
|
55
|
-
size_t old = DFS.size();
|
|
56
|
-
DFS.resize(static_cast<size_t>(L));
|
|
57
|
-
for (size_t i = old; i < DFS.size(); ++i) {
|
|
58
|
-
DFS[i] = Pattern(-static_cast<int>(i) - 1);
|
|
59
|
-
}
|
|
60
|
-
}
|
|
61
|
-
if (VDFS.size() < static_cast<size_t>(L)) {
|
|
62
|
-
size_t old = VDFS.size();
|
|
63
|
-
VDFS.resize(static_cast<size_t>(L));
|
|
64
|
-
for (size_t i = old; i < VDFS.size(); ++i) {
|
|
65
|
-
VDFS[i] = VPattern(static_cast<int>(i));
|
|
66
|
-
}
|
|
67
|
-
}
|
|
68
38
|
|
|
69
|
-
|
|
70
|
-
|
|
39
|
+
vector<int> list;
|
|
40
|
+
|
|
41
|
+
for (int i = 0; i < (int)L; ++i) {
|
|
71
42
|
if (DFS[i].freq >= theta) {
|
|
72
|
-
|
|
73
|
-
if (itmset_exists)
|
|
74
|
-
|
|
75
|
-
}
|
|
43
|
+
list.push_back(-i - 1);
|
|
44
|
+
if (itmset_exists)
|
|
45
|
+
list.push_back(i + 1);
|
|
76
46
|
}
|
|
77
47
|
}
|
|
78
|
-
for (int i = 0; i < static_cast<int>(DFS.size()); ++i) {
|
|
79
|
-
DFS[i].list = tmp_list;
|
|
80
|
-
}
|
|
81
48
|
|
|
82
|
-
|
|
83
|
-
|
|
49
|
+
for (int i = 0; i < (int)DFS.size(); ++i)
|
|
50
|
+
DFS[i].list = list;
|
|
51
|
+
|
|
52
|
+
while (!DFS.empty() && give_time(clock() - start_time) < time_limit) {
|
|
53
|
+
if (DFS.back().freq >= theta)
|
|
84
54
|
Extend_patt(DFS.back());
|
|
85
|
-
|
|
55
|
+
else {
|
|
86
56
|
DFS.pop_back();
|
|
87
|
-
if (!VDFS.empty() && VDFS.back().ass_patt ==
|
|
57
|
+
if (!VDFS.empty() && VDFS.back().ass_patt == DFS.size())
|
|
88
58
|
VDFS.pop_back();
|
|
89
|
-
}
|
|
90
59
|
}
|
|
91
60
|
}
|
|
92
61
|
}
|
|
93
62
|
|
|
94
63
|
void Extend_patt(Pattern& _pattern) {
|
|
64
|
+
|
|
95
65
|
swap(_patt, _pattern);
|
|
96
66
|
DFS.pop_back();
|
|
97
67
|
|
|
98
|
-
slist =
|
|
99
|
-
ilist_nempty =
|
|
68
|
+
slist = vector<bool>(L, 0);
|
|
69
|
+
ilist_nempty = 0;
|
|
100
70
|
|
|
101
71
|
if (itmset_exists) {
|
|
102
|
-
ilist =
|
|
103
|
-
for (
|
|
72
|
+
ilist = vector<bool>(L, 0);
|
|
73
|
+
for (vector<int>::iterator it = _patt.list.begin();
|
|
74
|
+
it != _patt.list.end(); ++it) {
|
|
104
75
|
if (*it < 0)
|
|
105
|
-
slist[-(*it) - 1] =
|
|
76
|
+
slist[-(*it) - 1] = 1;
|
|
106
77
|
else {
|
|
107
|
-
ilist[(*it) - 1] =
|
|
108
|
-
ilist_nempty =
|
|
78
|
+
ilist[(*it) - 1] = 1;
|
|
79
|
+
ilist_nempty = 1;
|
|
109
80
|
}
|
|
110
81
|
}
|
|
111
|
-
}
|
|
112
|
-
|
|
113
|
-
|
|
82
|
+
}
|
|
83
|
+
else {
|
|
84
|
+
for (vector<int>::iterator it = _patt.list.begin();
|
|
85
|
+
it != _patt.list.end(); ++it)
|
|
86
|
+
slist[-(*it) - 1] = 1;
|
|
114
87
|
}
|
|
115
88
|
|
|
116
89
|
last_neg = _patt.seq.size() - 1;
|
|
@@ -118,38 +91,27 @@ void Extend_patt(Pattern& _pattern) {
|
|
|
118
91
|
--last_neg;
|
|
119
92
|
itmset_size = _patt.seq.size() - last_neg;
|
|
120
93
|
|
|
121
|
-
pot_patt =
|
|
94
|
+
pot_patt = vector<Pattern>(L + L * ilist_nempty);
|
|
122
95
|
if (!CTree.empty())
|
|
123
|
-
pot_vpatt =
|
|
96
|
+
pot_vpatt = vector<VPattern>(L + L * ilist_nempty);
|
|
124
97
|
|
|
125
|
-
last_strpnt =
|
|
98
|
+
last_strpnt = vector<unsigned long long int>(L, 0);
|
|
126
99
|
|
|
127
|
-
if (!VDFS.empty() && VDFS.back().ass_patt ==
|
|
100
|
+
if (!VDFS.empty() && VDFS.back().ass_patt == DFS.size()) {
|
|
128
101
|
swap(_vpatt, VDFS.back());
|
|
129
102
|
VDFS.pop_back();
|
|
130
103
|
for (unsigned long long int pnt = 0; pnt < _vpatt.str_pnt.size(); ++pnt) {
|
|
131
|
-
if (_vpatt.str_pnt[pnt] < 0)
|
|
132
|
-
Mine_vec(_vpatt.seq_ID[pnt],
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
0,
|
|
138
|
-
-1);
|
|
139
|
-
} else {
|
|
140
|
-
Mine_vec(_vpatt.seq_ID[pnt],
|
|
141
|
-
_vpatt.str_pnt[pnt],
|
|
142
|
-
-1,
|
|
143
|
-
ancest_base,
|
|
144
|
-
VTree[_vpatt.seq_ID[pnt]].seq,
|
|
145
|
-
0,
|
|
146
|
-
1);
|
|
147
|
-
}
|
|
104
|
+
if (_vpatt.str_pnt[pnt] < 0)
|
|
105
|
+
Mine_vec(_vpatt.seq_ID[pnt], -_vpatt.str_pnt[pnt], -1,
|
|
106
|
+
ancest_base, CTree[_vpatt.seq_ID[pnt]].seq, 0, -1);
|
|
107
|
+
else
|
|
108
|
+
Mine_vec(_vpatt.seq_ID[pnt], _vpatt.str_pnt[pnt], -1,
|
|
109
|
+
ancest_base, VTree[_vpatt.seq_ID[pnt]].seq, 0, 1);
|
|
148
110
|
}
|
|
149
111
|
}
|
|
150
112
|
|
|
151
|
-
|
|
152
|
-
|
|
113
|
+
vector<unsigned long long int> DFS_itm;
|
|
114
|
+
vector<unsigned long long int> DFS_seq;
|
|
153
115
|
if (ilist_nempty)
|
|
154
116
|
DFS_numfound.clear();
|
|
155
117
|
|
|
@@ -159,19 +121,13 @@ void Extend_patt(Pattern& _pattern) {
|
|
|
159
121
|
unsigned long long int cur_sibl = DFS_itm.back();
|
|
160
122
|
DFS_itm.pop_back();
|
|
161
123
|
if (Tree[cur_sibl].itmset < 0) {
|
|
162
|
-
unsigned int carc = Tree[cur_sibl].chld;
|
|
163
|
-
Mine_vec(carc, 0, -1,
|
|
164
|
-
CTree[carc].
|
|
165
|
-
CTree[carc].seq,
|
|
166
|
-
_patt.str_pnt[pnt],
|
|
167
|
-
-1);
|
|
124
|
+
unsigned long long int carc = Tree[cur_sibl].chld;
|
|
125
|
+
Mine_vec(carc, 0, -1, CTree[carc].ancest,
|
|
126
|
+
CTree[carc].seq, _patt.str_pnt[pnt], -1);
|
|
168
127
|
cur_sibl = CTree[carc].ancest.back();
|
|
169
128
|
while (cur_sibl != 0) {
|
|
170
|
-
Mine_vec(cur_sibl - 1, 0, -1,
|
|
171
|
-
|
|
172
|
-
VTree[cur_sibl - 1].seq,
|
|
173
|
-
_patt.str_pnt[pnt],
|
|
174
|
-
1);
|
|
129
|
+
Mine_vec(cur_sibl - 1, 0, -1, CTree[carc].ancest,
|
|
130
|
+
VTree[cur_sibl - 1].seq, _patt.str_pnt[pnt], 1);
|
|
175
131
|
cur_sibl = VTree[cur_sibl - 1].sibl;
|
|
176
132
|
}
|
|
177
133
|
continue;
|
|
@@ -195,7 +151,8 @@ void Extend_patt(Pattern& _pattern) {
|
|
|
195
151
|
DFS_numfound.push_back(0);
|
|
196
152
|
}
|
|
197
153
|
}
|
|
198
|
-
}
|
|
154
|
+
}
|
|
155
|
+
else {
|
|
199
156
|
if (ilist[cur_itm - 1]) {
|
|
200
157
|
pot_patt[cur_itm + L - 1].freq += Tree[cur_sibl].freq;
|
|
201
158
|
if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0)
|
|
@@ -207,14 +164,12 @@ void Extend_patt(Pattern& _pattern) {
|
|
|
207
164
|
cur_sibl = Tree[cur_sibl].sibl;
|
|
208
165
|
}
|
|
209
166
|
}
|
|
210
|
-
|
|
211
167
|
if (ilist_nempty) {
|
|
212
168
|
for (int i = 0; i < (int)L; ++i) {
|
|
213
169
|
if (ilist[i])
|
|
214
170
|
last_strpnt[i] = pot_patt[i + L].str_pnt.size();
|
|
215
171
|
}
|
|
216
172
|
}
|
|
217
|
-
|
|
218
173
|
while (!DFS_seq.empty()) {
|
|
219
174
|
unsigned long long int cur_sibl = DFS_seq.back();
|
|
220
175
|
DFS_seq.pop_back();
|
|
@@ -225,18 +180,12 @@ void Extend_patt(Pattern& _pattern) {
|
|
|
225
180
|
}
|
|
226
181
|
if (Tree[cur_sibl].itmset < 0) {
|
|
227
182
|
unsigned int carc = Tree[cur_sibl].chld;
|
|
228
|
-
Mine_vec(carc, 0, num_found,
|
|
229
|
-
CTree[carc].
|
|
230
|
-
CTree[carc].seq,
|
|
231
|
-
_patt.str_pnt[pnt],
|
|
232
|
-
-1);
|
|
183
|
+
Mine_vec(carc, 0, num_found, CTree[carc].ancest,
|
|
184
|
+
CTree[carc].seq, _patt.str_pnt[pnt], -1);
|
|
233
185
|
cur_sibl = CTree[carc].ancest.back();
|
|
234
186
|
while (cur_sibl != 0) {
|
|
235
|
-
Mine_vec(cur_sibl - 1, 0, num_found,
|
|
236
|
-
|
|
237
|
-
VTree[cur_sibl - 1].seq,
|
|
238
|
-
_patt.str_pnt[pnt],
|
|
239
|
-
1);
|
|
187
|
+
Mine_vec(cur_sibl - 1, 0, num_found, CTree[carc].ancest,
|
|
188
|
+
VTree[cur_sibl - 1].seq, _patt.str_pnt[pnt], 1);
|
|
240
189
|
cur_sibl = VTree[cur_sibl - 1].sibl;
|
|
241
190
|
}
|
|
242
191
|
continue;
|
|
@@ -247,17 +196,18 @@ void Extend_patt(Pattern& _pattern) {
|
|
|
247
196
|
if (cur_itm > 0) {
|
|
248
197
|
if (num_found == itmset_size &&
|
|
249
198
|
ilist[cur_itm - 1] &&
|
|
250
|
-
(
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
199
|
+
(abs(Tree[Tree[cur_sibl].anct].itmset) <
|
|
200
|
+
abs(Tree[_patt.str_pnt[pnt]].itmset) ||
|
|
201
|
+
!check_parent(Tree[cur_sibl].anct, _patt.str_pnt[pnt],
|
|
202
|
+
last_strpnt[cur_itm - 1],
|
|
203
|
+
pot_patt[cur_itm + L - 1].str_pnt))) {
|
|
255
204
|
pot_patt[cur_itm + L - 1].freq += Tree[cur_sibl].freq;
|
|
256
205
|
if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0)
|
|
257
206
|
pot_patt[cur_itm + L - 1].str_pnt.push_back(cur_sibl);
|
|
258
207
|
}
|
|
259
208
|
if (slist[cur_itm - 1] &&
|
|
260
|
-
|
|
209
|
+
abs(Tree[Tree[cur_sibl].anct].itmset) <=
|
|
210
|
+
abs(Tree[_patt.str_pnt[pnt]].itmset)) {
|
|
261
211
|
pot_patt[cur_itm - 1].freq += Tree[cur_sibl].freq;
|
|
262
212
|
if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0)
|
|
263
213
|
pot_patt[cur_itm - 1].str_pnt.push_back(cur_sibl);
|
|
@@ -266,16 +216,19 @@ void Extend_patt(Pattern& _pattern) {
|
|
|
266
216
|
DFS_seq.push_back(cur_sibl);
|
|
267
217
|
if (ilist_nempty) {
|
|
268
218
|
if (num_found < itmset_size &&
|
|
269
|
-
cur_itm ==
|
|
219
|
+
cur_itm ==
|
|
220
|
+
abs(_patt.seq[last_neg + num_found]))
|
|
270
221
|
DFS_numfound.push_back(num_found + 1);
|
|
271
222
|
else
|
|
272
223
|
DFS_numfound.push_back(num_found);
|
|
273
224
|
}
|
|
274
225
|
}
|
|
275
|
-
}
|
|
226
|
+
}
|
|
227
|
+
else {
|
|
276
228
|
cur_itm = -cur_itm;
|
|
277
229
|
if (slist[cur_itm - 1] &&
|
|
278
|
-
|
|
230
|
+
abs(Tree[Tree[cur_sibl].anct].itmset) <=
|
|
231
|
+
abs(Tree[_patt.str_pnt[pnt]].itmset)) {
|
|
279
232
|
pot_patt[cur_itm - 1].freq += Tree[cur_sibl].freq;
|
|
280
233
|
if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0)
|
|
281
234
|
pot_patt[cur_itm - 1].str_pnt.push_back(cur_sibl);
|
|
@@ -295,9 +248,10 @@ void Extend_patt(Pattern& _pattern) {
|
|
|
295
248
|
}
|
|
296
249
|
}
|
|
297
250
|
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
for (
|
|
251
|
+
vector<int> ilistp;
|
|
252
|
+
vector<int> slistp;
|
|
253
|
+
for (vector<int>::iterator it = _patt.list.begin();
|
|
254
|
+
it != _patt.list.end(); ++it) {
|
|
301
255
|
if (*it > 0 && pot_patt[(*it) + L - 1].freq >= theta)
|
|
302
256
|
ilistp.push_back(*it);
|
|
303
257
|
else if (*it < 0 && pot_patt[-(*it) - 1].freq >= theta) {
|
|
@@ -308,7 +262,7 @@ void Extend_patt(Pattern& _pattern) {
|
|
|
308
262
|
}
|
|
309
263
|
}
|
|
310
264
|
|
|
311
|
-
|
|
265
|
+
for (vector<int>::iterator it = ilistp.begin(); it != ilistp.end(); ++it) {
|
|
312
266
|
int p;
|
|
313
267
|
if (*it < 0)
|
|
314
268
|
p = -(*it) - 1;
|
|
@@ -317,113 +271,118 @@ void Extend_patt(Pattern& _pattern) {
|
|
|
317
271
|
|
|
318
272
|
pot_patt[p].str_pnt.shrink_to_fit();
|
|
319
273
|
DFS.push_back(pot_patt[p]);
|
|
274
|
+
|
|
275
|
+
// Build the full pattern sequence
|
|
320
276
|
DFS.back().seq = _patt.seq;
|
|
321
277
|
DFS.back().seq.push_back(*it);
|
|
278
|
+
|
|
279
|
+
// Update candidate list
|
|
322
280
|
if (*it < 0)
|
|
323
281
|
DFS.back().list = slistp;
|
|
324
282
|
else
|
|
325
283
|
DFS.back().list = ilistp;
|
|
326
284
|
|
|
285
|
+
// Attach VPatterns if needed
|
|
327
286
|
if (!CTree.empty() && !pot_vpatt[p].str_pnt.empty()) {
|
|
328
|
-
pot_vpatt[p].ass_patt =
|
|
287
|
+
pot_vpatt[p].ass_patt = DFS.size() - 1;
|
|
329
288
|
VDFS.push_back(pot_vpatt[p]);
|
|
330
289
|
}
|
|
331
290
|
|
|
332
|
-
//
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
//
|
|
336
|
-
|
|
337
|
-
|
|
291
|
+
// ✅ Always record the pattern for Python, independent of b_disp / b_write.
|
|
292
|
+
collectedPatterns.push_back(DFS.back().seq);
|
|
293
|
+
|
|
294
|
+
// Original output behavior (only if requested)
|
|
295
|
+
if (b_disp || b_write)
|
|
296
|
+
Out_patt(DFS.back().seq, DFS.back().freq);
|
|
338
297
|
|
|
339
298
|
++num_patt;
|
|
340
299
|
}
|
|
300
|
+
|
|
341
301
|
}
|
|
342
302
|
|
|
343
|
-
void Mine_vec(
|
|
344
|
-
int
|
|
345
|
-
int
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
{
|
|
351
|
-
std::vector<bool> found(L + L * (ilist_nempty ? 1 : 0), false);
|
|
303
|
+
void Mine_vec(unsigned long long int seq_ID, int pos, int num_found,
|
|
304
|
+
vector<unsigned long long int>& ancest,
|
|
305
|
+
vector<int>& items,
|
|
306
|
+
unsigned long long int pnt,
|
|
307
|
+
int sgn) {
|
|
308
|
+
|
|
309
|
+
vector<bool> found(L + L * ilist_nempty, 0);
|
|
352
310
|
|
|
353
311
|
if (num_found == -1) {
|
|
354
|
-
while (pos <
|
|
312
|
+
while (pos < (int)items.size() && items[pos] > 0) {
|
|
355
313
|
int cur_itm = items[pos];
|
|
356
314
|
if (ilist[cur_itm - 1] && !found[cur_itm + L - 1]) {
|
|
357
|
-
if (pos + 1 <
|
|
315
|
+
if (pos + 1 < (int)items.size()) {
|
|
358
316
|
pot_vpatt[cur_itm + L - 1].seq_ID.push_back(seq_ID);
|
|
359
317
|
pot_vpatt[cur_itm + L - 1].str_pnt.push_back(sgn * (pos + 1));
|
|
360
318
|
}
|
|
361
319
|
++pot_patt[cur_itm + L - 1].freq;
|
|
362
|
-
found[cur_itm + L - 1] =
|
|
320
|
+
found[cur_itm + L - 1] = 1;
|
|
363
321
|
}
|
|
364
322
|
++pos;
|
|
365
323
|
}
|
|
366
324
|
}
|
|
367
325
|
|
|
368
326
|
for (unsigned int k = pos; k < items.size(); ++k) {
|
|
369
|
-
int cur_itm =
|
|
327
|
+
int cur_itm = abs(items[k]);
|
|
370
328
|
if (items[k] < 0)
|
|
371
329
|
num_found = 0;
|
|
372
|
-
|
|
373
330
|
if (slist[cur_itm - 1] && !found[cur_itm - 1]) {
|
|
374
|
-
if (ancest.empty() ||
|
|
375
|
-
|
|
331
|
+
if (ancest.empty() ||
|
|
332
|
+
abs(Tree[ancest[cur_itm - 1]].itmset) <= abs(Tree[pnt].itmset)) {
|
|
333
|
+
if (k + 1 < (int)items.size()) {
|
|
376
334
|
pot_vpatt[cur_itm - 1].seq_ID.push_back(seq_ID);
|
|
377
335
|
pot_vpatt[cur_itm - 1].str_pnt.push_back(sgn * (k + 1));
|
|
378
336
|
}
|
|
379
337
|
++pot_patt[cur_itm - 1].freq;
|
|
380
338
|
}
|
|
381
|
-
found[cur_itm - 1] =
|
|
339
|
+
found[cur_itm - 1] = 1;
|
|
382
340
|
}
|
|
383
|
-
|
|
384
341
|
if (num_found == itmset_size) {
|
|
385
342
|
if (ilist[cur_itm - 1] && !found[cur_itm + L - 1]) {
|
|
386
343
|
if (ancest.empty() ||
|
|
387
|
-
|
|
388
|
-
!check_parent(ancest[cur_itm - 1], pnt,
|
|
389
|
-
|
|
390
|
-
|
|
344
|
+
abs(Tree[ancest[cur_itm - 1]].itmset) < abs(Tree[pnt].itmset) ||
|
|
345
|
+
!check_parent(ancest[cur_itm - 1], pnt,
|
|
346
|
+
last_strpnt[cur_itm - 1],
|
|
347
|
+
pot_patt[cur_itm + L - 1].str_pnt)) {
|
|
348
|
+
if (k + 1 < (int)items.size()) {
|
|
391
349
|
pot_vpatt[cur_itm + L - 1].seq_ID.push_back(seq_ID);
|
|
392
350
|
pot_vpatt[cur_itm + L - 1].str_pnt.push_back(sgn * (k + 1));
|
|
393
351
|
}
|
|
394
352
|
++pot_patt[cur_itm + L - 1].freq;
|
|
395
353
|
}
|
|
396
|
-
found[cur_itm + L - 1] =
|
|
354
|
+
found[cur_itm + L - 1] = 1;
|
|
397
355
|
}
|
|
398
|
-
} else if (cur_itm == std::abs(_patt.seq[last_neg + num_found])) {
|
|
399
|
-
++num_found;
|
|
400
356
|
}
|
|
357
|
+
else if (cur_itm == abs(_patt.seq[last_neg + num_found]))
|
|
358
|
+
++num_found;
|
|
401
359
|
}
|
|
402
360
|
}
|
|
403
361
|
|
|
404
|
-
void Out_patt(
|
|
405
|
-
// Always collect:
|
|
406
|
-
largehm::collected.push_back(seq);
|
|
362
|
+
void Out_patt(vector<int>& seq, unsigned int freq) {
|
|
407
363
|
|
|
408
|
-
|
|
364
|
+
ofstream file_o;
|
|
409
365
|
if (b_write)
|
|
410
366
|
file_o.open(out_file, std::ios::app);
|
|
411
367
|
|
|
412
|
-
for (int ii = 0; ii <
|
|
368
|
+
for (int ii = 0; ii < (int)seq.size(); ii++) {
|
|
413
369
|
if (b_disp)
|
|
414
|
-
|
|
370
|
+
cout << seq[ii] << " ";
|
|
415
371
|
if (b_write)
|
|
416
372
|
file_o << seq[ii] << " ";
|
|
417
373
|
}
|
|
418
374
|
if (b_disp)
|
|
419
|
-
|
|
375
|
+
cout << endl;
|
|
376
|
+
if (b_write)
|
|
377
|
+
file_o << endl;
|
|
378
|
+
|
|
379
|
+
if (b_disp)
|
|
380
|
+
cout << "************** Freq: " << freq << endl;
|
|
420
381
|
if (b_write) {
|
|
421
|
-
file_o <<
|
|
422
|
-
file_o << "************** Freq: " << freq << std::endl;
|
|
382
|
+
file_o << "************** Freq: " << freq << endl;
|
|
423
383
|
file_o.close();
|
|
424
384
|
}
|
|
425
|
-
if (b_disp)
|
|
426
|
-
std::cout << "************** Freq: " << freq << std::endl;
|
|
427
385
|
}
|
|
428
386
|
|
|
387
|
+
|
|
429
388
|
} // namespace largehm
|