effspm 0.1.5__cp310-cp310-win_amd64.whl → 0.3.0__cp310-cp310-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- effspm/__init__.py +9 -2
- effspm/_core.cpp +91 -13
- effspm/_effspm.cp310-win_amd64.pyd +0 -0
- effspm/_effspm.cpp +679 -0
- effspm/btminer/src/build_mdd.cpp +88 -0
- effspm/btminer/src/build_mdd.hpp +34 -0
- effspm/btminer/src/freq_miner.cpp +264 -0
- effspm/btminer/src/freq_miner.hpp +55 -0
- effspm/btminer/src/load_inst.cpp +275 -0
- effspm/btminer/src/load_inst.hpp +43 -0
- effspm/btminer/src/utility.cpp +50 -0
- effspm/btminer/src/utility.hpp +16 -0
- effspm/freq_miner.hpp +7 -1
- effspm/htminer/src/build_mdd.cpp +139 -0
- effspm/htminer/src/build_mdd.hpp +64 -0
- effspm/htminer/src/freq_miner.cpp +350 -0
- effspm/htminer/src/freq_miner.hpp +60 -0
- effspm/htminer/src/load_inst.cpp +394 -0
- effspm/htminer/src/load_inst.hpp +23 -0
- effspm/htminer/src/utility.cpp +72 -0
- effspm/htminer/src/utility.hpp +77 -0
- effspm/largebm/src/build_mdd.cpp +96 -0
- effspm/largebm/src/build_mdd.hpp +32 -0
- effspm/largebm/src/freq_miner.cpp +299 -0
- effspm/largebm/src/freq_miner.hpp +37 -0
- effspm/largebm/src/load_inst.cpp +224 -0
- effspm/largebm/src/load_inst.hpp +35 -0
- effspm/largebm/src/utility.cpp +35 -0
- effspm/largebm/src/utility.hpp +15 -0
- effspm/largehm/src/build_mdd.cpp +174 -0
- effspm/largehm/src/build_mdd.hpp +93 -0
- effspm/largehm/src/freq_miner.cpp +429 -0
- effspm/largehm/src/freq_miner.hpp +77 -0
- effspm/largehm/src/load_inst.cpp +375 -0
- effspm/largehm/src/load_inst.hpp +64 -0
- effspm/largehm/src/utility.cpp +38 -0
- effspm/largehm/src/utility.hpp +29 -0
- effspm/largepp/src/freq_miner.cpp +198 -0
- effspm/largepp/src/freq_miner.hpp +18 -0
- effspm/largepp/src/load_inst.cpp +238 -0
- effspm/largepp/src/load_inst.hpp +34 -0
- effspm/largepp/src/pattern.hpp +31 -0
- effspm/largepp/src/utility.cpp +34 -0
- effspm/largepp/src/utility.hpp +21 -0
- effspm/load_inst.hpp +18 -12
- effspm-0.3.0.dist-info/METADATA +237 -0
- effspm-0.3.0.dist-info/RECORD +54 -0
- {effspm-0.1.5.dist-info → effspm-0.3.0.dist-info}/WHEEL +1 -1
- effspm/_core.cp310-win_amd64.pyd +0 -0
- effspm-0.1.5.dist-info/METADATA +0 -38
- effspm-0.1.5.dist-info/RECORD +0 -14
- {effspm-0.1.5.dist-info → effspm-0.3.0.dist-info}/licenses/LICENSE +0 -0
- {effspm-0.1.5.dist-info → effspm-0.3.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
// ─── effspm/largehm/src/build_mdd.cpp ─────────────────────────────────────────
|
|
2
|
+
|
|
3
|
+
#include "build_mdd.hpp"
|
|
4
|
+
|
|
5
|
+
// ─── Definitions of the extern globals declared in build_mdd.hpp ─────────────
|
|
6
|
+
std::vector<largehm::Arc> largehm::Tree;
|
|
7
|
+
std::vector<largehm::VArc> largehm::VTree;
|
|
8
|
+
std::vector<largehm::CArc> largehm::CTree;
|
|
9
|
+
|
|
10
|
+
#include <vector>
|
|
11
|
+
#include <iostream>
|
|
12
|
+
#include <cmath> // for std::abs
|
|
13
|
+
#include <unordered_map>
|
|
14
|
+
#include <cstdint> // for std::uint64_t
|
|
15
|
+
#include "load_inst.hpp"
|
|
16
|
+
#include "freq_miner.hpp"
|
|
17
|
+
#include "utility.hpp"
|
|
18
|
+
|
|
19
|
+
namespace largehm {
|
|
20
|
+
|
|
21
|
+
//
|
|
22
|
+
// ─── Build the MDD by sequentially calling Add_arc() then possibly Add_vec() ──
|
|
23
|
+
//
|
|
24
|
+
void Build_MDD(std::vector<int>& items, std::vector<int>& items_lim) {
|
|
25
|
+
// SANITY CHECK: show sizes before building
|
|
26
|
+
|
|
27
|
+
std::unordered_map<int, std::uint64_t> ancest_map;
|
|
28
|
+
std::uint64_t last_arc = 0;
|
|
29
|
+
int itmset = 0;
|
|
30
|
+
|
|
31
|
+
// Insert each prefix item as an arc
|
|
32
|
+
for (auto it = items.begin(); it != items.end(); ++it) {
|
|
33
|
+
last_arc = Add_arc(*it, last_arc, itmset, ancest_map);
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
// If there is a suffix beyond mlim, attach it via Add_vec()
|
|
37
|
+
if (!items_lim.empty()) {
|
|
38
|
+
Add_vec(items_lim, ancest_map, last_arc, itmset);
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
//
|
|
44
|
+
// ─── Add_arc: insert a single “item” into the MDD under parent last_arc. ──────
|
|
45
|
+
//
|
|
46
|
+
int Add_arc(int item,
|
|
47
|
+
std::uint64_t last_arc,
|
|
48
|
+
int& itmset,
|
|
49
|
+
std::unordered_map<int, std::uint64_t>& ancest_map)
|
|
50
|
+
{
|
|
51
|
+
// Ensure DFS is at least size |item|
|
|
52
|
+
size_t needed = static_cast<size_t>(std::abs(item));
|
|
53
|
+
if (DFS.size() < needed) {
|
|
54
|
+
size_t old = DFS.size();
|
|
55
|
+
DFS.resize(needed);
|
|
56
|
+
for (size_t i = old; i < needed; ++i) {
|
|
57
|
+
DFS[i] = Pattern(-static_cast<int>(i) - 1);
|
|
58
|
+
}
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
unsigned int anct = 0;
|
|
62
|
+
auto p = ancest_map.find(std::abs(item));
|
|
63
|
+
if (p != ancest_map.end()) {
|
|
64
|
+
anct = p->second;
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
if (item < 0) {
|
|
68
|
+
++itmset;
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
std::uint64_t last_sibl = Tree[last_arc].chld;
|
|
72
|
+
if (last_sibl == 0) {
|
|
73
|
+
// No child yet: create a new Arc
|
|
74
|
+
Tree.emplace_back(item, itmset, anct);
|
|
75
|
+
last_sibl = Tree.size() - 1;
|
|
76
|
+
Tree[last_arc].chld = last_sibl;
|
|
77
|
+
if (anct == 0) {
|
|
78
|
+
DFS[std::abs(item) - 1].str_pnt.push_back(last_sibl);
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
else {
|
|
82
|
+
// Traverse siblings until we find a match or append
|
|
83
|
+
while (Tree[last_sibl].item != item) {
|
|
84
|
+
if (Tree[last_sibl].sibl == 0) {
|
|
85
|
+
Tree.emplace_back(item, itmset, anct);
|
|
86
|
+
Tree[last_sibl].sibl = Tree.size() - 1;
|
|
87
|
+
last_sibl = Tree.size() - 1;
|
|
88
|
+
if (anct == 0) {
|
|
89
|
+
DFS[std::abs(item) - 1].str_pnt.push_back(last_sibl);
|
|
90
|
+
}
|
|
91
|
+
break;
|
|
92
|
+
}
|
|
93
|
+
last_sibl = Tree[last_sibl].sibl;
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
if (anct == 0) {
|
|
98
|
+
++DFS[std::abs(item) - 1].freq;
|
|
99
|
+
}
|
|
100
|
+
++Tree[last_sibl].freq;
|
|
101
|
+
ancest_map[std::abs(item)] = last_sibl;
|
|
102
|
+
return static_cast<int>(last_sibl);
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
//
|
|
107
|
+
// ─── Add_vec: attach the “items_lim” vector as children/vertical arcs ─────────
|
|
108
|
+
//
|
|
109
|
+
void Add_vec(std::vector<int>& items_lim,
|
|
110
|
+
std::unordered_map<int, std::uint64_t>& ancest_map,
|
|
111
|
+
std::uint64_t last_arc,
|
|
112
|
+
int itmset)
|
|
113
|
+
{
|
|
114
|
+
// Ensure VDFS and DFS are at least size L
|
|
115
|
+
if (VDFS.size() < static_cast<size_t>(L)) {
|
|
116
|
+
size_t old = VDFS.size();
|
|
117
|
+
VDFS.resize(static_cast<size_t>(L));
|
|
118
|
+
for (size_t i = old; i < VDFS.size(); ++i) {
|
|
119
|
+
VDFS[i] = VPattern(static_cast<int>(i));
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
if (DFS.size() < static_cast<size_t>(L)) {
|
|
123
|
+
size_t old = DFS.size();
|
|
124
|
+
DFS.resize(static_cast<size_t>(L));
|
|
125
|
+
for (size_t i = old; i < DFS.size(); ++i) {
|
|
126
|
+
DFS[i] = Pattern(-static_cast<int>(i) - 1);
|
|
127
|
+
}
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
items_lim.shrink_to_fit();
|
|
131
|
+
std::vector<bool> counted(L, false);
|
|
132
|
+
|
|
133
|
+
// If this node has positive itmset (>0) or no CTree child yet, create first child entry
|
|
134
|
+
if (Tree[last_arc].itmset > 0 || Tree[last_arc].chld == 0) {
|
|
135
|
+
std::vector<std::uint64_t> ancest(L + 1, 0ULL);
|
|
136
|
+
for (auto& kv : ancest_map) {
|
|
137
|
+
ancest[kv.first - 1] = kv.second;
|
|
138
|
+
counted[kv.first - 1] = true;
|
|
139
|
+
}
|
|
140
|
+
for (int i = 0; i < static_cast<int>(items_lim.size()); ++i) {
|
|
141
|
+
int cur_itm = std::abs(items_lim[i]);
|
|
142
|
+
if (!counted[cur_itm - 1]) {
|
|
143
|
+
if (i + 1 < static_cast<int>(items_lim.size())) {
|
|
144
|
+
VDFS[cur_itm - 1].str_pnt.push_back(-i - 1);
|
|
145
|
+
VDFS[cur_itm - 1].seq_ID.push_back(CTree.size());
|
|
146
|
+
}
|
|
147
|
+
++DFS[cur_itm - 1].freq;
|
|
148
|
+
counted[cur_itm - 1] = true;
|
|
149
|
+
}
|
|
150
|
+
}
|
|
151
|
+
CTree.emplace_back(ancest, items_lim);
|
|
152
|
+
Tree[last_arc].chld = CTree.size() - 1;
|
|
153
|
+
Tree[last_arc].itmset = -itmset;
|
|
154
|
+
}
|
|
155
|
+
else {
|
|
156
|
+
// Normal “existing CTree child” path
|
|
157
|
+
auto& ancest = CTree[ Tree[last_arc].chld ].ancest;
|
|
158
|
+
for (int i = 0; i < static_cast<int>(items_lim.size()); ++i) {
|
|
159
|
+
int cur_itm = std::abs(items_lim[i]);
|
|
160
|
+
if (!counted[cur_itm - 1] && ancest[cur_itm - 1] == 0ULL) {
|
|
161
|
+
if (i + 1 < static_cast<int>(items_lim.size())) {
|
|
162
|
+
VDFS[cur_itm - 1].str_pnt.push_back(i + 1);
|
|
163
|
+
VDFS[cur_itm - 1].seq_ID.push_back(VTree.size());
|
|
164
|
+
}
|
|
165
|
+
++DFS[cur_itm - 1].freq;
|
|
166
|
+
counted[cur_itm - 1] = true;
|
|
167
|
+
}
|
|
168
|
+
}
|
|
169
|
+
VTree.emplace_back(items_lim, CTree[ Tree[last_arc].chld ].ancest.back());
|
|
170
|
+
CTree[ Tree[last_arc].chld ].ancest.back() = VTree.size();
|
|
171
|
+
}
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
} // namespace largehm
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
#ifndef LARGEHM_BUILD_MDD_HPP
|
|
2
|
+
#define LARGEHM_BUILD_MDD_HPP
|
|
3
|
+
|
|
4
|
+
#include <vector>
|
|
5
|
+
#include <unordered_map>
|
|
6
|
+
#include <cstddef> // for size_t
|
|
7
|
+
#include <cstdint> // for uint64_t
|
|
8
|
+
|
|
9
|
+
#include "load_inst.hpp" // defines L, DFS, VDFS, Tree, etc.
|
|
10
|
+
#include "freq_miner.hpp" // for Pattern, VPattern
|
|
11
|
+
#include "utility.hpp" // if you need check_parent or collected
|
|
12
|
+
|
|
13
|
+
namespace largehm {
|
|
14
|
+
|
|
15
|
+
//
|
|
16
|
+
// ─── Types & Globals ─────────────────────────────────────────────────────────
|
|
17
|
+
//
|
|
18
|
+
|
|
19
|
+
struct Arc;
|
|
20
|
+
struct VArc;
|
|
21
|
+
struct CArc;
|
|
22
|
+
|
|
23
|
+
extern std::vector<Arc> Tree;
|
|
24
|
+
extern std::vector<VArc> VTree;
|
|
25
|
+
extern std::vector<CArc> CTree;
|
|
26
|
+
|
|
27
|
+
//
|
|
28
|
+
// ─── Public API ───────────────────────────────────────────────────────────────
|
|
29
|
+
//
|
|
30
|
+
|
|
31
|
+
void Build_MDD(std::vector<int>& items,
|
|
32
|
+
std::vector<int>& items_lim);
|
|
33
|
+
|
|
34
|
+
//
|
|
35
|
+
// ─── Internal Helpers ─────────────────────────────────────────────────────────
|
|
36
|
+
//
|
|
37
|
+
|
|
38
|
+
int Add_arc(int item,
|
|
39
|
+
std::uint64_t last_arc,
|
|
40
|
+
int& itmset,
|
|
41
|
+
std::unordered_map<int, std::uint64_t>& ancest_map);
|
|
42
|
+
|
|
43
|
+
void Add_vec(std::vector<int>& items_lim,
|
|
44
|
+
std::unordered_map<int, std::uint64_t>& ancest_map,
|
|
45
|
+
std::uint64_t last_arc,
|
|
46
|
+
int itmset);
|
|
47
|
+
|
|
48
|
+
//
|
|
49
|
+
// ─── Struct Definitions ───────────────────────────────────────────────────────
|
|
50
|
+
//
|
|
51
|
+
|
|
52
|
+
// One node of the prefix tree (`Tree`). Links are indices into `Tree`, with 0
// meaning "none"; once a suffix has been attached by Add_vec(), `itmset` is
// negative and `chld` is an index into `CTree` instead.
struct Arc {
    int item;                     // signed item code
    int itmset;                   // itemset counter at insertion time
    std::uint64_t anct;           // ancestor link supplied at construction (0 = none)
    std::uint64_t chld = 0;       // first child
    std::uint64_t sibl = 0;       // next sibling
    unsigned long long freq = 0;  // insertion count for this arc

    Arc(int _item, int _itmset, std::uint64_t _anct)
        : item(_item), itmset(_itmset), anct(_anct) {}
};
|
|
64
|
+
|
|
65
|
+
// A stored item sequence plus a link to the next VArc in the same chain.
struct VArc {
    std::vector<int> seq;         // the stored items
    std::uint64_t sibl;           // link value supplied at construction
    unsigned long long freq = 0;

    // Takes over the contents of `items` by swapping; because `seq` starts
    // out empty, the caller's vector is empty afterwards.
    explicit VArc(std::vector<int>& items, std::uint64_t _sibl)
        : sibl(_sibl)
    {
        items.swap(seq);
    }
};
|
|
76
|
+
|
|
77
|
+
// A stored item sequence together with a table of ancestor links.
struct CArc {
    std::vector<std::uint64_t> ancest;  // ancestor table supplied at construction
    std::vector<int> seq;               // the stored items
    unsigned long long freq = 0;

    // Takes over both vectors by swapping; because the members start out
    // empty, the caller's vectors are empty afterwards.
    explicit CArc(std::vector<std::uint64_t>& _anc,
                  std::vector<int>& items)
    {
        _anc.swap(ancest);
        items.swap(seq);
    }
};
|
|
90
|
+
|
|
91
|
+
} // namespace largehm
|
|
92
|
+
|
|
93
|
+
#endif // LARGEHM_BUILD_MDD_HPP
|
|
@@ -0,0 +1,429 @@
|
|
|
1
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
2
|
+
// NEW CHANGE (2025-10-24):
|
|
3
|
+
// - Always call Out_patt(...) so patterns are collected regardless of verbosity.
|
|
4
|
+
// - Printing/writing remains guarded inside Out_patt by b_disp/b_write.
|
|
5
|
+
// - This fixes LargeHT returning 0 patterns when verbose=False.
|
|
6
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
7
|
+
|
|
8
|
+
#include <cstdint>
|
|
9
|
+
#include <vector>
|
|
10
|
+
|
|
11
|
+
#include <iostream>
|
|
12
|
+
#include <time.h>
|
|
13
|
+
#include <cmath> // for std::ceil
|
|
14
|
+
|
|
15
|
+
#include "freq_miner.hpp"
|
|
16
|
+
#include "build_mdd.hpp"
|
|
17
|
+
#include "utility.hpp"
|
|
18
|
+
|
|
19
|
+
std::vector<std::uint64_t> ancest_base;
|
|
20
|
+
|
|
21
|
+
namespace largehm {
|
|
22
|
+
|
|
23
|
+
void Out_patt(std::vector<int>& seq, unsigned int freq);
|
|
24
|
+
void Extend_patt(Pattern& _patt);
|
|
25
|
+
void Mine_vec(std::uint64_t seq_ID,
|
|
26
|
+
int pos,
|
|
27
|
+
int num_found,
|
|
28
|
+
std::vector<std::uint64_t>& ancest,
|
|
29
|
+
std::vector<int>& items,
|
|
30
|
+
std::uint64_t pnt,
|
|
31
|
+
int sgn);
|
|
32
|
+
|
|
33
|
+
unsigned long long int num_patt = 0;
|
|
34
|
+
|
|
35
|
+
std::vector<bool> ilist;
|
|
36
|
+
std::vector<bool> slist;
|
|
37
|
+
|
|
38
|
+
std::vector<Pattern> pot_patt;
|
|
39
|
+
std::vector<VPattern> pot_vpatt;
|
|
40
|
+
std::vector<unsigned long long int> last_strpnt;
|
|
41
|
+
|
|
42
|
+
std::vector<int> DFS_numfound;
|
|
43
|
+
|
|
44
|
+
Pattern _patt;
|
|
45
|
+
VPattern _vpatt;
|
|
46
|
+
|
|
47
|
+
int itmset_size;
|
|
48
|
+
int last_neg;
|
|
49
|
+
|
|
50
|
+
bool ilist_nempty;
|
|
51
|
+
|
|
52
|
+
void Freq_miner() {
|
|
53
|
+
// Ensure DFS and VDFS are at least size L
|
|
54
|
+
if (DFS.size() < static_cast<size_t>(L)) {
|
|
55
|
+
size_t old = DFS.size();
|
|
56
|
+
DFS.resize(static_cast<size_t>(L));
|
|
57
|
+
for (size_t i = old; i < DFS.size(); ++i) {
|
|
58
|
+
DFS[i] = Pattern(-static_cast<int>(i) - 1);
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
if (VDFS.size() < static_cast<size_t>(L)) {
|
|
62
|
+
size_t old = VDFS.size();
|
|
63
|
+
VDFS.resize(static_cast<size_t>(L));
|
|
64
|
+
for (size_t i = old; i < VDFS.size(); ++i) {
|
|
65
|
+
VDFS[i] = VPattern(static_cast<int>(i));
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
std::vector<int> tmp_list;
|
|
70
|
+
for (int i = 0; i < static_cast<int>(L); ++i) {
|
|
71
|
+
if (DFS[i].freq >= theta) {
|
|
72
|
+
tmp_list.push_back(-i - 1);
|
|
73
|
+
if (itmset_exists) {
|
|
74
|
+
tmp_list.push_back(i + 1);
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
}
|
|
78
|
+
for (int i = 0; i < static_cast<int>(DFS.size()); ++i) {
|
|
79
|
+
DFS[i].list = tmp_list;
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
while (!DFS.empty() && give_time(std::clock() - start_time) < time_limit) {
|
|
83
|
+
if (DFS.back().freq >= theta) {
|
|
84
|
+
Extend_patt(DFS.back());
|
|
85
|
+
} else {
|
|
86
|
+
DFS.pop_back();
|
|
87
|
+
if (!VDFS.empty() && VDFS.back().ass_patt == static_cast<int>(DFS.size())) {
|
|
88
|
+
VDFS.pop_back();
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
// Extends the pattern `_pattern` (the top of the DFS stack when called from
// Freq_miner) by one item in every frequent way.
//
// Steps:
//   1. Move the pattern into `_patt`, pop it from DFS, and rebuild the
//      candidate flags: negative entries of `_patt.list` set `slist`,
//      positive entries set `ilist`.
//   2. Accumulate candidate support in `pot_patt` (and `pot_vpatt` for
//      positions inside CTree/VTree sequences) by walking Tree from every
//      start pointer of the pattern. Slots [0, L) belong to negative-encoded
//      candidates, slots [L, 2L) to positive-encoded ones.
//   3. Push every candidate whose support reaches `theta` onto DFS (and its
//      VPattern onto VDFS), report it through Out_patt and count it.
//
// Change from the previous version: CTree indices read from `Arc::chld` are
// kept as std::uint64_t (they used to be narrowed to `unsigned int` before
// being passed to Mine_vec), and the duplicated "mine the CArc, then every
// chained VArc" walk is shared through one local helper.
void Extend_patt(Pattern& _pattern) {
    swap(_patt, _pattern);
    DFS.pop_back();

    slist = std::vector<bool>(L, false);
    ilist_nempty = false;

    if (itmset_exists) {
        ilist = std::vector<bool>(L, false);
        for (const int cand : _patt.list) {
            if (cand < 0) {
                slist[-cand - 1] = true;
            } else {
                ilist[cand - 1] = true;
                ilist_nempty = true;
            }
        }
    } else {
        for (const int cand : _patt.list) {
            slist[-cand - 1] = true;
        }
    }

    // Locate the last negative entry of the pattern; the entries from there
    // to the end are what `itmset_size` counts.
    last_neg = _patt.seq.size() - 1;
    while (_patt.seq[last_neg] > 0)
        --last_neg;
    itmset_size = _patt.seq.size() - last_neg;

    pot_patt = std::vector<Pattern>(L + L * (ilist_nempty ? 1 : 0));
    if (!CTree.empty())
        pot_vpatt = std::vector<VPattern>(L + L * (ilist_nempty ? 1 : 0));

    last_strpnt = std::vector<unsigned long long int>(L, 0ULL);

    // True when the arc has something below it: Tree children, or a CArc
    // (signalled by a negative itmset).
    auto expandable = [](unsigned long long int node) {
        return Tree[node].chld != 0 || Tree[node].itmset < 0;
    };

    // Adds the arc's frequency to candidate slot `slot` and remembers the arc
    // as a start pointer when it can be expanded further.
    auto credit = [&expandable](std::size_t slot, unsigned long long int node) {
        pot_patt[slot].freq += Tree[node].freq;
        if (expandable(node))
            pot_patt[slot].str_pnt.push_back(node);
    };

    // Mines the CArc hanging off Tree[node] and then every VArc chained to it
    // (chain links are VTree index + 1, terminated by 0).
    auto mine_suffixes = [](unsigned long long int node, int num_found, std::uint64_t origin) {
        const std::uint64_t carc = Tree[node].chld;
        Mine_vec(carc, 0, num_found,
                 CTree[carc].ancest,
                 CTree[carc].seq,
                 origin,
                 -1);
        std::uint64_t link = CTree[carc].ancest.back();
        while (link != 0) {
            Mine_vec(link - 1, 0, num_found,
                     CTree[carc].ancest,
                     VTree[link - 1].seq,
                     origin,
                     1);
            link = VTree[link - 1].sibl;
        }
    };

    // Resume inside CArc/VArc sequences recorded for this pattern. A negative
    // start offset refers to a CTree sequence, a non-negative one to VTree.
    if (!VDFS.empty() && VDFS.back().ass_patt == static_cast<int>(DFS.size())) {
        swap(_vpatt, VDFS.back());
        VDFS.pop_back();
        for (unsigned long long int pnt = 0; pnt < _vpatt.str_pnt.size(); ++pnt) {
            if (_vpatt.str_pnt[pnt] < 0) {
                Mine_vec(_vpatt.seq_ID[pnt],
                         -_vpatt.str_pnt[pnt],
                         -1,
                         ancest_base,
                         CTree[_vpatt.seq_ID[pnt]].seq,
                         0,
                         -1);
            } else {
                Mine_vec(_vpatt.seq_ID[pnt],
                         _vpatt.str_pnt[pnt],
                         -1,
                         ancest_base,
                         VTree[_vpatt.seq_ID[pnt]].seq,
                         0,
                         1);
            }
        }
    }

    std::vector<unsigned long long int> DFS_itm;   // arcs reached through positive items
    std::vector<unsigned long long int> DFS_seq;   // arcs reached through a negative item
    if (ilist_nempty)
        DFS_numfound.clear();

    for (unsigned long long int pnt = 0; pnt < _patt.str_pnt.size(); ++pnt) {
        // Phase 1: walk below the start pointer, following positive items.
        DFS_itm.push_back(_patt.str_pnt[pnt]);
        while (!DFS_itm.empty()) {
            unsigned long long int cur_sibl = DFS_itm.back();
            DFS_itm.pop_back();
            if (Tree[cur_sibl].itmset < 0) {
                mine_suffixes(cur_sibl, -1, _patt.str_pnt[pnt]);
                continue;
            }
            cur_sibl = Tree[cur_sibl].chld;
            while (cur_sibl != 0) {
                int cur_itm = Tree[cur_sibl].item;
                if (cur_itm < 0) {
                    cur_itm = -cur_itm;
                    if (slist[cur_itm - 1])
                        credit(cur_itm - 1, cur_sibl);
                    if (expandable(cur_sibl)) {
                        DFS_seq.push_back(cur_sibl);
                        if (ilist_nempty) {
                            if (cur_itm == -_patt.seq[last_neg])
                                DFS_numfound.push_back(1);
                            else
                                DFS_numfound.push_back(0);
                        }
                    }
                } else {
                    if (ilist[cur_itm - 1])
                        credit(cur_itm + L - 1, cur_sibl);
                    if (expandable(cur_sibl))
                        DFS_itm.push_back(cur_sibl);
                }
                cur_sibl = Tree[cur_sibl].sibl;
            }
        }

        // Remember how many start pointers each positive candidate had after
        // phase 1; check_parent() receives this value in phase 2.
        if (ilist_nempty) {
            for (int i = 0; i < (int)L; ++i) {
                if (ilist[i])
                    last_strpnt[i] = pot_patt[i + L].str_pnt.size();
            }
        }

        // Phase 2: continue below the arcs collected in DFS_seq, tracking in
        // num_found how many entries of the pattern's last itemset matched.
        while (!DFS_seq.empty()) {
            unsigned long long int cur_sibl = DFS_seq.back();
            DFS_seq.pop_back();
            int num_found = 0;
            if (ilist_nempty) {
                num_found = DFS_numfound.back();
                DFS_numfound.pop_back();
            }
            if (Tree[cur_sibl].itmset < 0) {
                mine_suffixes(cur_sibl, num_found, _patt.str_pnt[pnt]);
                continue;
            }
            cur_sibl = Tree[cur_sibl].chld;
            while (cur_sibl != 0) {
                int cur_itm = Tree[cur_sibl].item;
                if (cur_itm > 0) {
                    if (num_found == itmset_size &&
                        ilist[cur_itm - 1] &&
                        (std::abs(Tree[Tree[cur_sibl].anct].itmset) < std::abs(Tree[_patt.str_pnt[pnt]].itmset)
                         || !check_parent(Tree[cur_sibl].anct,
                                          _patt.str_pnt[pnt],
                                          last_strpnt[cur_itm - 1],
                                          pot_patt[cur_itm + L - 1].str_pnt))) {
                        credit(cur_itm + L - 1, cur_sibl);
                    }
                    if (slist[cur_itm - 1] &&
                        std::abs(Tree[Tree[cur_sibl].anct].itmset) <= std::abs(Tree[_patt.str_pnt[pnt]].itmset)) {
                        credit(cur_itm - 1, cur_sibl);
                    }
                    if (expandable(cur_sibl)) {
                        DFS_seq.push_back(cur_sibl);
                        if (ilist_nempty) {
                            if (num_found < itmset_size &&
                                cur_itm == std::abs(_patt.seq[last_neg + num_found]))
                                DFS_numfound.push_back(num_found + 1);
                            else
                                DFS_numfound.push_back(num_found);
                        }
                    }
                } else {
                    cur_itm = -cur_itm;
                    if (slist[cur_itm - 1] &&
                        std::abs(Tree[Tree[cur_sibl].anct].itmset) <= std::abs(Tree[_patt.str_pnt[pnt]].itmset)) {
                        credit(cur_itm - 1, cur_sibl);
                    }
                    if (expandable(cur_sibl)) {
                        DFS_seq.push_back(cur_sibl);
                        if (ilist_nempty) {
                            if (cur_itm == -_patt.seq[last_neg])
                                DFS_numfound.push_back(1);
                            else
                                DFS_numfound.push_back(0);
                        }
                    }
                }
                cur_sibl = Tree[cur_sibl].sibl;
            }
        }
    }

    // Candidate lists handed to the children: `ilistp` for children created
    // by a positive item, `slistp` for children created by a negative item.
    std::vector<int> ilistp;
    std::vector<int> slistp;
    for (auto it = _patt.list.begin(); it != _patt.list.end(); ++it) {
        if (*it > 0 && pot_patt[(*it) + L - 1].freq >= theta)
            ilistp.push_back(*it);
        else if (*it < 0 && pot_patt[-(*it) - 1].freq >= theta) {
            if (itmset_exists)
                slistp.push_back(-(*it));
            ilistp.push_back(*it);
            slistp.push_back(*it);
        }
    }

    for (auto it = ilistp.begin(); it != ilistp.end(); ++it) {
        int p;
        if (*it < 0)
            p = -(*it) - 1;
        else
            p = (*it) - 1 + L;

        pot_patt[p].str_pnt.shrink_to_fit();
        DFS.push_back(pot_patt[p]);
        DFS.back().seq = _patt.seq;
        DFS.back().seq.push_back(*it);
        if (*it < 0)
            DFS.back().list = slistp;
        else
            DFS.back().list = ilistp;

        if (!CTree.empty() && !pot_vpatt[p].str_pnt.empty()) {
            // Tie the VPattern to the DFS position of the pattern just pushed.
            pot_vpatt[p].ass_patt = static_cast<int>(DFS.size()) - 1;
            VDFS.push_back(pot_vpatt[p]);
        }

        // Out_patt is called unconditionally so the pattern is collected even
        // when nothing is printed or written; it guards its own output with
        // b_disp / b_write.
        Out_patt(DFS.back().seq, DFS.back().freq);

        ++num_patt;
    }
}
|
|
342
|
+
|
|
343
|
+
void Mine_vec(std::uint64_t seq_ID,
|
|
344
|
+
int pos,
|
|
345
|
+
int num_found,
|
|
346
|
+
std::vector<std::uint64_t>& ancest,
|
|
347
|
+
std::vector<int>& items,
|
|
348
|
+
std::uint64_t pnt,
|
|
349
|
+
int sgn)
|
|
350
|
+
{
|
|
351
|
+
std::vector<bool> found(L + L * (ilist_nempty ? 1 : 0), false);
|
|
352
|
+
|
|
353
|
+
if (num_found == -1) {
|
|
354
|
+
while (pos < static_cast<int>(items.size()) && items[pos] > 0) {
|
|
355
|
+
int cur_itm = items[pos];
|
|
356
|
+
if (ilist[cur_itm - 1] && !found[cur_itm + L - 1]) {
|
|
357
|
+
if (pos + 1 < static_cast<int>(items.size())) {
|
|
358
|
+
pot_vpatt[cur_itm + L - 1].seq_ID.push_back(seq_ID);
|
|
359
|
+
pot_vpatt[cur_itm + L - 1].str_pnt.push_back(sgn * (pos + 1));
|
|
360
|
+
}
|
|
361
|
+
++pot_patt[cur_itm + L - 1].freq;
|
|
362
|
+
found[cur_itm + L - 1] = true;
|
|
363
|
+
}
|
|
364
|
+
++pos;
|
|
365
|
+
}
|
|
366
|
+
}
|
|
367
|
+
|
|
368
|
+
for (unsigned int k = pos; k < items.size(); ++k) {
|
|
369
|
+
int cur_itm = std::abs(items[k]);
|
|
370
|
+
if (items[k] < 0)
|
|
371
|
+
num_found = 0;
|
|
372
|
+
|
|
373
|
+
if (slist[cur_itm - 1] && !found[cur_itm - 1]) {
|
|
374
|
+
if (ancest.empty() || std::abs(Tree[ancest[cur_itm - 1]].itmset) <= std::abs(Tree[pnt].itmset)) {
|
|
375
|
+
if (k + 1 < static_cast<int>(items.size())) {
|
|
376
|
+
pot_vpatt[cur_itm - 1].seq_ID.push_back(seq_ID);
|
|
377
|
+
pot_vpatt[cur_itm - 1].str_pnt.push_back(sgn * (k + 1));
|
|
378
|
+
}
|
|
379
|
+
++pot_patt[cur_itm - 1].freq;
|
|
380
|
+
}
|
|
381
|
+
found[cur_itm - 1] = true;
|
|
382
|
+
}
|
|
383
|
+
|
|
384
|
+
if (num_found == itmset_size) {
|
|
385
|
+
if (ilist[cur_itm - 1] && !found[cur_itm + L - 1]) {
|
|
386
|
+
if (ancest.empty() ||
|
|
387
|
+
std::abs(Tree[ancest[cur_itm - 1]].itmset) < std::abs(Tree[pnt].itmset) ||
|
|
388
|
+
!check_parent(ancest[cur_itm - 1], pnt, last_strpnt[cur_itm - 1], pot_patt[cur_itm + L - 1].str_pnt))
|
|
389
|
+
{
|
|
390
|
+
if (k + 1 < static_cast<int>(items.size())) {
|
|
391
|
+
pot_vpatt[cur_itm + L - 1].seq_ID.push_back(seq_ID);
|
|
392
|
+
pot_vpatt[cur_itm + L - 1].str_pnt.push_back(sgn * (k + 1));
|
|
393
|
+
}
|
|
394
|
+
++pot_patt[cur_itm + L - 1].freq;
|
|
395
|
+
}
|
|
396
|
+
found[cur_itm + L - 1] = true;
|
|
397
|
+
}
|
|
398
|
+
} else if (cur_itm == std::abs(_patt.seq[last_neg + num_found])) {
|
|
399
|
+
++num_found;
|
|
400
|
+
}
|
|
401
|
+
}
|
|
402
|
+
}
|
|
403
|
+
|
|
404
|
+
void Out_patt(std::vector<int>& seq, unsigned int freq) {
|
|
405
|
+
// Always collect:
|
|
406
|
+
largehm::collected.push_back(seq);
|
|
407
|
+
|
|
408
|
+
std::ofstream file_o;
|
|
409
|
+
if (b_write)
|
|
410
|
+
file_o.open(out_file, std::ios::app);
|
|
411
|
+
|
|
412
|
+
for (int ii = 0; ii < static_cast<int>(seq.size()); ii++) {
|
|
413
|
+
if (b_disp)
|
|
414
|
+
std::cout << seq[ii] << " ";
|
|
415
|
+
if (b_write)
|
|
416
|
+
file_o << seq[ii] << " ";
|
|
417
|
+
}
|
|
418
|
+
if (b_disp)
|
|
419
|
+
std::cout << std::endl;
|
|
420
|
+
if (b_write) {
|
|
421
|
+
file_o << std::endl;
|
|
422
|
+
file_o << "************** Freq: " << freq << std::endl;
|
|
423
|
+
file_o.close();
|
|
424
|
+
}
|
|
425
|
+
if (b_disp)
|
|
426
|
+
std::cout << "************** Freq: " << freq << std::endl;
|
|
427
|
+
}
|
|
428
|
+
|
|
429
|
+
} // namespace largehm
|