effspm 0.1.5__cp310-cp310-win_amd64.whl → 0.3.0__cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. effspm/__init__.py +9 -2
  2. effspm/_core.cpp +91 -13
  3. effspm/_effspm.cp310-win_amd64.pyd +0 -0
  4. effspm/_effspm.cpp +679 -0
  5. effspm/btminer/src/build_mdd.cpp +88 -0
  6. effspm/btminer/src/build_mdd.hpp +34 -0
  7. effspm/btminer/src/freq_miner.cpp +264 -0
  8. effspm/btminer/src/freq_miner.hpp +55 -0
  9. effspm/btminer/src/load_inst.cpp +275 -0
  10. effspm/btminer/src/load_inst.hpp +43 -0
  11. effspm/btminer/src/utility.cpp +50 -0
  12. effspm/btminer/src/utility.hpp +16 -0
  13. effspm/freq_miner.hpp +7 -1
  14. effspm/htminer/src/build_mdd.cpp +139 -0
  15. effspm/htminer/src/build_mdd.hpp +64 -0
  16. effspm/htminer/src/freq_miner.cpp +350 -0
  17. effspm/htminer/src/freq_miner.hpp +60 -0
  18. effspm/htminer/src/load_inst.cpp +394 -0
  19. effspm/htminer/src/load_inst.hpp +23 -0
  20. effspm/htminer/src/utility.cpp +72 -0
  21. effspm/htminer/src/utility.hpp +77 -0
  22. effspm/largebm/src/build_mdd.cpp +96 -0
  23. effspm/largebm/src/build_mdd.hpp +32 -0
  24. effspm/largebm/src/freq_miner.cpp +299 -0
  25. effspm/largebm/src/freq_miner.hpp +37 -0
  26. effspm/largebm/src/load_inst.cpp +224 -0
  27. effspm/largebm/src/load_inst.hpp +35 -0
  28. effspm/largebm/src/utility.cpp +35 -0
  29. effspm/largebm/src/utility.hpp +15 -0
  30. effspm/largehm/src/build_mdd.cpp +174 -0
  31. effspm/largehm/src/build_mdd.hpp +93 -0
  32. effspm/largehm/src/freq_miner.cpp +429 -0
  33. effspm/largehm/src/freq_miner.hpp +77 -0
  34. effspm/largehm/src/load_inst.cpp +375 -0
  35. effspm/largehm/src/load_inst.hpp +64 -0
  36. effspm/largehm/src/utility.cpp +38 -0
  37. effspm/largehm/src/utility.hpp +29 -0
  38. effspm/largepp/src/freq_miner.cpp +198 -0
  39. effspm/largepp/src/freq_miner.hpp +18 -0
  40. effspm/largepp/src/load_inst.cpp +238 -0
  41. effspm/largepp/src/load_inst.hpp +34 -0
  42. effspm/largepp/src/pattern.hpp +31 -0
  43. effspm/largepp/src/utility.cpp +34 -0
  44. effspm/largepp/src/utility.hpp +21 -0
  45. effspm/load_inst.hpp +18 -12
  46. effspm-0.3.0.dist-info/METADATA +237 -0
  47. effspm-0.3.0.dist-info/RECORD +54 -0
  48. {effspm-0.1.5.dist-info → effspm-0.3.0.dist-info}/WHEEL +1 -1
  49. effspm/_core.cp310-win_amd64.pyd +0 -0
  50. effspm-0.1.5.dist-info/METADATA +0 -38
  51. effspm-0.1.5.dist-info/RECORD +0 -14
  52. {effspm-0.1.5.dist-info → effspm-0.3.0.dist-info}/licenses/LICENSE +0 -0
  53. {effspm-0.1.5.dist-info → effspm-0.3.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,174 @@
1
+ // ─── effspm/largehm/src/build_mdd.cpp ─────────────────────────────────────────
2
+
3
+ #include "build_mdd.hpp"
4
+
5
+ // ─── Definitions of the extern globals declared in build_mdd.hpp ─────────────
6
+ std::vector<largehm::Arc> largehm::Tree; // arc storage; Add_arc links entries through chld/sibl indices and treats index 0 as "none"
7
+ std::vector<largehm::VArc> largehm::VTree; // sequences appended by Add_vec once an arc already has a CTree entry; chained through VArc::sibl
8
+ std::vector<largehm::CArc> largehm::CTree; // first sequence attached to an arc by Add_vec, stored together with its ancestor table
9
+
10
+ #include <vector>
11
+ #include <iostream>
12
+ #include <cmath> // for std::abs
13
+ #include <unordered_map>
14
+ #include <cstdint> // for std::uint64_t
15
+ #include "load_inst.hpp"
16
+ #include "freq_miner.hpp"
17
+ #include "utility.hpp"
18
+
19
+ namespace largehm {
20
+
21
+ //
22
+ // ─── Build the MDD by sequentially calling Add_arc() then possibly Add_vec() ──
23
+ //
24
+ void Build_MDD(std::vector<int>& items, std::vector<int>& items_lim) {
25
+ // SANITY CHECK: show sizes before building
26
+
27
+ std::unordered_map<int, std::uint64_t> ancest_map;
28
+ std::uint64_t last_arc = 0;
29
+ int itmset = 0;
30
+
31
+ // Insert each prefix item as an arc
32
+ for (auto it = items.begin(); it != items.end(); ++it) {
33
+ last_arc = Add_arc(*it, last_arc, itmset, ancest_map);
34
+ }
35
+
36
+ // If there is a suffix beyond mlim, attach it via Add_vec()
37
+ if (!items_lim.empty()) {
38
+ Add_vec(items_lim, ancest_map, last_arc, itmset);
39
+ }
40
+ }
41
+
42
+
43
+ //
44
+ // ─── Add_arc: insert a single “item” into the MDD under parent last_arc. ──────
45
+ //
46
+ int Add_arc(int item,
47
+ std::uint64_t last_arc,
48
+ int& itmset,
49
+ std::unordered_map<int, std::uint64_t>& ancest_map)
50
+ {
51
+ // Ensure DFS is at least size |item|
52
+ size_t needed = static_cast<size_t>(std::abs(item));
53
+ if (DFS.size() < needed) {
54
+ size_t old = DFS.size();
55
+ DFS.resize(needed);
56
+ for (size_t i = old; i < needed; ++i) {
57
+ DFS[i] = Pattern(-static_cast<int>(i) - 1);
58
+ }
59
+ }
60
+
61
+ unsigned int anct = 0;
62
+ auto p = ancest_map.find(std::abs(item));
63
+ if (p != ancest_map.end()) {
64
+ anct = p->second;
65
+ }
66
+
67
+ if (item < 0) {
68
+ ++itmset;
69
+ }
70
+
71
+ std::uint64_t last_sibl = Tree[last_arc].chld;
72
+ if (last_sibl == 0) {
73
+ // No child yet: create a new Arc
74
+ Tree.emplace_back(item, itmset, anct);
75
+ last_sibl = Tree.size() - 1;
76
+ Tree[last_arc].chld = last_sibl;
77
+ if (anct == 0) {
78
+ DFS[std::abs(item) - 1].str_pnt.push_back(last_sibl);
79
+ }
80
+ }
81
+ else {
82
+ // Traverse siblings until we find a match or append
83
+ while (Tree[last_sibl].item != item) {
84
+ if (Tree[last_sibl].sibl == 0) {
85
+ Tree.emplace_back(item, itmset, anct);
86
+ Tree[last_sibl].sibl = Tree.size() - 1;
87
+ last_sibl = Tree.size() - 1;
88
+ if (anct == 0) {
89
+ DFS[std::abs(item) - 1].str_pnt.push_back(last_sibl);
90
+ }
91
+ break;
92
+ }
93
+ last_sibl = Tree[last_sibl].sibl;
94
+ }
95
+ }
96
+
97
+ if (anct == 0) {
98
+ ++DFS[std::abs(item) - 1].freq;
99
+ }
100
+ ++Tree[last_sibl].freq;
101
+ ancest_map[std::abs(item)] = last_sibl;
102
+ return static_cast<int>(last_sibl);
103
+ }
104
+
105
+
106
+ // Add_vec stores the sequence `items_lim` below arc `last_arc` and updates the per-item statistics in DFS/VDFS.
107
+ // ─── Add_vec: attach the “items_lim” vector as children/vertical arcs ─────────
108
+ // The first sequence attached to an arc becomes a CTree entry; later ones are appended to VTree and chained from that entry.
109
+ void Add_vec(std::vector<int>& items_lim, // items to store; emptied on return because the CArc/VArc constructors swap the contents out
110
+ std::unordered_map<int, std::uint64_t>& ancest_map, // |item| -> arc index gathered by Add_arc for this sequence
111
+ std::uint64_t last_arc, // Tree index the sequence is attached to
112
+ int itmset) // itemset counter at last_arc; written back negated when a CTree entry is created
113
+ {
114
+ // Ensure VDFS and DFS are at least size L
115
+ if (VDFS.size() < static_cast<size_t>(L)) {
116
+ size_t old = VDFS.size();
117
+ VDFS.resize(static_cast<size_t>(L));
118
+ for (size_t i = old; i < VDFS.size(); ++i) {
119
+ VDFS[i] = VPattern(static_cast<int>(i)); // new slots are constructed from their own index
120
+ }
121
+ }
122
+ if (DFS.size() < static_cast<size_t>(L)) {
123
+ size_t old = DFS.size();
124
+ DFS.resize(static_cast<size_t>(L));
125
+ for (size_t i = old; i < DFS.size(); ++i) {
126
+ DFS[i] = Pattern(-static_cast<int>(i) - 1); // same seeding as in Add_arc
127
+ }
128
+ }
129
+
130
+ items_lim.shrink_to_fit();
131
+ std::vector<bool> counted(L, false); // counted[i]: item i+1 has already been credited for this sequence
132
+
133
+ // If this node has positive itmset (>0) or no CTree child yet, create first child entry
134
+ if (Tree[last_arc].itmset > 0 || Tree[last_arc].chld == 0) { // NOTE(review): an entry stored at CTree index 0 also leaves chld == 0, so this branch would be taken again for that arc — confirm index 0 is reserved elsewhere
135
+ std::vector<std::uint64_t> ancest(L + 1, 0ULL); // slots 0..L-1: arc index per item from ancest_map; last slot: head of the VTree chain (accessed via back())
136
+ for (auto& kv : ancest_map) {
137
+ ancest[kv.first - 1] = kv.second;
138
+ counted[kv.first - 1] = true; // items present in ancest_map are not credited again below
139
+ }
140
+ for (int i = 0; i < static_cast<int>(items_lim.size()); ++i) {
141
+ int cur_itm = std::abs(items_lim[i]);
142
+ if (!counted[cur_itm - 1]) {
143
+ if (i + 1 < static_cast<int>(items_lim.size())) { // a pointer is recorded only when something follows position i
144
+ VDFS[cur_itm - 1].str_pnt.push_back(-i - 1); // negative position: Extend_patt treats it as a CTree reference
145
+ VDFS[cur_itm - 1].seq_ID.push_back(CTree.size()); // index the entry created below will receive
146
+ }
147
+ ++DFS[cur_itm - 1].freq;
148
+ counted[cur_itm - 1] = true;
149
+ }
150
+ }
151
+ CTree.emplace_back(ancest, items_lim); // the CArc constructor swaps both vectors into the new entry
152
+ Tree[last_arc].chld = CTree.size() - 1;
153
+ Tree[last_arc].itmset = -itmset; // Extend_patt reads a negative itmset as "chld is a CTree index"
154
+ }
155
+ else {
156
+ // Normal “existing CTree child” path
157
+ auto& ancest = CTree[ Tree[last_arc].chld ].ancest;
158
+ for (int i = 0; i < static_cast<int>(items_lim.size()); ++i) {
159
+ int cur_itm = std::abs(items_lim[i]);
160
+ if (!counted[cur_itm - 1] && ancest[cur_itm - 1] == 0ULL) { // skip items with a recorded arc in the ancestor table or already seen in this sequence
161
+ if (i + 1 < static_cast<int>(items_lim.size())) {
162
+ VDFS[cur_itm - 1].str_pnt.push_back(i + 1); // positive position: Extend_patt treats it as a VTree reference
163
+ VDFS[cur_itm - 1].seq_ID.push_back(VTree.size()); // index the entry created below will receive
164
+ }
165
+ ++DFS[cur_itm - 1].freq;
166
+ counted[cur_itm - 1] = true;
167
+ }
168
+ }
169
+ VTree.emplace_back(items_lim, CTree[ Tree[last_arc].chld ].ancest.back()); // the new entry's sibl is the previous chain head
170
+ CTree[ Tree[last_arc].chld ].ancest.back() = VTree.size(); // head is stored as index + 1 so that 0 can mean "no chain"
171
+ }
172
+ }
173
+
174
+ } // namespace largehm
@@ -0,0 +1,93 @@
1
+ #ifndef LARGEHM_BUILD_MDD_HPP
2
+ #define LARGEHM_BUILD_MDD_HPP
3
+
4
+ #include <vector>
5
+ #include <unordered_map>
6
+ #include <cstddef> // for size_t
7
+ #include <cstdint> // for uint64_t
8
+
9
+ #include "load_inst.hpp" // defines L, DFS, VDFS, Tree, etc.
10
+ #include "freq_miner.hpp" // for Pattern, VPattern
11
+ #include "utility.hpp" // if you need check_parent or collected
12
+
13
+ namespace largehm {
14
+
15
+ //
16
+ // ─── Types & Globals ─────────────────────────────────────────────────────────
17
+ //
18
+
19
+ struct Arc; // tree node with item, itemset counter and ancestor/child/sibling links; defined further down in this header
20
+ struct VArc; // stored sequence chained through `sibl`; defined further down in this header
21
+ struct CArc; // stored sequence kept together with an ancestor table; defined further down in this header
22
+
23
+ extern std::vector<Arc> Tree; // defined in build_mdd.cpp
24
+ extern std::vector<VArc> VTree; // defined in build_mdd.cpp
25
+ extern std::vector<CArc> CTree; // defined in build_mdd.cpp
26
+
27
+ //
28
+ // ─── Public API ───────────────────────────────────────────────────────────────
29
+ //
30
+
31
+ void Build_MDD(std::vector<int>& items, // entries inserted one by one as tree arcs via Add_arc
32
+ std::vector<int>& items_lim); // when non-empty, attached below the last inserted arc via Add_vec
33
+
34
+ //
35
+ // ─── Internal Helpers ─────────────────────────────────────────────────────────
36
+ //
37
+
38
+ int Add_arc(int item, // signed item code; a negative value increments `itmset`
39
+ std::uint64_t last_arc, // Tree index of the parent arc
40
+ int& itmset, // running itemset counter, updated in place
41
+ std::unordered_map<int, std::uint64_t>& ancest_map); // |item| -> arc index; read for the ancestor link, then updated. Returns the child arc index narrowed to int
42
+
43
+ void Add_vec(std::vector<int>& items_lim, // sequence to store; its contents are swapped into the new CTree/VTree entry
44
+ std::unordered_map<int, std::uint64_t>& ancest_map, // |item| -> arc index for the current sequence
45
+ std::uint64_t last_arc, // Tree index the sequence is attached to
46
+ int itmset); // itemset counter value at last_arc
47
+
48
+ //
49
+ // ─── Struct Definitions ───────────────────────────────────────────────────────
50
+ //
51
+
52
// One node of the tree held in `Tree`.
// A freshly constructed arc has no child, no sibling and a zero frequency.
struct Arc {
    int item;                     // signed item code
    int itmset;                   // itemset counter value given at construction
    std::uint64_t anct;           // ancestor arc index given at construction
    std::uint64_t chld = 0;       // child index, 0 until one is linked
    std::uint64_t sibl = 0;       // sibling index, 0 until one is linked
    unsigned long long freq = 0;  // occurrence counter

    Arc(int _item, int _itmset, std::uint64_t _anct)
        : item(_item), itmset(_itmset), anct(_anct) {}
};
64
+
65
// A stored sequence linked to the next one through `sibl`.
// Construction takes over the caller's vector: `items` is left empty.
struct VArc {
    std::vector<int> seq;         // the stored items
    std::uint64_t sibl;           // link value given at construction
    unsigned long long freq = 0;  // starts at zero

    explicit VArc(std::vector<int>& items, std::uint64_t _sibl)
        : sibl(_sibl)
    {
        // seq is still empty here, so the swap moves the contents in and
        // hands the caller an empty vector.
        items.swap(seq);
    }
};
76
+
77
// A stored sequence kept together with an ancestor table.
// Construction takes over both caller vectors: `_anc` and `items` are left empty.
struct CArc {
    std::vector<std::uint64_t> ancest;  // ancestor table handed over at construction
    std::vector<int> seq;               // the stored items
    unsigned long long freq = 0;        // starts at zero

    explicit CArc(std::vector<std::uint64_t>& _anc,
                  std::vector<int>& items)
    {
        // Both members are still empty here, so each swap moves the contents
        // in and hands the caller an empty vector.
        _anc.swap(ancest);
        items.swap(seq);
    }
};
90
+
91
+ } // namespace largehm
92
+
93
+ #endif // LARGEHM_BUILD_MDD_HPP
@@ -0,0 +1,429 @@
1
+ // ─────────────────────────────────────────────────────────────────────────────
2
+ // NEW CHANGE (2025-10-24):
3
+ // - Always call Out_patt(...) so patterns are collected regardless of verbosity.
4
+ // - Printing/writing remains guarded inside Out_patt by b_disp/b_write.
5
+ // - This fixes LargeHT returning 0 patterns when verbose=False.
6
+ // ─────────────────────────────────────────────────────────────────────────────
7
+
8
+ #include <cstdint>
9
+ #include <vector>
10
+
11
+ #include <iostream>
12
+ #include <time.h>
13
+ #include <cmath> // for std::ceil
14
+
15
+ #include "freq_miner.hpp"
16
+ #include "build_mdd.hpp"
17
+ #include "utility.hpp"
18
+
19
+ std::vector<std::uint64_t> ancest_base; // ancestor table passed to Mine_vec for VDFS-driven calls; never filled in this file, so Mine_vec sees it as empty. NOTE: declared at global scope, outside namespace largehm
20
+
21
+ namespace largehm {
22
+
23
+ void Out_patt(std::vector<int>& seq, unsigned int freq); // appends seq to `collected`; prints/writes it when b_disp/b_write are set
24
+ void Extend_patt(Pattern& _patt); // extends the given pattern by one item and pushes the frequent extensions on DFS
25
+ void Mine_vec(std::uint64_t seq_ID, // index of the stored sequence, recorded in pot_vpatt
26
+ int pos, // position in `items` at which scanning starts
27
+ int num_found, // matched-item counter; -1 selects the leading positive-item scan
28
+ std::vector<std::uint64_t>& ancest, // ancestor table; an empty table disables the ancestor checks
29
+ std::vector<int>& items, // the stored sequence being scanned
30
+ std::uint64_t pnt, // Tree index whose itmset is compared against the ancestors'
31
+ int sgn); // sign applied to positions recorded in pot_vpatt (-1 for CTree, 1 for VTree callers)
32
+
33
+ unsigned long long int num_patt = 0; // incremented once per pattern pushed in Extend_patt
34
+
35
+ std::vector<bool> ilist; // ilist[i]: positive code i+1 is in the current pattern's candidate list (set in Extend_patt)
36
+ std::vector<bool> slist; // slist[i]: negative code -(i+1) is in the current pattern's candidate list
37
+
38
+ std::vector<Pattern> pot_patt; // candidate extensions: slot i for code -(i+1), slot L+i for code +(i+1)
39
+ std::vector<VPattern> pot_vpatt; // stored-sequence pointers collected for the same slots as pot_patt
40
+ std::vector<unsigned long long int> last_strpnt; // per item, size of pot_patt[L+i].str_pnt recorded between the two traversal passes of Extend_patt
41
+
42
+ std::vector<int> DFS_numfound; // num_found values pushed/popped in step with the DFS_seq stack in Extend_patt
43
+
44
+ Pattern _patt; // pattern currently being extended
45
+ VPattern _vpatt; // stored-sequence pointers swapped out of VDFS for the current pattern
46
+
47
+ int itmset_size; // _patt.seq.size() - last_neg
48
+ int last_neg; // index of the last negative entry of _patt.seq
49
+
50
+ bool ilist_nempty; // true when the current candidate list holds at least one positive code
51
+
52
+ void Freq_miner() {
53
+ // Ensure DFS and VDFS are at least size L
54
+ if (DFS.size() < static_cast<size_t>(L)) {
55
+ size_t old = DFS.size();
56
+ DFS.resize(static_cast<size_t>(L));
57
+ for (size_t i = old; i < DFS.size(); ++i) {
58
+ DFS[i] = Pattern(-static_cast<int>(i) - 1);
59
+ }
60
+ }
61
+ if (VDFS.size() < static_cast<size_t>(L)) {
62
+ size_t old = VDFS.size();
63
+ VDFS.resize(static_cast<size_t>(L));
64
+ for (size_t i = old; i < VDFS.size(); ++i) {
65
+ VDFS[i] = VPattern(static_cast<int>(i));
66
+ }
67
+ }
68
+
69
+ std::vector<int> tmp_list;
70
+ for (int i = 0; i < static_cast<int>(L); ++i) {
71
+ if (DFS[i].freq >= theta) {
72
+ tmp_list.push_back(-i - 1);
73
+ if (itmset_exists) {
74
+ tmp_list.push_back(i + 1);
75
+ }
76
+ }
77
+ }
78
+ for (int i = 0; i < static_cast<int>(DFS.size()); ++i) {
79
+ DFS[i].list = tmp_list;
80
+ }
81
+
82
+ while (!DFS.empty() && give_time(std::clock() - start_time) < time_limit) {
83
+ if (DFS.back().freq >= theta) {
84
+ Extend_patt(DFS.back());
85
+ } else {
86
+ DFS.pop_back();
87
+ if (!VDFS.empty() && VDFS.back().ass_patt == static_cast<int>(DFS.size())) {
88
+ VDFS.pop_back();
89
+ }
90
+ }
91
+ }
92
+ }
93
+
94
+ void Extend_patt(Pattern& _pattern) {
95
+ swap(_patt, _pattern);
96
+ DFS.pop_back();
97
+
98
+ slist = std::vector<bool>(L, false);
99
+ ilist_nempty = false;
100
+
101
+ if (itmset_exists) {
102
+ ilist = std::vector<bool>(L, false);
103
+ for (auto it = _patt.list.begin(); it != _patt.list.end(); ++it) {
104
+ if (*it < 0)
105
+ slist[-(*it) - 1] = true;
106
+ else {
107
+ ilist[(*it) - 1] = true;
108
+ ilist_nempty = true;
109
+ }
110
+ }
111
+ } else {
112
+ for (auto it = _patt.list.begin(); it != _patt.list.end(); ++it)
113
+ slist[-(*it) - 1] = true;
114
+ }
115
+
116
+ last_neg = _patt.seq.size() - 1;
117
+ while (_patt.seq[last_neg] > 0)
118
+ --last_neg;
119
+ itmset_size = _patt.seq.size() - last_neg;
120
+
121
+ pot_patt = std::vector<Pattern>(L + L * (ilist_nempty ? 1 : 0));
122
+ if (!CTree.empty())
123
+ pot_vpatt = std::vector<VPattern>(L + L * (ilist_nempty ? 1 : 0));
124
+
125
+ last_strpnt = std::vector<unsigned long long int>(L, 0ULL);
126
+
127
+ if (!VDFS.empty() && VDFS.back().ass_patt == static_cast<int>(DFS.size())) {
128
+ swap(_vpatt, VDFS.back());
129
+ VDFS.pop_back();
130
+ for (unsigned long long int pnt = 0; pnt < _vpatt.str_pnt.size(); ++pnt) {
131
+ if (_vpatt.str_pnt[pnt] < 0) {
132
+ Mine_vec(_vpatt.seq_ID[pnt],
133
+ -_vpatt.str_pnt[pnt],
134
+ -1,
135
+ ancest_base,
136
+ CTree[_vpatt.seq_ID[pnt]].seq,
137
+ 0,
138
+ -1);
139
+ } else {
140
+ Mine_vec(_vpatt.seq_ID[pnt],
141
+ _vpatt.str_pnt[pnt],
142
+ -1,
143
+ ancest_base,
144
+ VTree[_vpatt.seq_ID[pnt]].seq,
145
+ 0,
146
+ 1);
147
+ }
148
+ }
149
+ }
150
+
151
+ std::vector<unsigned long long int> DFS_itm;
152
+ std::vector<unsigned long long int> DFS_seq;
153
+ if (ilist_nempty)
154
+ DFS_numfound.clear();
155
+
156
+ for (unsigned long long int pnt = 0; pnt < _patt.str_pnt.size(); ++pnt) {
157
+ DFS_itm.push_back(_patt.str_pnt[pnt]);
158
+ while (!DFS_itm.empty()) {
159
+ unsigned long long int cur_sibl = DFS_itm.back();
160
+ DFS_itm.pop_back();
161
+ if (Tree[cur_sibl].itmset < 0) {
162
+ unsigned int carc = Tree[cur_sibl].chld;
163
+ Mine_vec(carc, 0, -1,
164
+ CTree[carc].ancest,
165
+ CTree[carc].seq,
166
+ _patt.str_pnt[pnt],
167
+ -1);
168
+ cur_sibl = CTree[carc].ancest.back();
169
+ while (cur_sibl != 0) {
170
+ Mine_vec(cur_sibl - 1, 0, -1,
171
+ CTree[carc].ancest,
172
+ VTree[cur_sibl - 1].seq,
173
+ _patt.str_pnt[pnt],
174
+ 1);
175
+ cur_sibl = VTree[cur_sibl - 1].sibl;
176
+ }
177
+ continue;
178
+ }
179
+ cur_sibl = Tree[cur_sibl].chld;
180
+ while (cur_sibl != 0) {
181
+ int cur_itm = Tree[cur_sibl].item;
182
+ if (cur_itm < 0) {
183
+ cur_itm = -cur_itm;
184
+ if (slist[cur_itm - 1]) {
185
+ pot_patt[cur_itm - 1].freq += Tree[cur_sibl].freq;
186
+ if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0)
187
+ pot_patt[cur_itm - 1].str_pnt.push_back(cur_sibl);
188
+ }
189
+ if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0) {
190
+ DFS_seq.push_back(cur_sibl);
191
+ if (ilist_nempty) {
192
+ if (cur_itm == -_patt.seq[last_neg])
193
+ DFS_numfound.push_back(1);
194
+ else
195
+ DFS_numfound.push_back(0);
196
+ }
197
+ }
198
+ } else {
199
+ if (ilist[cur_itm - 1]) {
200
+ pot_patt[cur_itm + L - 1].freq += Tree[cur_sibl].freq;
201
+ if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0)
202
+ pot_patt[cur_itm + L - 1].str_pnt.push_back(cur_sibl);
203
+ }
204
+ if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0)
205
+ DFS_itm.push_back(cur_sibl);
206
+ }
207
+ cur_sibl = Tree[cur_sibl].sibl;
208
+ }
209
+ }
210
+
211
+ if (ilist_nempty) {
212
+ for (int i = 0; i < (int)L; ++i) {
213
+ if (ilist[i])
214
+ last_strpnt[i] = pot_patt[i + L].str_pnt.size();
215
+ }
216
+ }
217
+
218
+ while (!DFS_seq.empty()) {
219
+ unsigned long long int cur_sibl = DFS_seq.back();
220
+ DFS_seq.pop_back();
221
+ int num_found = 0;
222
+ if (ilist_nempty) {
223
+ num_found = DFS_numfound.back();
224
+ DFS_numfound.pop_back();
225
+ }
226
+ if (Tree[cur_sibl].itmset < 0) {
227
+ unsigned int carc = Tree[cur_sibl].chld;
228
+ Mine_vec(carc, 0, num_found,
229
+ CTree[carc].ancest,
230
+ CTree[carc].seq,
231
+ _patt.str_pnt[pnt],
232
+ -1);
233
+ cur_sibl = CTree[carc].ancest.back();
234
+ while (cur_sibl != 0) {
235
+ Mine_vec(cur_sibl - 1, 0, num_found,
236
+ CTree[carc].ancest,
237
+ VTree[cur_sibl - 1].seq,
238
+ _patt.str_pnt[pnt],
239
+ 1);
240
+ cur_sibl = VTree[cur_sibl - 1].sibl;
241
+ }
242
+ continue;
243
+ }
244
+ cur_sibl = Tree[cur_sibl].chld;
245
+ while (cur_sibl != 0) {
246
+ int cur_itm = Tree[cur_sibl].item;
247
+ if (cur_itm > 0) {
248
+ if (num_found == itmset_size &&
249
+ ilist[cur_itm - 1] &&
250
+ (std::abs(Tree[Tree[cur_sibl].anct].itmset) < std::abs(Tree[_patt.str_pnt[pnt]].itmset)
251
+ || !check_parent(Tree[cur_sibl].anct,
252
+ _patt.str_pnt[pnt],
253
+ last_strpnt[cur_itm - 1],
254
+ pot_patt[cur_itm + L - 1].str_pnt))) {
255
+ pot_patt[cur_itm + L - 1].freq += Tree[cur_sibl].freq;
256
+ if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0)
257
+ pot_patt[cur_itm + L - 1].str_pnt.push_back(cur_sibl);
258
+ }
259
+ if (slist[cur_itm - 1] &&
260
+ std::abs(Tree[Tree[cur_sibl].anct].itmset) <= std::abs(Tree[_patt.str_pnt[pnt]].itmset)) {
261
+ pot_patt[cur_itm - 1].freq += Tree[cur_sibl].freq;
262
+ if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0)
263
+ pot_patt[cur_itm - 1].str_pnt.push_back(cur_sibl);
264
+ }
265
+ if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0) {
266
+ DFS_seq.push_back(cur_sibl);
267
+ if (ilist_nempty) {
268
+ if (num_found < itmset_size &&
269
+ cur_itm == std::abs(_patt.seq[last_neg + num_found]))
270
+ DFS_numfound.push_back(num_found + 1);
271
+ else
272
+ DFS_numfound.push_back(num_found);
273
+ }
274
+ }
275
+ } else {
276
+ cur_itm = -cur_itm;
277
+ if (slist[cur_itm - 1] &&
278
+ std::abs(Tree[Tree[cur_sibl].anct].itmset) <= std::abs(Tree[_patt.str_pnt[pnt]].itmset)) {
279
+ pot_patt[cur_itm - 1].freq += Tree[cur_sibl].freq;
280
+ if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0)
281
+ pot_patt[cur_itm - 1].str_pnt.push_back(cur_sibl);
282
+ }
283
+ if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0) {
284
+ DFS_seq.push_back(cur_sibl);
285
+ if (ilist_nempty) {
286
+ if (cur_itm == -_patt.seq[last_neg])
287
+ DFS_numfound.push_back(1);
288
+ else
289
+ DFS_numfound.push_back(0);
290
+ }
291
+ }
292
+ }
293
+ cur_sibl = Tree[cur_sibl].sibl;
294
+ }
295
+ }
296
+ }
297
+
298
+ std::vector<int> ilistp;
299
+ std::vector<int> slistp;
300
+ for (auto it = _patt.list.begin(); it != _patt.list.end(); ++it) {
301
+ if (*it > 0 && pot_patt[(*it) + L - 1].freq >= theta)
302
+ ilistp.push_back(*it);
303
+ else if (*it < 0 && pot_patt[-(*it) - 1].freq >= theta) {
304
+ if (itmset_exists)
305
+ slistp.push_back(-(*it));
306
+ ilistp.push_back(*it);
307
+ slistp.push_back(*it);
308
+ }
309
+ }
310
+
311
+ for (auto it = ilistp.begin(); it != ilistp.end(); ++it) {
312
+ int p;
313
+ if (*it < 0)
314
+ p = -(*it) - 1;
315
+ else
316
+ p = (*it) - 1 + L;
317
+
318
+ pot_patt[p].str_pnt.shrink_to_fit();
319
+ DFS.push_back(pot_patt[p]);
320
+ DFS.back().seq = _patt.seq;
321
+ DFS.back().seq.push_back(*it);
322
+ if (*it < 0)
323
+ DFS.back().list = slistp;
324
+ else
325
+ DFS.back().list = ilistp;
326
+
327
+ if (!CTree.empty() && !pot_vpatt[p].str_pnt.empty()) {
328
+ pot_vpatt[p].ass_patt = static_cast<int>(DFS.size()) - 1;
329
+ VDFS.push_back(pot_vpatt[p]);
330
+ }
331
+
332
+ // ─────────────────────────────────────────────────────────────────────
333
+ // NEW CHANGE: Call Out_patt ALWAYS to populate `collected` even when
334
+ // verbose is false and we are not writing to a file.
335
+ // Out_patt itself guards printing/writing with b_disp/b_write.
336
+ // ─────────────────────────────────────────────────────────────────────
337
+ Out_patt(DFS.back().seq, DFS.back().freq);
338
+
339
+ ++num_patt;
340
+ }
341
+ }
342
+
343
+ void Mine_vec(std::uint64_t seq_ID,
344
+ int pos,
345
+ int num_found,
346
+ std::vector<std::uint64_t>& ancest,
347
+ std::vector<int>& items,
348
+ std::uint64_t pnt,
349
+ int sgn)
350
+ {
351
+ std::vector<bool> found(L + L * (ilist_nempty ? 1 : 0), false);
352
+
353
+ if (num_found == -1) {
354
+ while (pos < static_cast<int>(items.size()) && items[pos] > 0) {
355
+ int cur_itm = items[pos];
356
+ if (ilist[cur_itm - 1] && !found[cur_itm + L - 1]) {
357
+ if (pos + 1 < static_cast<int>(items.size())) {
358
+ pot_vpatt[cur_itm + L - 1].seq_ID.push_back(seq_ID);
359
+ pot_vpatt[cur_itm + L - 1].str_pnt.push_back(sgn * (pos + 1));
360
+ }
361
+ ++pot_patt[cur_itm + L - 1].freq;
362
+ found[cur_itm + L - 1] = true;
363
+ }
364
+ ++pos;
365
+ }
366
+ }
367
+
368
+ for (unsigned int k = pos; k < items.size(); ++k) {
369
+ int cur_itm = std::abs(items[k]);
370
+ if (items[k] < 0)
371
+ num_found = 0;
372
+
373
+ if (slist[cur_itm - 1] && !found[cur_itm - 1]) {
374
+ if (ancest.empty() || std::abs(Tree[ancest[cur_itm - 1]].itmset) <= std::abs(Tree[pnt].itmset)) {
375
+ if (k + 1 < static_cast<int>(items.size())) {
376
+ pot_vpatt[cur_itm - 1].seq_ID.push_back(seq_ID);
377
+ pot_vpatt[cur_itm - 1].str_pnt.push_back(sgn * (k + 1));
378
+ }
379
+ ++pot_patt[cur_itm - 1].freq;
380
+ }
381
+ found[cur_itm - 1] = true;
382
+ }
383
+
384
+ if (num_found == itmset_size) {
385
+ if (ilist[cur_itm - 1] && !found[cur_itm + L - 1]) {
386
+ if (ancest.empty() ||
387
+ std::abs(Tree[ancest[cur_itm - 1]].itmset) < std::abs(Tree[pnt].itmset) ||
388
+ !check_parent(ancest[cur_itm - 1], pnt, last_strpnt[cur_itm - 1], pot_patt[cur_itm + L - 1].str_pnt))
389
+ {
390
+ if (k + 1 < static_cast<int>(items.size())) {
391
+ pot_vpatt[cur_itm + L - 1].seq_ID.push_back(seq_ID);
392
+ pot_vpatt[cur_itm + L - 1].str_pnt.push_back(sgn * (k + 1));
393
+ }
394
+ ++pot_patt[cur_itm + L - 1].freq;
395
+ }
396
+ found[cur_itm + L - 1] = true;
397
+ }
398
+ } else if (cur_itm == std::abs(_patt.seq[last_neg + num_found])) {
399
+ ++num_found;
400
+ }
401
+ }
402
+ }
403
+
404
+ void Out_patt(std::vector<int>& seq, unsigned int freq) {
405
+ // Always collect:
406
+ largehm::collected.push_back(seq);
407
+
408
+ std::ofstream file_o;
409
+ if (b_write)
410
+ file_o.open(out_file, std::ios::app);
411
+
412
+ for (int ii = 0; ii < static_cast<int>(seq.size()); ii++) {
413
+ if (b_disp)
414
+ std::cout << seq[ii] << " ";
415
+ if (b_write)
416
+ file_o << seq[ii] << " ";
417
+ }
418
+ if (b_disp)
419
+ std::cout << std::endl;
420
+ if (b_write) {
421
+ file_o << std::endl;
422
+ file_o << "************** Freq: " << freq << std::endl;
423
+ file_o.close();
424
+ }
425
+ if (b_disp)
426
+ std::cout << "************** Freq: " << freq << std::endl;
427
+ }
428
+
429
+ } // namespace largehm