effspm 0.2.7__cp39-cp39-win_amd64.whl → 0.3.3__cp39-cp39-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. effspm/_effspm.cp39-win_amd64.pyd +0 -0
  2. effspm/_effspm.cpp +961 -210
  3. effspm/btminer/src/build_mdd.cpp +42 -17
  4. effspm/btminer/src/build_mdd.hpp +13 -19
  5. effspm/btminer/src/freq_miner.cpp +134 -49
  6. effspm/btminer/src/freq_miner.hpp +16 -0
  7. effspm/btminer/src/load_inst.cpp +211 -126
  8. effspm/btminer/src/load_inst.hpp +22 -4
  9. effspm/btminer/src/main.cpp +83 -0
  10. effspm/btminer/src/utility.cpp +26 -41
  11. effspm/btminer/src/utility.hpp +6 -30
  12. effspm/freq_miner.hpp +2 -1
  13. effspm/htminer/src/build_mdd.cpp +46 -124
  14. effspm/htminer/src/build_mdd.hpp +56 -49
  15. effspm/htminer/src/freq_miner.cpp +341 -307
  16. effspm/htminer/src/freq_miner.hpp +39 -40
  17. effspm/htminer/src/load_inst.cpp +287 -336
  18. effspm/htminer/src/load_inst.hpp +23 -6
  19. effspm/htminer/src/main.cpp +97 -0
  20. effspm/htminer/src/utility.cpp +38 -57
  21. effspm/htminer/src/utility.hpp +9 -64
  22. effspm/largebm/src/build_mdd.cpp +69 -110
  23. effspm/largebm/src/build_mdd.hpp +22 -37
  24. effspm/largebm/src/freq_miner.cpp +241 -291
  25. effspm/largebm/src/freq_miner.hpp +25 -36
  26. effspm/largebm/src/load_inst.cpp +20 -26
  27. effspm/largebm/src/load_inst.hpp +24 -34
  28. effspm/largebm/src/main.cpp +95 -0
  29. effspm/largebm/src/utility.cpp +11 -21
  30. effspm/largebm/src/utility.hpp +7 -10
  31. effspm/largehm/src/build_mdd.cpp +75 -110
  32. effspm/largehm/src/build_mdd.hpp +53 -73
  33. effspm/largehm/src/freq_miner.cpp +134 -191
  34. effspm/largehm/src/freq_miner.hpp +37 -60
  35. effspm/largehm/src/load_inst.cpp +137 -174
  36. effspm/largehm/src/load_inst.hpp +13 -50
  37. effspm/largehm/src/main.cpp +95 -0
  38. effspm/largehm/src/utility.cpp +46 -28
  39. effspm/largehm/src/utility.hpp +18 -16
  40. effspm/largepp/src/freq_miner.cpp +184 -156
  41. effspm/largepp/src/freq_miner.hpp +11 -36
  42. effspm/largepp/src/load_inst.cpp +32 -12
  43. effspm/largepp/src/load_inst.hpp +15 -9
  44. effspm/largepp/src/main.cpp +108 -0
  45. effspm/largepp/src/pattern.hpp +31 -0
  46. effspm/load_inst.cpp +8 -8
  47. effspm/load_inst.hpp +1 -1
  48. effspm/main.cpp +103 -0
  49. {effspm-0.2.7.dist-info → effspm-0.3.3.dist-info}/METADATA +1 -1
  50. effspm-0.3.3.dist-info/RECORD +60 -0
  51. effspm-0.2.7.dist-info/RECORD +0 -53
  52. {effspm-0.2.7.dist-info → effspm-0.3.3.dist-info}/WHEEL +0 -0
  53. {effspm-0.2.7.dist-info → effspm-0.3.3.dist-info}/licenses/LICENSE +0 -0
  54. {effspm-0.2.7.dist-info → effspm-0.3.3.dist-info}/top_level.txt +0 -0
@@ -5,22 +5,27 @@
5
5
  #include "utility.hpp"
6
6
 
7
7
  namespace htminer {
8
+
9
+ using std::vector;
10
+
8
11
  void Out_patt(vector<int>& seq, unsigned int freq);
9
12
  void Extend_patt(Pattern& _patt);
10
- void Mine_vec(unsigned int seq_ID, int pos, int num_found, vector<unsigned int>& ancest, vector<int>& items, unsigned int inod, int sgn);
13
+ void Mine_vec(unsigned int seq_ID, int pos, int num_found,
14
+ vector<unsigned int>& ancest, vector<int>& items,
15
+ unsigned int inod, int sgn);
11
16
 
12
17
  unsigned long long int num_patt = 0;
13
18
 
14
19
  vector<bool> ilist;
15
20
  vector<bool> slist;
16
21
 
17
- vector<Pattern> pot_patt;
22
+ vector<Pattern> pot_patt;
18
23
  vector<VPattern> pot_vpatt;
19
24
  vector<unsigned int> last_strpnt;
20
25
  vector<unsigned int> ancest_base;
21
26
  vector<int> DFS_numfound;
22
27
 
23
- Pattern _patt;
28
+ Pattern _patt;
24
29
  VPattern _vpatt;
25
30
 
26
31
  int itmset_size;
@@ -29,322 +34,351 @@ int last_neg;
29
34
  bool ilist_nempty;
30
35
 
31
36
  void Freq_miner() {
32
- collectedPatterns.clear();
33
- vector<int> list;
34
-
35
- for (int i = 0; i < L; ++i) {
36
- if (DFS[i].freq >= theta) {
37
- list.push_back(-i-1);
38
- if (itmset_exists)
39
- list.push_back(i+1);
40
- }
41
- }
42
-
43
- for (int i = 0; i < DFS.size(); ++i)
44
- DFS[i].list = list;
45
-
46
- while (!DFS.empty() && give_time(clock() - start_time) < time_limit) {
47
- if (DFS.back().freq >= theta)
48
- Extend_patt(DFS.back());
49
- else {
50
- DFS.pop_back();
51
- if (!VDFS.empty() && VDFS.back().ass_patt == DFS.size())
52
- VDFS.pop_back();
53
- }
54
- }
37
+
38
+ vector<int> list;
39
+
40
+ for (int i = 0; i < (int)L; ++i) {
41
+ if (DFS[i].freq >= theta) {
42
+ list.push_back(-i - 1);
43
+ if (itmset_exists)
44
+ list.push_back(i + 1);
45
+ }
46
+ }
47
+
48
+ for (int i = 0; i < (int)DFS.size(); ++i)
49
+ DFS[i].list = list;
50
+
51
+ while (!DFS.empty() && give_time(clock() - start_time) < time_limit) {
52
+ if (DFS.back().freq >= theta)
53
+ Extend_patt(DFS.back());
54
+ else {
55
+ DFS.pop_back();
56
+ if (!VDFS.empty() && VDFS.back().ass_patt == DFS.size())
57
+ VDFS.pop_back();
58
+ }
59
+ }
55
60
  }
56
61
 
57
62
  void Extend_patt(Pattern& _pattern) {
58
63
 
59
- swap(_patt, _pattern);
60
- DFS.pop_back();
61
-
62
- slist = vector<bool>(L, 0);
63
- ilist_nempty = 0;
64
-
65
- if (itmset_exists) {
66
- ilist = vector<bool>(L, 0);
67
- for (vector<int>::iterator it = _patt.list.begin(); it != _patt.list.end(); ++it) {
68
- if (*it < 0)
69
- slist[-(*it) - 1] = 1;
70
- else {
71
- ilist[(*it) - 1] = 1;
72
- ilist_nempty = 1;
73
- }
74
- }
75
- }
76
- else {
77
- for (vector<int>::iterator it = _patt.list.begin(); it != _patt.list.end(); ++it)
78
- slist[-(*it) - 1] = 1;
79
- }
80
-
81
- last_neg = _patt.seq.size() - 1;
82
- while (_patt.seq[last_neg] > 0)
83
- --last_neg;
84
- itmset_size = _patt.seq.size() - last_neg;
85
-
86
- pot_patt = vector<Pattern>(L + L * ilist_nempty);
87
- if (!CTree.empty())
88
- pot_vpatt = vector<VPattern>(L + L * ilist_nempty);
89
-
90
- last_strpnt = vector<unsigned int>(L, 0);
91
-
92
- if (!VDFS.empty() && VDFS.back().ass_patt == DFS.size()) {
93
- swap(_vpatt, VDFS.back());
94
- VDFS.pop_back();
95
- for (unsigned int pnt = 0; pnt < _vpatt.str_pnt.size(); ++pnt) {
96
- if (_vpatt.str_pnt[pnt] < 0)
97
- Mine_vec(_vpatt.seq_ID[pnt], -_vpatt.str_pnt[pnt], -1, ancest_base, CTree[_vpatt.seq_ID[pnt]].seq, 0, -1); //starting search from vpatt should start from 1 position ahead of pointer
98
- else //-1:no need to check ancest for remaining itemset items
99
- Mine_vec(_vpatt.seq_ID[pnt], _vpatt.str_pnt[pnt], -1, ancest_base, VTree[_vpatt.seq_ID[pnt]].seq, 0, 1);
100
- }
101
- }
102
-
103
- vector<unsigned int> DFS_itm; //for initial itemset extention
104
- vector<unsigned int> DFS_seq; //for initial itemset extention
105
- if (ilist_nempty)
106
- DFS_numfound.clear(); //tracks whether the current itemset is found anywhere along the search path, by counting how many of the current items in the itemset are found
107
- for (unsigned int pnt = 0; pnt < _patt.str_pnt.size(); ++pnt) {
108
- DFS_itm.push_back(_patt.str_pnt[pnt]);
109
- while(!DFS_itm.empty()) {
110
- unsigned int cur_sibl = DFS_itm.back();
111
- DFS_itm.pop_back();
112
- if (Tree[cur_sibl].itmset < 0) {
113
- unsigned int carc = Tree[cur_sibl].chld;
114
- Mine_vec(carc, 0, -1, CTree[carc].ancest, CTree[carc].seq, _patt.str_pnt[pnt], -1);
115
- cur_sibl = CTree[carc].ancest.back();
116
- while (cur_sibl != 0) {
117
- Mine_vec(cur_sibl - 1, 0, -1, CTree[carc].ancest, VTree[cur_sibl - 1].seq, _patt.str_pnt[pnt], 1);
118
- cur_sibl = VTree[cur_sibl - 1].sibl;
119
- }
120
- continue;
121
- }
122
- cur_sibl = Tree[cur_sibl].chld;
123
- while (cur_sibl != 0) {
124
- int cur_itm = Tree[cur_sibl].item;
125
- if (cur_itm < 0) {
126
- cur_itm = -cur_itm;
127
- if (slist[cur_itm - 1]) {
128
- pot_patt[cur_itm - 1].freq += Tree[cur_sibl].freq;
129
- if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0)
130
- pot_patt[cur_itm - 1].str_pnt.push_back(cur_sibl);
131
- }
132
- if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0) {
133
- DFS_seq.push_back(cur_sibl);
134
- if (ilist_nempty) {
135
- if (cur_itm == -_patt.seq[last_neg])
136
- DFS_numfound.push_back(1);
137
- else
138
- DFS_numfound.push_back(0);
139
- }
140
- }
141
- }
142
- else {
143
- if (ilist[cur_itm - 1]) {
144
- pot_patt[cur_itm + L - 1].freq += Tree[cur_sibl].freq;
145
- if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0)
146
- pot_patt[cur_itm + L - 1].str_pnt.push_back(cur_sibl);
147
- }
148
- if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0)
149
- DFS_itm.push_back(cur_sibl);
150
- }
151
- cur_sibl = Tree[cur_sibl].sibl;
152
- }
153
- }
154
- if (ilist_nempty) {
155
- for (int i = 0; i < L; ++i) {
156
- if (ilist[i])
157
- last_strpnt[i] = pot_patt[i + L].str_pnt.size();
158
- }
159
- }
160
- while(!DFS_seq.empty()) {
161
- unsigned int cur_sibl = DFS_seq.back();
162
- DFS_seq.pop_back();
163
- int num_found = 0;
164
- if (ilist_nempty) {
165
- num_found = DFS_numfound.back();
166
- DFS_numfound.pop_back();
167
- }
168
- if (Tree[cur_sibl].itmset < 0) {
169
- unsigned int carc = Tree[cur_sibl].chld;
170
- Mine_vec(carc, 0, num_found, CTree[carc].ancest, CTree[carc].seq, _patt.str_pnt[pnt], -1);
171
- cur_sibl = CTree[carc].ancest.back();
172
- while (cur_sibl != 0) {
173
- Mine_vec(cur_sibl - 1, 0, num_found, CTree[carc].ancest, VTree[cur_sibl - 1].seq, _patt.str_pnt[pnt], 1);
174
- cur_sibl = VTree[cur_sibl - 1].sibl;
175
- }
176
- continue;
177
- }
178
- cur_sibl = Tree[cur_sibl].chld;
179
- while (cur_sibl != 0) {
180
- int cur_itm = Tree[cur_sibl].item;
181
- if (cur_itm > 0) {
182
- if (num_found == itmset_size && ilist[cur_itm - 1] && (abs(Tree[Tree[cur_sibl].anct].itmset) < abs(Tree[_patt.str_pnt[pnt]].itmset) || !check_parent(Tree[cur_sibl].anct, _patt.str_pnt[pnt], last_strpnt[cur_itm - 1], pot_patt[cur_itm + L - 1].str_pnt))) {
183
- pot_patt[cur_itm + L - 1].freq += Tree[cur_sibl].freq;
184
- if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0)
185
- pot_patt[cur_itm + L - 1].str_pnt.push_back(cur_sibl);
186
- }
187
- if (slist[cur_itm - 1] && abs(Tree[Tree[cur_sibl].anct].itmset) <= abs(Tree[_patt.str_pnt[pnt]].itmset)) {
188
- pot_patt[cur_itm - 1].freq += Tree[cur_sibl].freq;
189
- if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0)
190
- pot_patt[cur_itm - 1].str_pnt.push_back(cur_sibl);
191
- }
192
- if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0) {
193
- DFS_seq.push_back(cur_sibl);
194
- if (ilist_nempty) {
195
- if (num_found < itmset_size && cur_itm == abs(_patt.seq[last_neg + num_found]))
196
- DFS_numfound.push_back(num_found + 1);
197
- else
198
- DFS_numfound.push_back(num_found);
199
- }
200
- }
201
- }
202
- else {
203
- cur_itm = -cur_itm;
204
- if (slist[cur_itm - 1] && abs(Tree[Tree[cur_sibl].anct].itmset) <= abs(Tree[_patt.str_pnt[pnt]].itmset)) {
205
- pot_patt[cur_itm - 1].freq += Tree[cur_sibl].freq;
206
- if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0)
207
- pot_patt[cur_itm - 1].str_pnt.push_back(cur_sibl);
208
- }
209
- if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0) {
210
- DFS_seq.push_back(cur_sibl);
211
- if (ilist_nempty) {
212
- if (cur_itm == -_patt.seq[last_neg])
213
- DFS_numfound.push_back(1);
214
- else
215
- DFS_numfound.push_back(0);
216
- }
217
- }
218
- }
219
- cur_sibl = Tree[cur_sibl].sibl;
220
- }
221
- }
222
- }
223
-
224
- vector<int> ilistp;
225
- vector<int> slistp;
226
- for (vector<int>::iterator it = _patt.list.begin(); it != _patt.list.end(); ++it) {
227
- if (*it > 0 && pot_patt[(*it) + L - 1].freq >= theta)
228
- ilistp.push_back(*it);
229
- else if (*it < 0 && pot_patt[-(*it) - 1].freq >= theta) {
230
- if (itmset_exists)
231
- slistp.push_back(-(*it));
232
- ilistp.push_back(*it);
233
- slistp.push_back(*it);
234
- }
235
- }
236
-
237
- for (vector<int>::iterator it = ilistp.begin(); it != ilistp.end(); ++it) {
238
- int p;
239
- if (*it < 0)
240
- p = -(*it) - 1;
241
- else
242
- p = (*it) - 1 + L;
243
- DFS.emplace_back();
244
- swap(DFS.back(), pot_patt[p]);
245
- DFS.back().seq = _patt.seq;
246
- DFS.back().seq.push_back(*it);
247
- if (*it < 0)
248
- DFS.back().list = slistp;
249
- else
250
- DFS.back().list = ilistp;
251
- if (!CTree.empty() && !pot_vpatt[p].str_pnt.empty()) {
252
- pot_vpatt[p].ass_patt = DFS.size() - 1;
253
- VDFS.emplace_back();
254
- swap(VDFS.back(), pot_vpatt[p]);
255
- }
256
- if (b_disp || b_write)
257
- Out_patt(DFS.back().seq, DFS.back().freq);
258
- htminer::collectedPatterns.emplace_back(DFS.back().seq);
259
- ++num_patt;
260
-
261
- }
262
- }
64
+ std::swap(_patt, _pattern);
65
+ DFS.pop_back();
263
66
 
67
+ slist = vector<bool>(L, 0);
68
+ ilist_nempty = false;
264
69
 
265
- void Mine_vec(unsigned int seq_ID, int pos, int num_found, vector<unsigned int>& ancest, vector<int>& items, unsigned int pnt, int sgn) {
266
-
267
- vector<bool> found(L + L * ilist_nempty, 0);
268
- int num_ext = 0;
269
-
270
- if (num_found == -1) {
271
- while (pos < items.size() && items[pos] > 0 && num_ext < _patt.list.size()) {
272
- int cur_itm = items[pos];
273
- if (ilist[cur_itm - 1] && !found[cur_itm + L - 1]) {
274
- if (pos + 1 < items.size()) {
275
- pot_vpatt[cur_itm + L - 1].seq_ID.push_back(seq_ID);
276
- pot_vpatt[cur_itm + L - 1].str_pnt.push_back(sgn * (pos + 1));
277
- }
278
- ++pot_patt[cur_itm + L - 1].freq;
279
- found[cur_itm + L - 1] = 1;
280
- ++num_ext;
281
- }
282
- ++pos;
283
- }
284
- }
285
-
286
- for (unsigned int k = pos; k < items.size() && num_ext < _patt.list.size(); ++k) {
287
- int cur_itm = abs(items[k]);
288
- if (items[k] < 0)
289
- num_found = 0;
290
- if (slist[cur_itm - 1] && !found[cur_itm - 1]) {
291
- if (ancest.empty() || abs(Tree[ancest[cur_itm - 1]].itmset) <= abs(Tree[pnt].itmset)) {
292
- if (k + 1 < items.size()) {
293
- pot_vpatt[cur_itm - 1].seq_ID.push_back(seq_ID);
294
- pot_vpatt[cur_itm - 1].str_pnt.push_back(sgn * (k + 1));
295
- }
296
- ++pot_patt[cur_itm - 1].freq;
297
- }
298
- found[cur_itm - 1] = 1;
299
- ++num_ext;
300
- }
301
- if (num_found == itmset_size) {
302
- if (ilist[cur_itm - 1] && !found[cur_itm + L - 1]) {
303
- if (ancest.empty() || abs(Tree[ancest[cur_itm - 1]].itmset) < abs(Tree[pnt].itmset) || !check_parent(ancest[cur_itm - 1], pnt, last_strpnt[cur_itm - 1], pot_patt[cur_itm + L - 1].str_pnt)) {
304
- if (k + 1 < items.size()) {
305
- pot_vpatt[cur_itm + L - 1].seq_ID.push_back(seq_ID);
306
- pot_vpatt[cur_itm + L - 1].str_pnt.push_back(sgn * (k + 1));
307
- }
308
- ++pot_patt[cur_itm + L - 1].freq;
309
- }
310
- found[cur_itm + L - 1] = 1;
311
- ++num_ext;
312
- }
313
- }
314
- else if (cur_itm == abs(_patt.seq[last_neg + num_found]))
315
- ++num_found;
316
- }
317
- }
70
+ if (itmset_exists) {
71
+ ilist = vector<bool>(L, 0);
72
+ for (vector<int>::iterator it = _patt.list.begin(); it != _patt.list.end(); ++it) {
73
+ if (*it < 0)
74
+ slist[-(*it) - 1] = 1;
75
+ else {
76
+ ilist[(*it) - 1] = 1;
77
+ ilist_nempty = true;
78
+ }
79
+ }
80
+ }
81
+ else {
82
+ for (vector<int>::iterator it = _patt.list.begin(); it != _patt.list.end(); ++it)
83
+ slist[-(*it) - 1] = 1;
84
+ }
318
85
 
86
+ last_neg = (int)_patt.seq.size() - 1;
87
+ while (_patt.seq[last_neg] > 0)
88
+ --last_neg;
89
+ itmset_size = (int)_patt.seq.size() - last_neg;
319
90
 
320
- void Out_patt(vector<int>& seq, unsigned int freq) {
91
+ pot_patt = vector<Pattern>(L + L * ilist_nempty);
92
+ if (!CTree.empty())
93
+ pot_vpatt = vector<VPattern>(L + L * ilist_nempty);
94
+
95
+ last_strpnt = vector<unsigned int>(L, 0);
96
+
97
+ if (!VDFS.empty() && VDFS.back().ass_patt == DFS.size()) {
98
+ std::swap(_vpatt, VDFS.back());
99
+ VDFS.pop_back();
100
+ for (unsigned int pnt = 0; pnt < _vpatt.str_pnt.size(); ++pnt) {
101
+ if (_vpatt.str_pnt[pnt] < 0)
102
+ Mine_vec(_vpatt.seq_ID[pnt], -_vpatt.str_pnt[pnt], -1,
103
+ ancest_base, CTree[_vpatt.seq_ID[pnt]].seq, 0, -1);
104
+ else
105
+ Mine_vec(_vpatt.seq_ID[pnt], _vpatt.str_pnt[pnt], -1,
106
+ ancest_base, VTree[_vpatt.seq_ID[pnt]].seq, 0, 1);
107
+ }
108
+ }
109
+
110
+ vector<unsigned int> DFS_itm;
111
+ vector<unsigned int> DFS_seq;
112
+ if (ilist_nempty)
113
+ DFS_numfound.clear();
114
+
115
+ for (unsigned int pnt = 0; pnt < _patt.str_pnt.size(); ++pnt) {
116
+ DFS_itm.push_back(_patt.str_pnt[pnt]);
117
+ while (!DFS_itm.empty()) {
118
+ unsigned int cur_sibl = DFS_itm.back();
119
+ DFS_itm.pop_back();
120
+ if (Tree[cur_sibl].itmset < 0) {
121
+ unsigned int carc = Tree[cur_sibl].chld;
122
+ Mine_vec(carc, 0, -1, CTree[carc].ancest, CTree[carc].seq,
123
+ _patt.str_pnt[pnt], -1);
124
+ cur_sibl = CTree[carc].ancest.back();
125
+ while (cur_sibl != 0) {
126
+ Mine_vec(cur_sibl - 1, 0, -1, CTree[carc].ancest,
127
+ VTree[cur_sibl - 1].seq, _patt.str_pnt[pnt], 1);
128
+ cur_sibl = VTree[cur_sibl - 1].sibl;
129
+ }
130
+ continue;
131
+ }
132
+ cur_sibl = Tree[cur_sibl].chld;
133
+ while (cur_sibl != 0) {
134
+ int cur_itm = Tree[cur_sibl].item;
135
+ if (cur_itm < 0) {
136
+ cur_itm = -cur_itm;
137
+ if (slist[cur_itm - 1]) {
138
+ pot_patt[cur_itm - 1].freq += Tree[cur_sibl].freq;
139
+ if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0)
140
+ pot_patt[cur_itm - 1].str_pnt.push_back(cur_sibl);
141
+ }
142
+ if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0) {
143
+ DFS_seq.push_back(cur_sibl);
144
+ if (ilist_nempty) {
145
+ if (cur_itm == -_patt.seq[last_neg])
146
+ DFS_numfound.push_back(1);
147
+ else
148
+ DFS_numfound.push_back(0);
149
+ }
150
+ }
151
+ }
152
+ else {
153
+ if (ilist[cur_itm - 1]) {
154
+ pot_patt[cur_itm + L - 1].freq += Tree[cur_sibl].freq;
155
+ if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0)
156
+ pot_patt[cur_itm + L - 1].str_pnt.push_back(cur_sibl);
157
+ }
158
+ if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0)
159
+ DFS_itm.push_back(cur_sibl);
160
+ }
161
+ cur_sibl = Tree[cur_sibl].sibl;
162
+ }
163
+ }
164
+ if (ilist_nempty) {
165
+ for (int i = 0; i < (int)L; ++i) {
166
+ if (ilist[i])
167
+ last_strpnt[i] = (unsigned int)pot_patt[i + L].str_pnt.size();
168
+ }
169
+ }
170
+ while (!DFS_seq.empty()) {
171
+ unsigned int cur_sibl = DFS_seq.back();
172
+ DFS_seq.pop_back();
173
+ int num_found = 0;
174
+ if (ilist_nempty) {
175
+ num_found = DFS_numfound.back();
176
+ DFS_numfound.pop_back();
177
+ }
178
+ if (Tree[cur_sibl].itmset < 0) {
179
+ unsigned int carc = Tree[cur_sibl].chld;
180
+ Mine_vec(carc, 0, num_found, CTree[carc].ancest, CTree[carc].seq,
181
+ _patt.str_pnt[pnt], -1);
182
+ cur_sibl = CTree[carc].ancest.back();
183
+ while (cur_sibl != 0) {
184
+ Mine_vec(cur_sibl - 1, 0, num_found, CTree[carc].ancest,
185
+ VTree[cur_sibl - 1].seq, _patt.str_pnt[pnt], 1);
186
+ cur_sibl = VTree[cur_sibl - 1].sibl;
187
+ }
188
+ continue;
189
+ }
190
+ cur_sibl = Tree[cur_sibl].chld;
191
+ while (cur_sibl != 0) {
192
+ int cur_itm = Tree[cur_sibl].item;
193
+ if (cur_itm > 0) {
194
+ if (num_found == itmset_size && ilist[cur_itm - 1] &&
195
+ (std::abs(Tree[Tree[cur_sibl].anct].itmset) <
196
+ std::abs(Tree[_patt.str_pnt[pnt]].itmset) ||
197
+ !check_parent(Tree[cur_sibl].anct, _patt.str_pnt[pnt],
198
+ last_strpnt[cur_itm - 1],
199
+ pot_patt[cur_itm + L - 1].str_pnt))) {
200
+ pot_patt[cur_itm + L - 1].freq += Tree[cur_sibl].freq;
201
+ if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0)
202
+ pot_patt[cur_itm + L - 1].str_pnt.push_back(cur_sibl);
203
+ }
204
+ if (slist[cur_itm - 1] &&
205
+ std::abs(Tree[Tree[cur_sibl].anct].itmset) <=
206
+ std::abs(Tree[_patt.str_pnt[pnt]].itmset)) {
207
+ pot_patt[cur_itm - 1].freq += Tree[cur_sibl].freq;
208
+ if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0)
209
+ pot_patt[cur_itm - 1].str_pnt.push_back(cur_sibl);
210
+ }
211
+ if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0) {
212
+ DFS_seq.push_back(cur_sibl);
213
+ if (ilist_nempty) {
214
+ if (num_found < itmset_size &&
215
+ cur_itm ==
216
+ std::abs(
217
+ _patt.seq[last_neg + num_found]))
218
+ DFS_numfound.push_back(num_found + 1);
219
+ else
220
+ DFS_numfound.push_back(num_found);
221
+ }
222
+ }
223
+ }
224
+ else {
225
+ cur_itm = -cur_itm;
226
+ if (slist[cur_itm - 1] &&
227
+ std::abs(Tree[Tree[cur_sibl].anct].itmset) <=
228
+ std::abs(Tree[_patt.str_pnt[pnt]].itmset)) {
229
+ pot_patt[cur_itm - 1].freq += Tree[cur_sibl].freq;
230
+ if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0)
231
+ pot_patt[cur_itm - 1].str_pnt.push_back(cur_sibl);
232
+ }
233
+ if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0) {
234
+ DFS_seq.push_back(cur_sibl);
235
+ if (ilist_nempty) {
236
+ if (cur_itm == -_patt.seq[last_neg])
237
+ DFS_numfound.push_back(1);
238
+ else
239
+ DFS_numfound.push_back(0);
240
+ }
241
+ }
242
+ }
243
+ cur_sibl = Tree[cur_sibl].sibl;
244
+ }
245
+ }
246
+ }
321
247
 
322
- ofstream file_o;
323
- if (b_write)
324
- file_o.open(out_file, std::ios::app);
325
-
326
- for (int ii = 0; ii < seq.size(); ii++) {
327
- if (b_disp)
328
- cout << seq[ii] << " ";
329
- if (b_write)
330
- file_o << seq[ii] << " ";
331
- }
332
- if (b_disp)
333
- cout << endl;
334
- if (b_write)
335
- file_o << endl;
336
-
337
- if (b_disp)
338
- cout << "************** Freq: " << freq << endl;
339
- if (b_write) {
340
- file_o << "************** Freq: " << freq << endl;
341
- file_o.close();
342
- }
343
-
248
+ vector<int> ilistp;
249
+ vector<int> slistp;
250
+ for (vector<int>::iterator it = _patt.list.begin(); it != _patt.list.end(); ++it) {
251
+ if (*it > 0 && pot_patt[*it + L - 1].freq >= theta)
252
+ ilistp.push_back(*it);
253
+ else if (*it < 0 && pot_patt[-(*it) - 1].freq >= theta) {
254
+ if (itmset_exists)
255
+ slistp.push_back(-(*it));
256
+ ilistp.push_back(*it);
257
+ slistp.push_back(*it);
258
+ }
259
+ }
260
+
261
+ for (vector<int>::iterator it = ilistp.begin(); it != ilistp.end(); ++it) {
262
+ int p;
263
+ if (*it < 0)
264
+ p = -(*it) - 1;
265
+ else
266
+ p = *it - 1 + L;
267
+
268
+ DFS.emplace_back();
269
+ std::swap(DFS.back(), pot_patt[p]);
270
+ DFS.back().seq = _patt.seq;
271
+ DFS.back().seq.push_back(*it);
272
+ if (*it < 0)
273
+ DFS.back().list = slistp;
274
+ else
275
+ DFS.back().list = ilistp;
276
+
277
+ if (!CTree.empty() && !pot_vpatt[p].str_pnt.empty()) {
278
+ pot_vpatt[p].ass_patt = DFS.size() - 1;
279
+ VDFS.emplace_back();
280
+ std::swap(VDFS.back(), pot_vpatt[p]);
281
+ }
282
+
283
+ // ✅ Always collect for Python, regardless of printing
284
+ collectedPatterns.push_back(DFS.back().seq);
285
+
286
+ if (b_disp || b_write)
287
+ Out_patt(DFS.back().seq, DFS.back().freq);
288
+
289
+ ++num_patt;
290
+ }
344
291
  }
345
-
346
292
 
293
+ void Mine_vec(unsigned int seq_ID, int pos, int num_found,
294
+ vector<unsigned int>& ancest, vector<int>& items,
295
+ unsigned int pnt, int sgn) {
296
+
297
+ vector<bool> found(L + L * ilist_nempty, 0);
298
+ int num_ext = 0;
299
+
300
+ if (num_found == -1) {
301
+ while (pos < (int)items.size() && items[pos] > 0 &&
302
+ num_ext < (int)_patt.list.size()) {
303
+ int cur_itm = items[pos];
304
+ if (ilist[cur_itm - 1] && !found[cur_itm + L - 1]) {
305
+ if (pos + 1 < (int)items.size()) {
306
+ pot_vpatt[cur_itm + L - 1].seq_ID.push_back(seq_ID);
307
+ pot_vpatt[cur_itm + L - 1].str_pnt.push_back(sgn * (pos + 1));
308
+ }
309
+ ++pot_patt[cur_itm + L - 1].freq;
310
+ found[cur_itm + L - 1] = 1;
311
+ ++num_ext;
312
+ }
313
+ ++pos;
314
+ }
315
+ }
316
+
317
+ for (unsigned int k = pos;
318
+ k < items.size() && num_ext < (int)_patt.list.size(); ++k) {
319
+ int cur_itm = std::abs(items[k]);
320
+ if (items[k] < 0)
321
+ num_found = 0;
322
+ if (slist[cur_itm - 1] && !found[cur_itm - 1]) {
323
+ if (ancest.empty() ||
324
+ std::abs(Tree[ancest[cur_itm - 1]].itmset) <=
325
+ std::abs(Tree[pnt].itmset)) {
326
+ if (k + 1 < items.size()) {
327
+ pot_vpatt[cur_itm - 1].seq_ID.push_back(seq_ID);
328
+ pot_vpatt[cur_itm - 1].str_pnt.push_back(sgn * (k + 1));
329
+ }
330
+ ++pot_patt[cur_itm - 1].freq;
331
+ }
332
+ found[cur_itm - 1] = 1;
333
+ ++num_ext;
334
+ }
335
+ if (num_found == itmset_size) {
336
+ if (ilist[cur_itm - 1] && !found[cur_itm + L - 1]) {
337
+ if (ancest.empty() ||
338
+ std::abs(Tree[ancest[cur_itm - 1]].itmset) <
339
+ std::abs(Tree[pnt].itmset) ||
340
+ !check_parent(ancest[cur_itm - 1], pnt,
341
+ last_strpnt[cur_itm - 1],
342
+ pot_patt[cur_itm + L - 1].str_pnt)) {
343
+ if (k + 1 < items.size()) {
344
+ pot_vpatt[cur_itm + L - 1].seq_ID.push_back(seq_ID);
345
+ pot_vpatt[cur_itm + L - 1].str_pnt.push_back(sgn * (k + 1));
346
+ }
347
+ ++pot_patt[cur_itm + L - 1].freq;
348
+ }
349
+ found[cur_itm + L - 1] = 1;
350
+ ++num_ext;
351
+ }
352
+ }
353
+ else if (cur_itm ==
354
+ std::abs(_patt.seq[last_neg + num_found]))
355
+ ++num_found;
356
+ }
347
357
  }
348
358
 
359
+ void Out_patt(vector<int>& seq, unsigned int freq) {
360
+
361
+ std::ofstream file_o;
362
+ if (b_write)
363
+ file_o.open(out_file, std::ios::app);
364
+
365
+ for (int ii = 0; ii < (int)seq.size(); ii++) {
366
+ if (b_disp)
367
+ std::cout << seq[ii] << " ";
368
+ if (b_write)
369
+ file_o << seq[ii] << " ";
370
+ }
371
+ if (b_disp)
372
+ std::cout << std::endl;
373
+ if (b_write)
374
+ file_o << std::endl;
349
375
 
376
+ if (b_disp)
377
+ std::cout << "************** Freq: " << freq << std::endl;
378
+ if (b_write) {
379
+ file_o << "************** Freq: " << freq << std::endl;
380
+ file_o.close();
381
+ }
382
+ }
350
383
 
384
+ } // namespace htminer