effspm 0.3.0__cp310-cp310-macosx_11_0_arm64.whl → 0.3.3__cp310-cp310-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,22 +5,27 @@
5
5
  #include "utility.hpp"
6
6
 
7
7
  namespace htminer {
8
+
9
+ using std::vector;
10
+
8
11
  void Out_patt(vector<int>& seq, unsigned int freq);
9
12
  void Extend_patt(Pattern& _patt);
10
- void Mine_vec(unsigned int seq_ID, int pos, int num_found, vector<unsigned int>& ancest, vector<int>& items, unsigned int inod, int sgn);
13
+ void Mine_vec(unsigned int seq_ID, int pos, int num_found,
14
+ vector<unsigned int>& ancest, vector<int>& items,
15
+ unsigned int inod, int sgn);
11
16
 
12
17
  unsigned long long int num_patt = 0;
13
18
 
14
19
  vector<bool> ilist;
15
20
  vector<bool> slist;
16
21
 
17
- vector<Pattern> pot_patt;
22
+ vector<Pattern> pot_patt;
18
23
  vector<VPattern> pot_vpatt;
19
24
  vector<unsigned int> last_strpnt;
20
25
  vector<unsigned int> ancest_base;
21
26
  vector<int> DFS_numfound;
22
27
 
23
- Pattern _patt;
28
+ Pattern _patt;
24
29
  VPattern _vpatt;
25
30
 
26
31
  int itmset_size;
@@ -29,322 +34,351 @@ int last_neg;
29
34
  bool ilist_nempty;
30
35
 
31
36
  void Freq_miner() {
32
- collectedPatterns.clear();
33
- vector<int> list;
34
-
35
- for (int i = 0; i < L; ++i) {
36
- if (DFS[i].freq >= theta) {
37
- list.push_back(-i-1);
38
- if (itmset_exists)
39
- list.push_back(i+1);
40
- }
41
- }
42
-
43
- for (int i = 0; i < DFS.size(); ++i)
44
- DFS[i].list = list;
45
-
46
- while (!DFS.empty() && give_time(clock() - start_time) < time_limit) {
47
- if (DFS.back().freq >= theta)
48
- Extend_patt(DFS.back());
49
- else {
50
- DFS.pop_back();
51
- if (!VDFS.empty() && VDFS.back().ass_patt == DFS.size())
52
- VDFS.pop_back();
53
- }
54
- }
37
+
38
+ vector<int> list;
39
+
40
+ for (int i = 0; i < (int)L; ++i) {
41
+ if (DFS[i].freq >= theta) {
42
+ list.push_back(-i - 1);
43
+ if (itmset_exists)
44
+ list.push_back(i + 1);
45
+ }
46
+ }
47
+
48
+ for (int i = 0; i < (int)DFS.size(); ++i)
49
+ DFS[i].list = list;
50
+
51
+ while (!DFS.empty() && give_time(clock() - start_time) < time_limit) {
52
+ if (DFS.back().freq >= theta)
53
+ Extend_patt(DFS.back());
54
+ else {
55
+ DFS.pop_back();
56
+ if (!VDFS.empty() && VDFS.back().ass_patt == DFS.size())
57
+ VDFS.pop_back();
58
+ }
59
+ }
55
60
  }
56
61
 
57
62
  void Extend_patt(Pattern& _pattern) {
58
63
 
59
- swap(_patt, _pattern);
60
- DFS.pop_back();
61
-
62
- slist = vector<bool>(L, 0);
63
- ilist_nempty = 0;
64
-
65
- if (itmset_exists) {
66
- ilist = vector<bool>(L, 0);
67
- for (vector<int>::iterator it = _patt.list.begin(); it != _patt.list.end(); ++it) {
68
- if (*it < 0)
69
- slist[-(*it) - 1] = 1;
70
- else {
71
- ilist[(*it) - 1] = 1;
72
- ilist_nempty = 1;
73
- }
74
- }
75
- }
76
- else {
77
- for (vector<int>::iterator it = _patt.list.begin(); it != _patt.list.end(); ++it)
78
- slist[-(*it) - 1] = 1;
79
- }
80
-
81
- last_neg = _patt.seq.size() - 1;
82
- while (_patt.seq[last_neg] > 0)
83
- --last_neg;
84
- itmset_size = _patt.seq.size() - last_neg;
85
-
86
- pot_patt = vector<Pattern>(L + L * ilist_nempty);
87
- if (!CTree.empty())
88
- pot_vpatt = vector<VPattern>(L + L * ilist_nempty);
89
-
90
- last_strpnt = vector<unsigned int>(L, 0);
91
-
92
- if (!VDFS.empty() && VDFS.back().ass_patt == DFS.size()) {
93
- swap(_vpatt, VDFS.back());
94
- VDFS.pop_back();
95
- for (unsigned int pnt = 0; pnt < _vpatt.str_pnt.size(); ++pnt) {
96
- if (_vpatt.str_pnt[pnt] < 0)
97
- Mine_vec(_vpatt.seq_ID[pnt], -_vpatt.str_pnt[pnt], -1, ancest_base, CTree[_vpatt.seq_ID[pnt]].seq, 0, -1); //starting search from vpatt should start from 1 position ahead of pointer
98
- else //-1:no need to check ancest for remaining itemset items
99
- Mine_vec(_vpatt.seq_ID[pnt], _vpatt.str_pnt[pnt], -1, ancest_base, VTree[_vpatt.seq_ID[pnt]].seq, 0, 1);
100
- }
101
- }
102
-
103
- vector<unsigned int> DFS_itm; //for initial itemset extention
104
- vector<unsigned int> DFS_seq; //for initial itemset extention
105
- if (ilist_nempty)
106
- DFS_numfound.clear(); //tracks whether the current itemset is found anywhere along the search path, by counting how many of the current items in the itemset are found
107
- for (unsigned int pnt = 0; pnt < _patt.str_pnt.size(); ++pnt) {
108
- DFS_itm.push_back(_patt.str_pnt[pnt]);
109
- while(!DFS_itm.empty()) {
110
- unsigned int cur_sibl = DFS_itm.back();
111
- DFS_itm.pop_back();
112
- if (Tree[cur_sibl].itmset < 0) {
113
- unsigned int carc = Tree[cur_sibl].chld;
114
- Mine_vec(carc, 0, -1, CTree[carc].ancest, CTree[carc].seq, _patt.str_pnt[pnt], -1);
115
- cur_sibl = CTree[carc].ancest.back();
116
- while (cur_sibl != 0) {
117
- Mine_vec(cur_sibl - 1, 0, -1, CTree[carc].ancest, VTree[cur_sibl - 1].seq, _patt.str_pnt[pnt], 1);
118
- cur_sibl = VTree[cur_sibl - 1].sibl;
119
- }
120
- continue;
121
- }
122
- cur_sibl = Tree[cur_sibl].chld;
123
- while (cur_sibl != 0) {
124
- int cur_itm = Tree[cur_sibl].item;
125
- if (cur_itm < 0) {
126
- cur_itm = -cur_itm;
127
- if (slist[cur_itm - 1]) {
128
- pot_patt[cur_itm - 1].freq += Tree[cur_sibl].freq;
129
- if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0)
130
- pot_patt[cur_itm - 1].str_pnt.push_back(cur_sibl);
131
- }
132
- if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0) {
133
- DFS_seq.push_back(cur_sibl);
134
- if (ilist_nempty) {
135
- if (cur_itm == -_patt.seq[last_neg])
136
- DFS_numfound.push_back(1);
137
- else
138
- DFS_numfound.push_back(0);
139
- }
140
- }
141
- }
142
- else {
143
- if (ilist[cur_itm - 1]) {
144
- pot_patt[cur_itm + L - 1].freq += Tree[cur_sibl].freq;
145
- if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0)
146
- pot_patt[cur_itm + L - 1].str_pnt.push_back(cur_sibl);
147
- }
148
- if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0)
149
- DFS_itm.push_back(cur_sibl);
150
- }
151
- cur_sibl = Tree[cur_sibl].sibl;
152
- }
153
- }
154
- if (ilist_nempty) {
155
- for (int i = 0; i < L; ++i) {
156
- if (ilist[i])
157
- last_strpnt[i] = pot_patt[i + L].str_pnt.size();
158
- }
159
- }
160
- while(!DFS_seq.empty()) {
161
- unsigned int cur_sibl = DFS_seq.back();
162
- DFS_seq.pop_back();
163
- int num_found = 0;
164
- if (ilist_nempty) {
165
- num_found = DFS_numfound.back();
166
- DFS_numfound.pop_back();
167
- }
168
- if (Tree[cur_sibl].itmset < 0) {
169
- unsigned int carc = Tree[cur_sibl].chld;
170
- Mine_vec(carc, 0, num_found, CTree[carc].ancest, CTree[carc].seq, _patt.str_pnt[pnt], -1);
171
- cur_sibl = CTree[carc].ancest.back();
172
- while (cur_sibl != 0) {
173
- Mine_vec(cur_sibl - 1, 0, num_found, CTree[carc].ancest, VTree[cur_sibl - 1].seq, _patt.str_pnt[pnt], 1);
174
- cur_sibl = VTree[cur_sibl - 1].sibl;
175
- }
176
- continue;
177
- }
178
- cur_sibl = Tree[cur_sibl].chld;
179
- while (cur_sibl != 0) {
180
- int cur_itm = Tree[cur_sibl].item;
181
- if (cur_itm > 0) {
182
- if (num_found == itmset_size && ilist[cur_itm - 1] && (abs(Tree[Tree[cur_sibl].anct].itmset) < abs(Tree[_patt.str_pnt[pnt]].itmset) || !check_parent(Tree[cur_sibl].anct, _patt.str_pnt[pnt], last_strpnt[cur_itm - 1], pot_patt[cur_itm + L - 1].str_pnt))) {
183
- pot_patt[cur_itm + L - 1].freq += Tree[cur_sibl].freq;
184
- if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0)
185
- pot_patt[cur_itm + L - 1].str_pnt.push_back(cur_sibl);
186
- }
187
- if (slist[cur_itm - 1] && abs(Tree[Tree[cur_sibl].anct].itmset) <= abs(Tree[_patt.str_pnt[pnt]].itmset)) {
188
- pot_patt[cur_itm - 1].freq += Tree[cur_sibl].freq;
189
- if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0)
190
- pot_patt[cur_itm - 1].str_pnt.push_back(cur_sibl);
191
- }
192
- if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0) {
193
- DFS_seq.push_back(cur_sibl);
194
- if (ilist_nempty) {
195
- if (num_found < itmset_size && cur_itm == abs(_patt.seq[last_neg + num_found]))
196
- DFS_numfound.push_back(num_found + 1);
197
- else
198
- DFS_numfound.push_back(num_found);
199
- }
200
- }
201
- }
202
- else {
203
- cur_itm = -cur_itm;
204
- if (slist[cur_itm - 1] && abs(Tree[Tree[cur_sibl].anct].itmset) <= abs(Tree[_patt.str_pnt[pnt]].itmset)) {
205
- pot_patt[cur_itm - 1].freq += Tree[cur_sibl].freq;
206
- if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0)
207
- pot_patt[cur_itm - 1].str_pnt.push_back(cur_sibl);
208
- }
209
- if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0) {
210
- DFS_seq.push_back(cur_sibl);
211
- if (ilist_nempty) {
212
- if (cur_itm == -_patt.seq[last_neg])
213
- DFS_numfound.push_back(1);
214
- else
215
- DFS_numfound.push_back(0);
216
- }
217
- }
218
- }
219
- cur_sibl = Tree[cur_sibl].sibl;
220
- }
221
- }
222
- }
223
-
224
- vector<int> ilistp;
225
- vector<int> slistp;
226
- for (vector<int>::iterator it = _patt.list.begin(); it != _patt.list.end(); ++it) {
227
- if (*it > 0 && pot_patt[(*it) + L - 1].freq >= theta)
228
- ilistp.push_back(*it);
229
- else if (*it < 0 && pot_patt[-(*it) - 1].freq >= theta) {
230
- if (itmset_exists)
231
- slistp.push_back(-(*it));
232
- ilistp.push_back(*it);
233
- slistp.push_back(*it);
234
- }
235
- }
236
-
237
- for (vector<int>::iterator it = ilistp.begin(); it != ilistp.end(); ++it) {
238
- int p;
239
- if (*it < 0)
240
- p = -(*it) - 1;
241
- else
242
- p = (*it) - 1 + L;
243
- DFS.emplace_back();
244
- swap(DFS.back(), pot_patt[p]);
245
- DFS.back().seq = _patt.seq;
246
- DFS.back().seq.push_back(*it);
247
- if (*it < 0)
248
- DFS.back().list = slistp;
249
- else
250
- DFS.back().list = ilistp;
251
- if (!CTree.empty() && !pot_vpatt[p].str_pnt.empty()) {
252
- pot_vpatt[p].ass_patt = DFS.size() - 1;
253
- VDFS.emplace_back();
254
- swap(VDFS.back(), pot_vpatt[p]);
255
- }
256
- if (b_disp || b_write)
257
- Out_patt(DFS.back().seq, DFS.back().freq);
258
- htminer::collectedPatterns.emplace_back(DFS.back().seq);
259
- ++num_patt;
260
-
261
- }
262
- }
64
+ std::swap(_patt, _pattern);
65
+ DFS.pop_back();
263
66
 
67
+ slist = vector<bool>(L, 0);
68
+ ilist_nempty = false;
264
69
 
265
- void Mine_vec(unsigned int seq_ID, int pos, int num_found, vector<unsigned int>& ancest, vector<int>& items, unsigned int pnt, int sgn) {
266
-
267
- vector<bool> found(L + L * ilist_nempty, 0);
268
- int num_ext = 0;
269
-
270
- if (num_found == -1) {
271
- while (pos < items.size() && items[pos] > 0 && num_ext < _patt.list.size()) {
272
- int cur_itm = items[pos];
273
- if (ilist[cur_itm - 1] && !found[cur_itm + L - 1]) {
274
- if (pos + 1 < items.size()) {
275
- pot_vpatt[cur_itm + L - 1].seq_ID.push_back(seq_ID);
276
- pot_vpatt[cur_itm + L - 1].str_pnt.push_back(sgn * (pos + 1));
277
- }
278
- ++pot_patt[cur_itm + L - 1].freq;
279
- found[cur_itm + L - 1] = 1;
280
- ++num_ext;
281
- }
282
- ++pos;
283
- }
284
- }
285
-
286
- for (unsigned int k = pos; k < items.size() && num_ext < _patt.list.size(); ++k) {
287
- int cur_itm = abs(items[k]);
288
- if (items[k] < 0)
289
- num_found = 0;
290
- if (slist[cur_itm - 1] && !found[cur_itm - 1]) {
291
- if (ancest.empty() || abs(Tree[ancest[cur_itm - 1]].itmset) <= abs(Tree[pnt].itmset)) {
292
- if (k + 1 < items.size()) {
293
- pot_vpatt[cur_itm - 1].seq_ID.push_back(seq_ID);
294
- pot_vpatt[cur_itm - 1].str_pnt.push_back(sgn * (k + 1));
295
- }
296
- ++pot_patt[cur_itm - 1].freq;
297
- }
298
- found[cur_itm - 1] = 1;
299
- ++num_ext;
300
- }
301
- if (num_found == itmset_size) {
302
- if (ilist[cur_itm - 1] && !found[cur_itm + L - 1]) {
303
- if (ancest.empty() || abs(Tree[ancest[cur_itm - 1]].itmset) < abs(Tree[pnt].itmset) || !check_parent(ancest[cur_itm - 1], pnt, last_strpnt[cur_itm - 1], pot_patt[cur_itm + L - 1].str_pnt)) {
304
- if (k + 1 < items.size()) {
305
- pot_vpatt[cur_itm + L - 1].seq_ID.push_back(seq_ID);
306
- pot_vpatt[cur_itm + L - 1].str_pnt.push_back(sgn * (k + 1));
307
- }
308
- ++pot_patt[cur_itm + L - 1].freq;
309
- }
310
- found[cur_itm + L - 1] = 1;
311
- ++num_ext;
312
- }
313
- }
314
- else if (cur_itm == abs(_patt.seq[last_neg + num_found]))
315
- ++num_found;
316
- }
317
- }
70
+ if (itmset_exists) {
71
+ ilist = vector<bool>(L, 0);
72
+ for (vector<int>::iterator it = _patt.list.begin(); it != _patt.list.end(); ++it) {
73
+ if (*it < 0)
74
+ slist[-(*it) - 1] = 1;
75
+ else {
76
+ ilist[(*it) - 1] = 1;
77
+ ilist_nempty = true;
78
+ }
79
+ }
80
+ }
81
+ else {
82
+ for (vector<int>::iterator it = _patt.list.begin(); it != _patt.list.end(); ++it)
83
+ slist[-(*it) - 1] = 1;
84
+ }
318
85
 
86
+ last_neg = (int)_patt.seq.size() - 1;
87
+ while (_patt.seq[last_neg] > 0)
88
+ --last_neg;
89
+ itmset_size = (int)_patt.seq.size() - last_neg;
319
90
 
320
- void Out_patt(vector<int>& seq, unsigned int freq) {
91
+ pot_patt = vector<Pattern>(L + L * ilist_nempty);
92
+ if (!CTree.empty())
93
+ pot_vpatt = vector<VPattern>(L + L * ilist_nempty);
94
+
95
+ last_strpnt = vector<unsigned int>(L, 0);
96
+
97
+ if (!VDFS.empty() && VDFS.back().ass_patt == DFS.size()) {
98
+ std::swap(_vpatt, VDFS.back());
99
+ VDFS.pop_back();
100
+ for (unsigned int pnt = 0; pnt < _vpatt.str_pnt.size(); ++pnt) {
101
+ if (_vpatt.str_pnt[pnt] < 0)
102
+ Mine_vec(_vpatt.seq_ID[pnt], -_vpatt.str_pnt[pnt], -1,
103
+ ancest_base, CTree[_vpatt.seq_ID[pnt]].seq, 0, -1);
104
+ else
105
+ Mine_vec(_vpatt.seq_ID[pnt], _vpatt.str_pnt[pnt], -1,
106
+ ancest_base, VTree[_vpatt.seq_ID[pnt]].seq, 0, 1);
107
+ }
108
+ }
109
+
110
+ vector<unsigned int> DFS_itm;
111
+ vector<unsigned int> DFS_seq;
112
+ if (ilist_nempty)
113
+ DFS_numfound.clear();
114
+
115
+ for (unsigned int pnt = 0; pnt < _patt.str_pnt.size(); ++pnt) {
116
+ DFS_itm.push_back(_patt.str_pnt[pnt]);
117
+ while (!DFS_itm.empty()) {
118
+ unsigned int cur_sibl = DFS_itm.back();
119
+ DFS_itm.pop_back();
120
+ if (Tree[cur_sibl].itmset < 0) {
121
+ unsigned int carc = Tree[cur_sibl].chld;
122
+ Mine_vec(carc, 0, -1, CTree[carc].ancest, CTree[carc].seq,
123
+ _patt.str_pnt[pnt], -1);
124
+ cur_sibl = CTree[carc].ancest.back();
125
+ while (cur_sibl != 0) {
126
+ Mine_vec(cur_sibl - 1, 0, -1, CTree[carc].ancest,
127
+ VTree[cur_sibl - 1].seq, _patt.str_pnt[pnt], 1);
128
+ cur_sibl = VTree[cur_sibl - 1].sibl;
129
+ }
130
+ continue;
131
+ }
132
+ cur_sibl = Tree[cur_sibl].chld;
133
+ while (cur_sibl != 0) {
134
+ int cur_itm = Tree[cur_sibl].item;
135
+ if (cur_itm < 0) {
136
+ cur_itm = -cur_itm;
137
+ if (slist[cur_itm - 1]) {
138
+ pot_patt[cur_itm - 1].freq += Tree[cur_sibl].freq;
139
+ if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0)
140
+ pot_patt[cur_itm - 1].str_pnt.push_back(cur_sibl);
141
+ }
142
+ if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0) {
143
+ DFS_seq.push_back(cur_sibl);
144
+ if (ilist_nempty) {
145
+ if (cur_itm == -_patt.seq[last_neg])
146
+ DFS_numfound.push_back(1);
147
+ else
148
+ DFS_numfound.push_back(0);
149
+ }
150
+ }
151
+ }
152
+ else {
153
+ if (ilist[cur_itm - 1]) {
154
+ pot_patt[cur_itm + L - 1].freq += Tree[cur_sibl].freq;
155
+ if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0)
156
+ pot_patt[cur_itm + L - 1].str_pnt.push_back(cur_sibl);
157
+ }
158
+ if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0)
159
+ DFS_itm.push_back(cur_sibl);
160
+ }
161
+ cur_sibl = Tree[cur_sibl].sibl;
162
+ }
163
+ }
164
+ if (ilist_nempty) {
165
+ for (int i = 0; i < (int)L; ++i) {
166
+ if (ilist[i])
167
+ last_strpnt[i] = (unsigned int)pot_patt[i + L].str_pnt.size();
168
+ }
169
+ }
170
+ while (!DFS_seq.empty()) {
171
+ unsigned int cur_sibl = DFS_seq.back();
172
+ DFS_seq.pop_back();
173
+ int num_found = 0;
174
+ if (ilist_nempty) {
175
+ num_found = DFS_numfound.back();
176
+ DFS_numfound.pop_back();
177
+ }
178
+ if (Tree[cur_sibl].itmset < 0) {
179
+ unsigned int carc = Tree[cur_sibl].chld;
180
+ Mine_vec(carc, 0, num_found, CTree[carc].ancest, CTree[carc].seq,
181
+ _patt.str_pnt[pnt], -1);
182
+ cur_sibl = CTree[carc].ancest.back();
183
+ while (cur_sibl != 0) {
184
+ Mine_vec(cur_sibl - 1, 0, num_found, CTree[carc].ancest,
185
+ VTree[cur_sibl - 1].seq, _patt.str_pnt[pnt], 1);
186
+ cur_sibl = VTree[cur_sibl - 1].sibl;
187
+ }
188
+ continue;
189
+ }
190
+ cur_sibl = Tree[cur_sibl].chld;
191
+ while (cur_sibl != 0) {
192
+ int cur_itm = Tree[cur_sibl].item;
193
+ if (cur_itm > 0) {
194
+ if (num_found == itmset_size && ilist[cur_itm - 1] &&
195
+ (std::abs(Tree[Tree[cur_sibl].anct].itmset) <
196
+ std::abs(Tree[_patt.str_pnt[pnt]].itmset) ||
197
+ !check_parent(Tree[cur_sibl].anct, _patt.str_pnt[pnt],
198
+ last_strpnt[cur_itm - 1],
199
+ pot_patt[cur_itm + L - 1].str_pnt))) {
200
+ pot_patt[cur_itm + L - 1].freq += Tree[cur_sibl].freq;
201
+ if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0)
202
+ pot_patt[cur_itm + L - 1].str_pnt.push_back(cur_sibl);
203
+ }
204
+ if (slist[cur_itm - 1] &&
205
+ std::abs(Tree[Tree[cur_sibl].anct].itmset) <=
206
+ std::abs(Tree[_patt.str_pnt[pnt]].itmset)) {
207
+ pot_patt[cur_itm - 1].freq += Tree[cur_sibl].freq;
208
+ if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0)
209
+ pot_patt[cur_itm - 1].str_pnt.push_back(cur_sibl);
210
+ }
211
+ if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0) {
212
+ DFS_seq.push_back(cur_sibl);
213
+ if (ilist_nempty) {
214
+ if (num_found < itmset_size &&
215
+ cur_itm ==
216
+ std::abs(
217
+ _patt.seq[last_neg + num_found]))
218
+ DFS_numfound.push_back(num_found + 1);
219
+ else
220
+ DFS_numfound.push_back(num_found);
221
+ }
222
+ }
223
+ }
224
+ else {
225
+ cur_itm = -cur_itm;
226
+ if (slist[cur_itm - 1] &&
227
+ std::abs(Tree[Tree[cur_sibl].anct].itmset) <=
228
+ std::abs(Tree[_patt.str_pnt[pnt]].itmset)) {
229
+ pot_patt[cur_itm - 1].freq += Tree[cur_sibl].freq;
230
+ if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0)
231
+ pot_patt[cur_itm - 1].str_pnt.push_back(cur_sibl);
232
+ }
233
+ if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0) {
234
+ DFS_seq.push_back(cur_sibl);
235
+ if (ilist_nempty) {
236
+ if (cur_itm == -_patt.seq[last_neg])
237
+ DFS_numfound.push_back(1);
238
+ else
239
+ DFS_numfound.push_back(0);
240
+ }
241
+ }
242
+ }
243
+ cur_sibl = Tree[cur_sibl].sibl;
244
+ }
245
+ }
246
+ }
321
247
 
322
- ofstream file_o;
323
- if (b_write)
324
- file_o.open(out_file, std::ios::app);
325
-
326
- for (int ii = 0; ii < seq.size(); ii++) {
327
- if (b_disp)
328
- cout << seq[ii] << " ";
329
- if (b_write)
330
- file_o << seq[ii] << " ";
331
- }
332
- if (b_disp)
333
- cout << endl;
334
- if (b_write)
335
- file_o << endl;
336
-
337
- if (b_disp)
338
- cout << "************** Freq: " << freq << endl;
339
- if (b_write) {
340
- file_o << "************** Freq: " << freq << endl;
341
- file_o.close();
342
- }
343
-
248
+ vector<int> ilistp;
249
+ vector<int> slistp;
250
+ for (vector<int>::iterator it = _patt.list.begin(); it != _patt.list.end(); ++it) {
251
+ if (*it > 0 && pot_patt[*it + L - 1].freq >= theta)
252
+ ilistp.push_back(*it);
253
+ else if (*it < 0 && pot_patt[-(*it) - 1].freq >= theta) {
254
+ if (itmset_exists)
255
+ slistp.push_back(-(*it));
256
+ ilistp.push_back(*it);
257
+ slistp.push_back(*it);
258
+ }
259
+ }
260
+
261
+ for (vector<int>::iterator it = ilistp.begin(); it != ilistp.end(); ++it) {
262
+ int p;
263
+ if (*it < 0)
264
+ p = -(*it) - 1;
265
+ else
266
+ p = *it - 1 + L;
267
+
268
+ DFS.emplace_back();
269
+ std::swap(DFS.back(), pot_patt[p]);
270
+ DFS.back().seq = _patt.seq;
271
+ DFS.back().seq.push_back(*it);
272
+ if (*it < 0)
273
+ DFS.back().list = slistp;
274
+ else
275
+ DFS.back().list = ilistp;
276
+
277
+ if (!CTree.empty() && !pot_vpatt[p].str_pnt.empty()) {
278
+ pot_vpatt[p].ass_patt = DFS.size() - 1;
279
+ VDFS.emplace_back();
280
+ std::swap(VDFS.back(), pot_vpatt[p]);
281
+ }
282
+
283
+ // ✅ Always collect for Python, regardless of printing
284
+ collectedPatterns.push_back(DFS.back().seq);
285
+
286
+ if (b_disp || b_write)
287
+ Out_patt(DFS.back().seq, DFS.back().freq);
288
+
289
+ ++num_patt;
290
+ }
344
291
  }
345
-
346
292
 
293
+ void Mine_vec(unsigned int seq_ID, int pos, int num_found,
294
+ vector<unsigned int>& ancest, vector<int>& items,
295
+ unsigned int pnt, int sgn) {
296
+
297
+ vector<bool> found(L + L * ilist_nempty, 0);
298
+ int num_ext = 0;
299
+
300
+ if (num_found == -1) {
301
+ while (pos < (int)items.size() && items[pos] > 0 &&
302
+ num_ext < (int)_patt.list.size()) {
303
+ int cur_itm = items[pos];
304
+ if (ilist[cur_itm - 1] && !found[cur_itm + L - 1]) {
305
+ if (pos + 1 < (int)items.size()) {
306
+ pot_vpatt[cur_itm + L - 1].seq_ID.push_back(seq_ID);
307
+ pot_vpatt[cur_itm + L - 1].str_pnt.push_back(sgn * (pos + 1));
308
+ }
309
+ ++pot_patt[cur_itm + L - 1].freq;
310
+ found[cur_itm + L - 1] = 1;
311
+ ++num_ext;
312
+ }
313
+ ++pos;
314
+ }
315
+ }
316
+
317
+ for (unsigned int k = pos;
318
+ k < items.size() && num_ext < (int)_patt.list.size(); ++k) {
319
+ int cur_itm = std::abs(items[k]);
320
+ if (items[k] < 0)
321
+ num_found = 0;
322
+ if (slist[cur_itm - 1] && !found[cur_itm - 1]) {
323
+ if (ancest.empty() ||
324
+ std::abs(Tree[ancest[cur_itm - 1]].itmset) <=
325
+ std::abs(Tree[pnt].itmset)) {
326
+ if (k + 1 < items.size()) {
327
+ pot_vpatt[cur_itm - 1].seq_ID.push_back(seq_ID);
328
+ pot_vpatt[cur_itm - 1].str_pnt.push_back(sgn * (k + 1));
329
+ }
330
+ ++pot_patt[cur_itm - 1].freq;
331
+ }
332
+ found[cur_itm - 1] = 1;
333
+ ++num_ext;
334
+ }
335
+ if (num_found == itmset_size) {
336
+ if (ilist[cur_itm - 1] && !found[cur_itm + L - 1]) {
337
+ if (ancest.empty() ||
338
+ std::abs(Tree[ancest[cur_itm - 1]].itmset) <
339
+ std::abs(Tree[pnt].itmset) ||
340
+ !check_parent(ancest[cur_itm - 1], pnt,
341
+ last_strpnt[cur_itm - 1],
342
+ pot_patt[cur_itm + L - 1].str_pnt)) {
343
+ if (k + 1 < items.size()) {
344
+ pot_vpatt[cur_itm + L - 1].seq_ID.push_back(seq_ID);
345
+ pot_vpatt[cur_itm + L - 1].str_pnt.push_back(sgn * (k + 1));
346
+ }
347
+ ++pot_patt[cur_itm + L - 1].freq;
348
+ }
349
+ found[cur_itm + L - 1] = 1;
350
+ ++num_ext;
351
+ }
352
+ }
353
+ else if (cur_itm ==
354
+ std::abs(_patt.seq[last_neg + num_found]))
355
+ ++num_found;
356
+ }
347
357
  }
348
358
 
359
+ void Out_patt(vector<int>& seq, unsigned int freq) {
360
+
361
+ std::ofstream file_o;
362
+ if (b_write)
363
+ file_o.open(out_file, std::ios::app);
364
+
365
+ for (int ii = 0; ii < (int)seq.size(); ii++) {
366
+ if (b_disp)
367
+ std::cout << seq[ii] << " ";
368
+ if (b_write)
369
+ file_o << seq[ii] << " ";
370
+ }
371
+ if (b_disp)
372
+ std::cout << std::endl;
373
+ if (b_write)
374
+ file_o << std::endl;
349
375
 
376
+ if (b_disp)
377
+ std::cout << "************** Freq: " << freq << std::endl;
378
+ if (b_write) {
379
+ file_o << "************** Freq: " << freq << std::endl;
380
+ file_o.close();
381
+ }
382
+ }
350
383
 
384
+ } // namespace htminer