effspm 0.2.7__cp311-cp311-macosx_11_0_arm64.whl → 0.3.0__cp311-cp311-macosx_11_0_arm64.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -1,26 +1,33 @@
1
+ // ─────────────────────────────────────────────────────────────────────────────
2
+ // NEW CHANGE (2025-10-24):
3
+ // - Always call Out_patt(...) so patterns are collected regardless of verbosity.
4
+ // - Printing/writing remains guarded inside Out_patt by b_disp/b_write.
5
+ // - This fixes LargeHT returning 0 patterns when verbose=False.
6
+ // ─────────────────────────────────────────────────────────────────────────────
1
7
 
2
8
  #include <cstdint>
3
9
  #include <vector>
4
10
 
5
11
  #include <iostream>
6
12
  #include <time.h>
7
- // for std::vector
8
13
  #include <cmath> // for std::ceil
9
14
 
10
15
  #include "freq_miner.hpp"
11
16
  #include "build_mdd.hpp"
12
17
  #include "utility.hpp"
18
+
13
19
  std::vector<std::uint64_t> ancest_base;
20
+
14
21
  namespace largehm {
15
22
 
16
23
  void Out_patt(std::vector<int>& seq, unsigned int freq);
17
24
  void Extend_patt(Pattern& _patt);
18
- void Mine_vec(unsigned long long int seq_ID,
25
+ void Mine_vec(std::uint64_t seq_ID,
19
26
  int pos,
20
27
  int num_found,
21
-
28
+ std::vector<std::uint64_t>& ancest,
22
29
  std::vector<int>& items,
23
- unsigned long long int inod,
30
+ std::uint64_t pnt,
24
31
  int sgn);
25
32
 
26
33
  unsigned long long int num_patt = 0;
@@ -28,25 +35,22 @@ unsigned long long int num_patt = 0;
28
35
  std::vector<bool> ilist;
29
36
  std::vector<bool> slist;
30
37
 
31
- std::vector<Pattern> pot_patt;
38
+ std::vector<Pattern> pot_patt;
32
39
  std::vector<VPattern> pot_vpatt;
33
40
  std::vector<unsigned long long int> last_strpnt;
34
41
 
35
42
  std::vector<int> DFS_numfound;
36
43
 
37
- Pattern _patt;
44
+ Pattern _patt;
38
45
  VPattern _vpatt;
39
46
 
40
- int itmset_size;
41
- int last_neg;
47
+ int itmset_size;
48
+ int last_neg;
42
49
 
43
50
  bool ilist_nempty;
44
51
 
45
-
46
52
  void Freq_miner() {
47
-
48
-
49
- // ─── Make sure DFS and VDFS are at least size L ─────────────────────────────
53
+ // Ensure DFS and VDFS are at least size L
50
54
  if (DFS.size() < static_cast<size_t>(L)) {
51
55
  size_t old = DFS.size();
52
56
  DFS.resize(static_cast<size_t>(L));
@@ -61,19 +65,6 @@ void Freq_miner() {
61
65
  VDFS[i] = VPattern(static_cast<int>(i));
62
66
  }
63
67
  }
64
- // ─────────────────────────────────────────────────────────────────────────────
65
-
66
-
67
- if (!Tree.empty()) {
68
- // std::cout << ", Tree[0].chld=" << Tree[0].chld
69
- // << ", Tree[0].sibl=" << Tree[0].sibl
70
- // << ", Tree[0].freq=" << Tree[0].freq;
71
- }
72
- // std::cout << ", DFS.size()=" << DFS.size()
73
- // << ", theta=" << theta
74
- // << ", M=" << M
75
- // << ", E=" << E
76
- // << std::endl;
77
68
 
78
69
  std::vector<int> tmp_list;
79
70
  for (int i = 0; i < static_cast<int>(L); ++i) {
@@ -84,7 +75,6 @@ void Freq_miner() {
84
75
  }
85
76
  }
86
77
  }
87
-
88
78
  for (int i = 0; i < static_cast<int>(DFS.size()); ++i) {
89
79
  DFS[i].list = tmp_list;
90
80
  }
@@ -92,8 +82,7 @@ void Freq_miner() {
92
82
  while (!DFS.empty() && give_time(std::clock() - start_time) < time_limit) {
93
83
  if (DFS.back().freq >= theta) {
94
84
  Extend_patt(DFS.back());
95
- }
96
- else {
85
+ } else {
97
86
  DFS.pop_back();
98
87
  if (!VDFS.empty() && VDFS.back().ass_patt == static_cast<int>(DFS.size())) {
99
88
  VDFS.pop_back();
@@ -102,7 +91,6 @@ void Freq_miner() {
102
91
  }
103
92
  }
104
93
 
105
-
106
94
  void Extend_patt(Pattern& _pattern) {
107
95
  swap(_patt, _pattern);
108
96
  DFS.pop_back();
@@ -120,8 +108,7 @@ void Extend_patt(Pattern& _pattern) {
120
108
  ilist_nempty = true;
121
109
  }
122
110
  }
123
- }
124
- else {
111
+ } else {
125
112
  for (auto it = _patt.list.begin(); it != _patt.list.end(); ++it)
126
113
  slist[-(*it) - 1] = true;
127
114
  }
@@ -149,8 +136,7 @@ void Extend_patt(Pattern& _pattern) {
149
136
  CTree[_vpatt.seq_ID[pnt]].seq,
150
137
  0,
151
138
  -1);
152
- }
153
- else {
139
+ } else {
154
140
  Mine_vec(_vpatt.seq_ID[pnt],
155
141
  _vpatt.str_pnt[pnt],
156
142
  -1,
@@ -174,18 +160,14 @@ void Extend_patt(Pattern& _pattern) {
174
160
  DFS_itm.pop_back();
175
161
  if (Tree[cur_sibl].itmset < 0) {
176
162
  unsigned int carc = Tree[cur_sibl].chld;
177
- Mine_vec(carc,
178
- 0,
179
- -1,
163
+ Mine_vec(carc, 0, -1,
180
164
  CTree[carc].ancest,
181
165
  CTree[carc].seq,
182
166
  _patt.str_pnt[pnt],
183
167
  -1);
184
168
  cur_sibl = CTree[carc].ancest.back();
185
169
  while (cur_sibl != 0) {
186
- Mine_vec(cur_sibl - 1,
187
- 0,
188
- -1,
170
+ Mine_vec(cur_sibl - 1, 0, -1,
189
171
  CTree[carc].ancest,
190
172
  VTree[cur_sibl - 1].seq,
191
173
  _patt.str_pnt[pnt],
@@ -213,8 +195,7 @@ void Extend_patt(Pattern& _pattern) {
213
195
  DFS_numfound.push_back(0);
214
196
  }
215
197
  }
216
- }
217
- else {
198
+ } else {
218
199
  if (ilist[cur_itm - 1]) {
219
200
  pot_patt[cur_itm + L - 1].freq += Tree[cur_sibl].freq;
220
201
  if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0)
@@ -226,13 +207,15 @@ void Extend_patt(Pattern& _pattern) {
226
207
  cur_sibl = Tree[cur_sibl].sibl;
227
208
  }
228
209
  }
210
+
229
211
  if (ilist_nempty) {
230
212
  for (int i = 0; i < (int)L; ++i) {
231
213
  if (ilist[i])
232
214
  last_strpnt[i] = pot_patt[i + L].str_pnt.size();
233
215
  }
234
216
  }
235
- while(!DFS_seq.empty()) {
217
+
218
+ while (!DFS_seq.empty()) {
236
219
  unsigned long long int cur_sibl = DFS_seq.back();
237
220
  DFS_seq.pop_back();
238
221
  int num_found = 0;
@@ -242,18 +225,14 @@ void Extend_patt(Pattern& _pattern) {
242
225
  }
243
226
  if (Tree[cur_sibl].itmset < 0) {
244
227
  unsigned int carc = Tree[cur_sibl].chld;
245
- Mine_vec(carc,
246
- 0,
247
- num_found,
228
+ Mine_vec(carc, 0, num_found,
248
229
  CTree[carc].ancest,
249
230
  CTree[carc].seq,
250
231
  _patt.str_pnt[pnt],
251
232
  -1);
252
233
  cur_sibl = CTree[carc].ancest.back();
253
234
  while (cur_sibl != 0) {
254
- Mine_vec(cur_sibl - 1,
255
- 0,
256
- num_found,
235
+ Mine_vec(cur_sibl - 1, 0, num_found,
257
236
  CTree[carc].ancest,
258
237
  VTree[cur_sibl - 1].seq,
259
238
  _patt.str_pnt[pnt],
@@ -268,19 +247,17 @@ void Extend_patt(Pattern& _pattern) {
268
247
  if (cur_itm > 0) {
269
248
  if (num_found == itmset_size &&
270
249
  ilist[cur_itm - 1] &&
271
- (std::abs(Tree[Tree[cur_sibl].anct].itmset) < std::abs(Tree[_patt.str_pnt[pnt]].itmset)
250
+ (std::abs(Tree[Tree[cur_sibl].anct].itmset) < std::abs(Tree[_patt.str_pnt[pnt]].itmset)
272
251
  || !check_parent(Tree[cur_sibl].anct,
273
- _patt.str_pnt[pnt],
274
- last_strpnt[cur_itm - 1],
275
- pot_patt[cur_itm + L - 1].str_pnt)))
276
- {
252
+ _patt.str_pnt[pnt],
253
+ last_strpnt[cur_itm - 1],
254
+ pot_patt[cur_itm + L - 1].str_pnt))) {
277
255
  pot_patt[cur_itm + L - 1].freq += Tree[cur_sibl].freq;
278
256
  if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0)
279
257
  pot_patt[cur_itm + L - 1].str_pnt.push_back(cur_sibl);
280
258
  }
281
259
  if (slist[cur_itm - 1] &&
282
- std::abs(Tree[Tree[cur_sibl].anct].itmset) <= std::abs(Tree[_patt.str_pnt[pnt]].itmset))
283
- {
260
+ std::abs(Tree[Tree[cur_sibl].anct].itmset) <= std::abs(Tree[_patt.str_pnt[pnt]].itmset)) {
284
261
  pot_patt[cur_itm - 1].freq += Tree[cur_sibl].freq;
285
262
  if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0)
286
263
  pot_patt[cur_itm - 1].str_pnt.push_back(cur_sibl);
@@ -288,19 +265,17 @@ void Extend_patt(Pattern& _pattern) {
288
265
  if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0) {
289
266
  DFS_seq.push_back(cur_sibl);
290
267
  if (ilist_nempty) {
291
- if (num_found < itmset_size
292
- && cur_itm == std::abs(_patt.seq[last_neg + num_found]))
268
+ if (num_found < itmset_size &&
269
+ cur_itm == std::abs(_patt.seq[last_neg + num_found]))
293
270
  DFS_numfound.push_back(num_found + 1);
294
271
  else
295
272
  DFS_numfound.push_back(num_found);
296
273
  }
297
274
  }
298
- }
299
- else {
275
+ } else {
300
276
  cur_itm = -cur_itm;
301
277
  if (slist[cur_itm - 1] &&
302
- std::abs(Tree[Tree[cur_sibl].anct].itmset) <= std::abs(Tree[_patt.str_pnt[pnt]].itmset))
303
- {
278
+ std::abs(Tree[Tree[cur_sibl].anct].itmset) <= std::abs(Tree[_patt.str_pnt[pnt]].itmset)) {
304
279
  pot_patt[cur_itm - 1].freq += Tree[cur_sibl].freq;
305
280
  if (Tree[cur_sibl].chld != 0 || Tree[cur_sibl].itmset < 0)
306
281
  pot_patt[cur_itm - 1].str_pnt.push_back(cur_sibl);
@@ -323,22 +298,23 @@ void Extend_patt(Pattern& _pattern) {
323
298
  std::vector<int> ilistp;
324
299
  std::vector<int> slistp;
325
300
  for (auto it = _patt.list.begin(); it != _patt.list.end(); ++it) {
326
- if (*it > 0 && pot_patt[(*it) + L - 1].freq >= theta)
301
+ if (*it > 0 && pot_patt[(*it) + L - 1].freq >= theta)
327
302
  ilistp.push_back(*it);
328
303
  else if (*it < 0 && pot_patt[-(*it) - 1].freq >= theta) {
329
- if (itmset_exists)
304
+ if (itmset_exists)
330
305
  slistp.push_back(-(*it));
331
306
  ilistp.push_back(*it);
332
307
  slistp.push_back(*it);
333
308
  }
334
- }
309
+ }
335
310
 
336
311
  for (auto it = ilistp.begin(); it != ilistp.end(); ++it) {
337
312
  int p;
338
- if (*it < 0)
313
+ if (*it < 0)
339
314
  p = -(*it) - 1;
340
315
  else
341
316
  p = (*it) - 1 + L;
317
+
342
318
  pot_patt[p].str_pnt.shrink_to_fit();
343
319
  DFS.push_back(pot_patt[p]);
344
320
  DFS.back().seq = _patt.seq;
@@ -347,17 +323,23 @@ void Extend_patt(Pattern& _pattern) {
347
323
  DFS.back().list = slistp;
348
324
  else
349
325
  DFS.back().list = ilistp;
326
+
350
327
  if (!CTree.empty() && !pot_vpatt[p].str_pnt.empty()) {
351
328
  pot_vpatt[p].ass_patt = static_cast<int>(DFS.size()) - 1;
352
329
  VDFS.push_back(pot_vpatt[p]);
353
330
  }
354
- if (b_disp || b_write)
355
- Out_patt(DFS.back().seq, DFS.back().freq);
331
+
332
+ // ─────────────────────────────────────────────────────────────────────
333
+ // NEW CHANGE: Call Out_patt ALWAYS to populate `collected` even when
334
+ // verbose is false and we are not writing to a file.
335
+ // Out_patt itself guards printing/writing with b_disp/b_write.
336
+ // ─────────────────────────────────────────────────────────────────────
337
+ Out_patt(DFS.back().seq, DFS.back().freq);
338
+
356
339
  ++num_patt;
357
340
  }
358
341
  }
359
342
 
360
-
361
343
  void Mine_vec(std::uint64_t seq_ID,
362
344
  int pos,
363
345
  int num_found,
@@ -377,7 +359,7 @@ void Mine_vec(std::uint64_t seq_ID,
377
359
  pot_vpatt[cur_itm + L - 1].str_pnt.push_back(sgn * (pos + 1));
378
360
  }
379
361
  ++pot_patt[cur_itm + L - 1].freq;
380
- found[cur_itm + L - 1] = true;
362
+ found[cur_itm + L - 1] = true;
381
363
  }
382
364
  ++pos;
383
365
  }
@@ -385,8 +367,9 @@ void Mine_vec(std::uint64_t seq_ID,
385
367
 
386
368
  for (unsigned int k = pos; k < items.size(); ++k) {
387
369
  int cur_itm = std::abs(items[k]);
388
- if (items[k] < 0)
370
+ if (items[k] < 0)
389
371
  num_found = 0;
372
+
390
373
  if (slist[cur_itm - 1] && !found[cur_itm - 1]) {
391
374
  if (ancest.empty() || std::abs(Tree[ancest[cur_itm - 1]].itmset) <= std::abs(Tree[pnt].itmset)) {
392
375
  if (k + 1 < static_cast<int>(items.size())) {
@@ -397,11 +380,12 @@ void Mine_vec(std::uint64_t seq_ID,
397
380
  }
398
381
  found[cur_itm - 1] = true;
399
382
  }
383
+
400
384
  if (num_found == itmset_size) {
401
385
  if (ilist[cur_itm - 1] && !found[cur_itm + L - 1]) {
402
- if (ancest.empty() ||
403
- std::abs(Tree[ancest[cur_itm - 1]].itmset) < std::abs(Tree[pnt].itmset)
404
- || !check_parent(ancest[cur_itm - 1], pnt, last_strpnt[cur_itm - 1], pot_patt[cur_itm + L - 1].str_pnt))
386
+ if (ancest.empty() ||
387
+ std::abs(Tree[ancest[cur_itm - 1]].itmset) < std::abs(Tree[pnt].itmset) ||
388
+ !check_parent(ancest[cur_itm - 1], pnt, last_strpnt[cur_itm - 1], pot_patt[cur_itm + L - 1].str_pnt))
405
389
  {
406
390
  if (k + 1 < static_cast<int>(items.size())) {
407
391
  pot_vpatt[cur_itm + L - 1].seq_ID.push_back(seq_ID);
@@ -411,24 +395,24 @@ void Mine_vec(std::uint64_t seq_ID,
411
395
  }
412
396
  found[cur_itm + L - 1] = true;
413
397
  }
414
- }
415
- else if (cur_itm == std::abs(_patt.seq[last_neg + num_found])) {
398
+ } else if (cur_itm == std::abs(_patt.seq[last_neg + num_found])) {
416
399
  ++num_found;
417
400
  }
418
401
  }
419
402
  }
420
403
 
421
-
422
404
  void Out_patt(std::vector<int>& seq, unsigned int freq) {
405
+ // Always collect:
423
406
  largehm::collected.push_back(seq);
407
+
424
408
  std::ofstream file_o;
425
- if (b_write)
409
+ if (b_write)
426
410
  file_o.open(out_file, std::ios::app);
427
411
 
428
412
  for (int ii = 0; ii < static_cast<int>(seq.size()); ii++) {
429
413
  if (b_disp)
430
414
  std::cout << seq[ii] << " ";
431
- if (b_write)
415
+ if (b_write)
432
416
  file_o << seq[ii] << " ";
433
417
  }
434
418
  if (b_disp)