effspm 0.2.8__cp39-cp39-win_amd64.whl → 0.3.3__cp39-cp39-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. effspm/_effspm.cp39-win_amd64.pyd +0 -0
  2. effspm/_effspm.cpp +961 -210
  3. effspm/btminer/src/build_mdd.cpp +42 -17
  4. effspm/btminer/src/build_mdd.hpp +13 -19
  5. effspm/btminer/src/freq_miner.cpp +134 -49
  6. effspm/btminer/src/freq_miner.hpp +16 -0
  7. effspm/btminer/src/load_inst.cpp +211 -126
  8. effspm/btminer/src/load_inst.hpp +22 -4
  9. effspm/btminer/src/main.cpp +83 -0
  10. effspm/btminer/src/utility.cpp +26 -41
  11. effspm/btminer/src/utility.hpp +6 -30
  12. effspm/freq_miner.hpp +2 -1
  13. effspm/htminer/src/build_mdd.cpp +46 -124
  14. effspm/htminer/src/build_mdd.hpp +56 -49
  15. effspm/htminer/src/freq_miner.cpp +341 -307
  16. effspm/htminer/src/freq_miner.hpp +39 -40
  17. effspm/htminer/src/load_inst.cpp +287 -336
  18. effspm/htminer/src/load_inst.hpp +23 -6
  19. effspm/htminer/src/main.cpp +97 -0
  20. effspm/htminer/src/utility.cpp +38 -57
  21. effspm/htminer/src/utility.hpp +9 -64
  22. effspm/largebm/src/build_mdd.cpp +69 -110
  23. effspm/largebm/src/build_mdd.hpp +22 -37
  24. effspm/largebm/src/freq_miner.cpp +241 -291
  25. effspm/largebm/src/freq_miner.hpp +25 -36
  26. effspm/largebm/src/load_inst.cpp +20 -26
  27. effspm/largebm/src/load_inst.hpp +24 -34
  28. effspm/largebm/src/main.cpp +95 -0
  29. effspm/largebm/src/utility.cpp +11 -21
  30. effspm/largebm/src/utility.hpp +7 -10
  31. effspm/largehm/src/build_mdd.cpp +75 -110
  32. effspm/largehm/src/build_mdd.hpp +53 -73
  33. effspm/largehm/src/freq_miner.cpp +134 -191
  34. effspm/largehm/src/freq_miner.hpp +37 -60
  35. effspm/largehm/src/load_inst.cpp +137 -174
  36. effspm/largehm/src/load_inst.hpp +13 -50
  37. effspm/largehm/src/main.cpp +95 -0
  38. effspm/largehm/src/utility.cpp +46 -28
  39. effspm/largehm/src/utility.hpp +18 -16
  40. effspm/largepp/src/freq_miner.cpp +184 -156
  41. effspm/largepp/src/freq_miner.hpp +11 -36
  42. effspm/largepp/src/load_inst.cpp +32 -12
  43. effspm/largepp/src/load_inst.hpp +15 -9
  44. effspm/largepp/src/main.cpp +108 -0
  45. effspm/largepp/src/pattern.hpp +31 -0
  46. effspm/load_inst.cpp +8 -8
  47. effspm/load_inst.hpp +1 -1
  48. effspm/main.cpp +103 -0
  49. {effspm-0.2.8.dist-info → effspm-0.3.3.dist-info}/METADATA +1 -1
  50. effspm-0.3.3.dist-info/RECORD +60 -0
  51. effspm-0.2.8.dist-info/RECORD +0 -53
  52. {effspm-0.2.8.dist-info → effspm-0.3.3.dist-info}/WHEEL +0 -0
  53. {effspm-0.2.8.dist-info → effspm-0.3.3.dist-info}/licenses/LICENSE +0 -0
  54. {effspm-0.2.8.dist-info → effspm-0.3.3.dist-info}/top_level.txt +0 -0
@@ -1,349 +1,299 @@
1
- #include <vector>
2
1
  #include <algorithm>
3
- #include <iostream>
4
2
  #include <fstream>
5
- #include <ctime>
3
+ #include <iostream>
6
4
  #include <unordered_map>
7
5
  #include <unordered_set>
8
-
9
- #include "freq_miner.hpp" // must come before load_inst.hpp
6
+ #include <cstdlib> // ensure std::abs(int)
7
+ #include "freq_miner.hpp"
10
8
  #include "load_inst.hpp"
11
9
  #include "utility.hpp"
12
10
  #include "build_mdd.hpp"
13
11
 
14
12
  namespace largebm {
15
13
 
16
- // Helper declarations (must match headers exactly)
17
- static void Out_patt(const std::vector<int>& seq, unsigned long long freq);
18
- static void Extend_patt(Pattern& patt);
14
+ unsigned long long int num_patt = 0;
15
+ std::vector<bool> ilist;
16
+ std::vector<bool> slist;
17
+ std::vector<int> DFS_numfound;
18
+ Pattern _patt;
19
19
 
20
- // Globals (declared once; types must match freq_miner.hpp)
21
- unsigned long long int num_patt = 0;
22
- std::vector<bool> ilist;
23
- std::vector<bool> slist;
24
- std::vector<int> DFS_numfound;
25
- Pattern _patt;
20
+ static void Out_patt(const std::vector<int>& seq, unsigned long long freq);
21
+ static void Extend_patt(Pattern& patt);
26
22
 
27
- void Freq_miner() {
28
- // ─── RESET per‐run state ──────────────────────────────────────
29
- collected.clear();
30
- num_patt = 0;
31
- // Ensure DFS has at least L entries (so DFS[i] is valid for 0..L-1)
32
- if (static_cast<int>(DFS.size()) < static_cast<int>(L)) {
33
- DFS.resize(L);
34
- }
35
- // ─────────────────────────────────────────────────────────────
23
+ void Freq_miner() {
24
+ collected.clear();
25
+ num_patt = 0;
36
26
 
37
- std::vector<int> list;
27
+ if (static_cast<int>(DFS.size()) < static_cast<int>(L)) {
28
+ DFS.resize(L);
29
+ }
38
30
 
39
- if (use_list) {
40
- // List‐based routine
41
- std::vector<int> empty_pref;
42
- Freq_miner_list(items, empty_pref, theta, collected);
43
- return;
44
- }
31
+ std::vector<int> list;
45
32
 
46
- // MDD‐based initialization
47
- for (int i = 0; i < static_cast<int>(L); ++i) {
48
- if (DFS[i].freq >= theta) {
49
- list.push_back(-i - 1);
50
- if (itmset_exists) {
51
- list.push_back(i + 1);
52
- }
53
- }
54
- }
55
- for (size_t i = 0; i < DFS.size(); ++i) {
56
- DFS[i].list = list;
57
- }
33
+ if (use_list) {
34
+ std::vector<int> empty_pref;
35
+ Freq_miner_list(items, empty_pref, theta, collected);
36
+ return;
37
+ }
58
38
 
59
- while (!DFS.empty() && give_time(clock() - start_time) < time_limit) {
60
- if (DFS.back().freq >= theta) {
61
- Extend_patt(DFS.back());
62
- } else {
63
- DFS.pop_back();
64
- }
39
+ // seed candidates by DFS[i].freq
40
+ for (int i = 0; i < static_cast<int>(L); ++i) {
41
+ if (DFS[i].freq >= theta) {
42
+ list.push_back(-i - 1);
43
+ if (itmset_exists) list.push_back(i + 1);
65
44
  }
66
45
  }
67
46
 
68
- void Extend_patt(Pattern& _pattern) {
69
- swap(_patt, _pattern);
70
- DFS.pop_back();
71
-
72
- slist = std::vector<bool>(L, false);
73
- bool ilist_nempty = false;
47
+ for (size_t i = 0; i < DFS.size(); ++i) {
48
+ DFS[i].list = list;
49
+ }
74
50
 
75
- if (itmset_exists) {
76
- ilist = std::vector<bool>(L, false);
77
- for (auto it = _patt.list.begin(); it != _patt.list.end(); ++it) {
78
- if (*it < 0) {
79
- slist[-(*it) - 1] = true;
80
- } else {
81
- ilist[(*it) - 1] = true;
82
- ilist_nempty = true;
83
- }
84
- }
51
+ while (!DFS.empty() && give_time(std::clock() - start_time) < time_limit) {
52
+ if (DFS.back().freq >= theta) {
53
+ Extend_patt(DFS.back());
85
54
  } else {
86
- for (auto it = _patt.list.begin(); it != _patt.list.end(); ++it) {
87
- slist[-(*it) - 1] = true;
88
- }
55
+ DFS.pop_back();
89
56
  }
57
+ }
58
+ }
90
59
 
91
- int itmset_size = 1;
92
- int last_neg = static_cast<int>(_patt.seq.size()) - 1;
93
- while (_patt.seq[last_neg] > 0) {
94
- --last_neg;
95
- ++itmset_size;
96
- }
60
+ static void Extend_patt(Pattern& _pattern) {
61
+ std::swap(_patt, _pattern);
62
+ DFS.pop_back();
97
63
 
98
- std::vector<Pattern> pot_patt(L + (ilist_nempty ? L : 0));
99
- std::vector<unsigned long long int> DFS_patt_init;
100
- std::vector<unsigned long long int> DFS_patt;
101
- if (ilist_nempty) {
102
- DFS_numfound.clear();
64
+ slist = std::vector<bool>(L, false);
65
+ bool ilist_nempty = false;
66
+
67
+ if (itmset_exists) {
68
+ ilist = std::vector<bool>(L, false);
69
+ for (size_t i = 0; i < _patt.list.size(); ++i) {
70
+ int v = _patt.list[i];
71
+ if (v < 0) slist[-v - 1] = true;
72
+ else { ilist[v - 1] = true; ilist_nempty = true; }
103
73
  }
104
- std::vector<unsigned long long int> last_strpnt(L, 0);
74
+ } else {
75
+ for (size_t i = 0; i < _patt.list.size(); ++i) {
76
+ int v = _patt.list[i];
77
+ slist[-v - 1] = true;
78
+ }
79
+ }
105
80
 
106
- for (unsigned long long int pnt = 0; pnt < _patt.str_pnt.size(); ++pnt) {
107
- DFS_patt_init.push_back(_patt.str_pnt[pnt]);
108
- while (!DFS_patt_init.empty()) {
109
- unsigned long long int cur_sibl = Tree[DFS_patt_init.back()].chld;
110
- DFS_patt_init.pop_back();
111
- while (cur_sibl != 0) {
112
- int cur_itm = Tree[cur_sibl].item;
113
- if (cur_itm < 0) {
114
- cur_itm = -cur_itm;
115
- if (slist[cur_itm - 1]) {
116
- pot_patt[cur_itm - 1].freq += Tree[cur_sibl].freq;
117
- if (Tree[cur_sibl].chld != 0) {
118
- pot_patt[cur_itm - 1].str_pnt.push_back(cur_sibl);
119
- }
120
- }
121
- if (Tree[cur_sibl].chld != static_cast<unsigned long long>(-1)) {
122
- DFS_patt.push_back(cur_sibl);
123
- if (ilist_nempty) {
124
- if (cur_itm == -_patt.seq[last_neg]) {
125
- DFS_numfound.push_back(1);
126
- } else {
127
- DFS_numfound.push_back(0);
128
- }
129
- }
130
- }
131
- } else {
132
- if (ilist[cur_itm - 1]) {
133
- pot_patt[cur_itm + L - 1].freq += Tree[cur_sibl].freq;
134
- if (Tree[cur_sibl].chld != 0) {
135
- pot_patt[cur_itm + L - 1].str_pnt.push_back(cur_sibl);
136
- }
137
- }
138
- if (Tree[cur_sibl].chld != static_cast<unsigned long long>(-1)) {
139
- DFS_patt_init.push_back(cur_sibl);
81
+ int itmset_size = 1;
82
+ int last_neg = static_cast<int>(_patt.seq.size()) - 1;
83
+ while (_patt.seq[last_neg] > 0) {
84
+ --last_neg;
85
+ ++itmset_size;
86
+ }
87
+
88
+ std::vector<Pattern> pot_patt(L + (ilist_nempty ? L : 0));
89
+ std::vector<unsigned long long> DFS_patt_init;
90
+ std::vector<unsigned long long> DFS_patt;
91
+ if (ilist_nempty) DFS_numfound.clear();
92
+ std::vector<unsigned long long> last_strpnt(L, 0);
93
+
94
+ for (unsigned long long pnt = 0; pnt < _patt.str_pnt.size(); ++pnt) {
95
+ DFS_patt_init.push_back(_patt.str_pnt[pnt]);
96
+ while (!DFS_patt_init.empty()) {
97
+ unsigned long long cur_sibl = Tree[DFS_patt_init.back()].chld;
98
+ DFS_patt_init.pop_back();
99
+ while (cur_sibl != 0) {
100
+ int cur_itm = Tree[cur_sibl].item;
101
+ if (cur_itm < 0) {
102
+ cur_itm = -cur_itm;
103
+ if (slist[cur_itm - 1]) {
104
+ pot_patt[cur_itm - 1].freq += Tree[cur_sibl].freq;
105
+ if (Tree[cur_sibl].chld != 0)
106
+ pot_patt[cur_itm - 1].str_pnt.push_back(cur_sibl);
107
+ }
108
+ if (Tree[cur_sibl].chld != static_cast<unsigned long long>(-1)) {
109
+ DFS_patt.push_back(cur_sibl);
110
+ if (ilist_nempty) {
111
+ DFS_numfound.push_back(cur_itm == -_patt.seq[last_neg] ? 1 : 0);
140
112
  }
141
113
  }
142
- cur_sibl = Tree[cur_sibl].sibl;
143
- }
144
- }
145
- if (ilist_nempty) {
146
- for (int i = 0; i < static_cast<int>(L); ++i) {
147
- if (ilist[i]) {
148
- last_strpnt[i] = pot_patt[i + L].str_pnt.size();
114
+ } else {
115
+ if (ilist[cur_itm - 1]) {
116
+ pot_patt[cur_itm + L - 1].freq += Tree[cur_sibl].freq;
117
+ if (Tree[cur_sibl].chld != 0)
118
+ pot_patt[cur_itm + L - 1].str_pnt.push_back(cur_sibl);
149
119
  }
120
+ if (Tree[cur_sibl].chld != static_cast<unsigned long long>(-1))
121
+ DFS_patt_init.push_back(cur_sibl);
150
122
  }
123
+ cur_sibl = Tree[cur_sibl].sibl;
151
124
  }
152
- while (!DFS_patt.empty()) {
153
- unsigned long long int cur_sibl = Tree[DFS_patt.back()].chld;
154
- DFS_patt.pop_back();
155
- int num_found = 0;
156
- if (ilist_nempty) {
157
- num_found = DFS_numfound.back();
158
- DFS_numfound.pop_back();
159
- }
160
- while (cur_sibl != 0) {
161
- int cur_itm = Tree[cur_sibl].item;
162
- if (cur_itm > 0) {
163
- if (num_found == itmset_size &&
164
- ilist[cur_itm - 1] &&
165
- (Tree[Tree[cur_sibl].anct].itmset < Tree[_patt.str_pnt[pnt]].itmset ||
166
- !check_parent(cur_sibl, _patt.str_pnt[pnt],
167
- last_strpnt[cur_itm - 1],
168
- pot_patt[cur_itm + L - 1].str_pnt))) {
169
- pot_patt[cur_itm + L - 1].freq += Tree[cur_sibl].freq;
170
- if (Tree[cur_sibl].chld != 0) {
171
- pot_patt[cur_itm + L - 1].str_pnt.push_back(cur_sibl);
172
- }
173
- }
174
- if (slist[cur_itm - 1] &&
175
- Tree[Tree[cur_sibl].anct].itmset <= Tree[_patt.str_pnt[pnt]].itmset) {
176
- pot_patt[cur_itm - 1].freq += Tree[cur_sibl].freq;
177
- if (Tree[cur_sibl].chld != 0) {
178
- pot_patt[cur_itm - 1].str_pnt.push_back(cur_sibl);
179
- }
180
- }
181
- if (Tree[cur_sibl].chld != static_cast<unsigned long long>(-1)) {
182
- DFS_patt.push_back(cur_sibl);
183
- if (ilist_nempty) {
184
- if (num_found < itmset_size &&
185
- cur_itm == std::abs(_patt.seq[last_neg + num_found])) {
186
- DFS_numfound.push_back(num_found + 1);
187
- } else {
188
- DFS_numfound.push_back(num_found);
189
- }
190
- }
191
- }
192
- } else {
193
- cur_itm = -cur_itm;
194
- if (slist[cur_itm - 1] &&
195
- Tree[Tree[cur_sibl].anct].itmset <= Tree[_patt.str_pnt[pnt]].itmset) {
196
- pot_patt[cur_itm - 1].freq += Tree[cur_sibl].freq;
197
- if (Tree[cur_sibl].chld != 0) {
198
- pot_patt[cur_itm - 1].str_pnt.push_back(cur_sibl);
125
+ }
126
+ if (ilist_nempty) {
127
+ for (int i = 0; i < static_cast<int>(L); ++i) {
128
+ if (ilist[i]) last_strpnt[i] = pot_patt[i + L].str_pnt.size();
129
+ }
130
+ }
131
+ while (!DFS_patt.empty()) {
132
+ unsigned long long cur_sibl = Tree[DFS_patt.back()].chld;
133
+ DFS_patt.pop_back();
134
+ int num_found = 0;
135
+ if (ilist_nempty) { num_found = DFS_numfound.back(); DFS_numfound.pop_back(); }
136
+ while (cur_sibl != 0) {
137
+ int cur_itm = Tree[cur_sibl].item;
138
+ if (cur_itm > 0) {
139
+ if (num_found == itmset_size &&
140
+ ilist[cur_itm - 1] &&
141
+ (Tree[Tree[cur_sibl].anct].itmset < Tree[_patt.str_pnt[pnt]].itmset ||
142
+ !check_parent(cur_sibl, _patt.str_pnt[pnt],
143
+ last_strpnt[cur_itm - 1],
144
+ pot_patt[cur_itm + L - 1].str_pnt))) {
145
+ pot_patt[cur_itm + L - 1].freq += Tree[cur_sibl].freq;
146
+ if (Tree[cur_sibl].chld != 0)
147
+ pot_patt[cur_itm + L - 1].str_pnt.push_back(cur_sibl);
148
+ }
149
+ if (slist[cur_itm - 1] &&
150
+ Tree[Tree[cur_sibl].anct].itmset <= Tree[_patt.str_pnt[pnt]].itmset) {
151
+ pot_patt[cur_itm - 1].freq += Tree[cur_sibl].freq;
152
+ if (Tree[cur_sibl].chld != 0)
153
+ pot_patt[cur_itm - 1].str_pnt.push_back(cur_sibl);
154
+ }
155
+ if (Tree[cur_sibl].chld != static_cast<unsigned long long>(-1)) {
156
+ DFS_patt.push_back(cur_sibl);
157
+ if (ilist_nempty) {
158
+ if (num_found < itmset_size &&
159
+ cur_itm == std::abs(_patt.seq[last_neg + num_found])) {
160
+ DFS_numfound.push_back(num_found + 1);
161
+ } else {
162
+ DFS_numfound.push_back(num_found);
199
163
  }
200
164
  }
201
- if (Tree[cur_sibl].chld != static_cast<unsigned long long>(-1)) {
202
- DFS_patt.push_back(cur_sibl);
203
- if (ilist_nempty) {
204
- if (cur_itm == -_patt.seq[last_neg]) {
205
- DFS_numfound.push_back(1);
206
- } else {
207
- DFS_numfound.push_back(0);
208
- }
209
- }
165
+ }
166
+ } else {
167
+ cur_itm = -cur_itm;
168
+ if (slist[cur_itm - 1] &&
169
+ Tree[Tree[cur_sibl].anct].itmset <= Tree[_patt.str_pnt[pnt]].itmset) {
170
+ pot_patt[cur_itm - 1].freq += Tree[cur_sibl].freq;
171
+ if (Tree[cur_sibl].chld != 0)
172
+ pot_patt[cur_itm - 1].str_pnt.push_back(cur_sibl);
173
+ }
174
+ if (Tree[cur_sibl].chld != static_cast<unsigned long long>(-1)) {
175
+ DFS_patt.push_back(cur_sibl);
176
+ if (ilist_nempty) {
177
+ DFS_numfound.push_back(cur_itm == -_patt.seq[last_neg] ? 1 : 0);
210
178
  }
211
179
  }
212
- cur_sibl = Tree[cur_sibl].sibl;
213
180
  }
181
+ cur_sibl = Tree[cur_sibl].sibl;
214
182
  }
215
183
  }
184
+ }
216
185
 
217
- std::vector<int> ilistp;
218
- std::vector<int> slistp;
219
- for (auto it = _patt.list.begin(); it != _patt.list.end(); ++it) {
220
- int idx = (*it < 0) ? (-(*it) - 1) : ((*it) - 1 + static_cast<int>(L));
221
- if (*it > 0 && pot_patt[idx].freq >= theta) {
222
- ilistp.push_back(*it);
223
- } else if (*it < 0 && pot_patt[-(*it) - 1].freq >= theta) {
224
- if (itmset_exists) {
225
- slistp.push_back(-(*it));
226
- }
227
- ilistp.push_back(*it);
228
- slistp.push_back(*it);
229
- }
230
- }
231
-
232
- for (auto it = ilistp.begin(); it != ilistp.end(); ++it) {
233
- int p;
234
- if (*it < 0) {
235
- p = -(*it) - 1;
236
- } else {
237
- p = (*it) - 1 + static_cast<int>(L);
238
- }
239
-
240
- DFS.emplace_back();
241
- swap(DFS.back(), pot_patt[p]);
242
- DFS.back().seq = _patt.seq;
243
- DFS.back().seq.push_back(*it);
244
- if (*it < 0) {
245
- DFS.back().list = slistp;
246
- } else {
247
- DFS.back().list = ilistp;
248
- }
249
- if (b_disp || b_write) {
250
- Out_patt(DFS.back().seq, DFS.back().freq);
251
- }
252
- ++num_patt;
186
+ std::vector<int> ilistp;
187
+ std::vector<int> slistp;
188
+ for (size_t i = 0; i < _patt.list.size(); ++i) {
189
+ int v = _patt.list[i];
190
+ int idx = (v < 0) ? (-v - 1) : (v - 1 + static_cast<int>(L));
191
+ if (v > 0 && pot_patt[idx].freq >= theta) {
192
+ ilistp.push_back(v);
193
+ } else if (v < 0 && pot_patt[-v - 1].freq >= theta) {
194
+ if (itmset_exists) slistp.push_back(-v);
195
+ ilistp.push_back(v);
196
+ slistp.push_back(v);
253
197
  }
254
198
  }
255
199
 
256
- void Out_patt(const std::vector<int>& seq, unsigned long long freq) {
257
- if (b_disp || b_write) {
258
- std::ofstream file_o;
259
- if (b_write) {
260
- file_o.open(out_file, std::ios::app);
261
- }
262
- for (int v : seq) {
263
- if (b_disp) std::cout << v << ' ';
264
- if (b_write) file_o << v << ' ';
265
- }
266
- if (b_disp) std::cout << '\n';
267
- if (b_write) file_o << '\n';
200
+ for (size_t i = 0; i < ilistp.size(); ++i) {
201
+ int v = ilistp[i];
202
+ int p = (v < 0) ? (-v - 1) : (v - 1 + static_cast<int>(L));
203
+ DFS.emplace_back();
204
+ std::swap(DFS.back(), pot_patt[p]);
205
+ DFS.back().seq = _patt.seq;
206
+ DFS.back().seq.push_back(v);
207
+ DFS.back().list = (v < 0) ? slistp : ilistp;
208
+ Out_patt(DFS.back().seq, DFS.back().freq);
209
+ ++num_patt;
210
+ }
211
+ }
268
212
 
269
- if (b_disp) {
270
- std::cout << "************** Freq: " << freq << '\n';
271
- }
272
- if (b_write) {
273
- file_o << "************** Freq: " << freq << '\n';
274
- file_o.close();
275
- }
276
- }
213
+ static void Out_patt(const std::vector<int>& seq, unsigned long long freq) {
214
+ if (!(b_disp || b_write)) {
277
215
  collected.push_back(seq);
216
+ return;
278
217
  }
218
+ std::ofstream file_o;
219
+ if (b_write) file_o.open(out_file, std::ios::app);
279
220
 
280
- void Freq_miner_list(const std::vector<std::vector<int>>& db,
281
- std::vector<int>& prefix,
282
- unsigned long long minsup,
283
- std::vector<std::vector<int>>& out) {
284
- // 1) count single‐item support (one count per sequence)
285
- std::unordered_map<int, unsigned long long> freq;
286
- for (auto const& seq : db) {
287
- std::unordered_set<int> seen;
288
- for (int x : seq) {
289
- if (seen.insert(x).second) {
290
- ++freq[x];
291
- }
292
- }
293
- }
221
+ for (size_t i = 0; i < seq.size(); ++i) {
222
+ int v = seq[i];
223
+ if (b_disp) std::cout << v << ' ';
224
+ if (b_write) file_o << v << ' ';
225
+ }
226
+ if (b_disp) std::cout << '\n';
227
+ if (b_write) file_o << '\n';
294
228
 
295
- // 2) collect the frequent candidates
296
- std::vector<std::pair<int, unsigned long long>> cand;
297
- cand.reserve(freq.size());
298
- for (auto& p : freq) {
299
- if (p.second >= minsup) {
300
- cand.emplace_back(p.first, p.second);
301
- }
229
+ if (b_disp) std::cout << "************** Freq: " << freq << '\n';
230
+ if (b_write) {
231
+ file_o << "************** Freq: " << freq << '\n';
232
+ file_o.close();
233
+ }
234
+ collected.push_back(seq);
235
+ }
236
+
237
+ void Freq_miner_list(const std::vector<std::vector<int>>& db,
238
+ std::vector<int>& prefix,
239
+ unsigned long long minsup,
240
+ std::vector<std::vector<int>>& out) {
241
+ std::unordered_map<int, unsigned long long> freq;
242
+ for (size_t sidx = 0; sidx < db.size(); ++sidx) {
243
+ const std::vector<int>& seq = db[sidx];
244
+ std::unordered_set<int> seen;
245
+ for (size_t i = 0; i < seq.size(); ++i) {
246
+ int x = seq[i];
247
+ if (seen.insert(x).second) ++freq[x];
302
248
  }
249
+ }
303
250
 
304
- // 3) sort by absolute item ID
305
- std::sort(cand.begin(), cand.end(),
306
- [](const std::pair<int, unsigned long long>& a,
307
- const std::pair<int, unsigned long long>& b) {
308
- return std::abs(a.first) < std::abs(b.first);
309
- });
251
+ std::vector<std::pair<int, unsigned long long> > cand;
252
+ cand.reserve(freq.size());
253
+ for (std::unordered_map<int, unsigned long long>::iterator it = freq.begin();
254
+ it != freq.end(); ++it) {
255
+ if (it->second >= minsup) cand.push_back(*it);
256
+ }
310
257
 
311
- // 4) depth‐first enumerate them
312
- for (auto const& pr : cand) {
313
- int item = pr.first;
314
- prefix.push_back(item);
258
+ std::sort(cand.begin(), cand.end(),
259
+ [](const std::pair<int, unsigned long long>& a,
260
+ const std::pair<int, unsigned long long>& b) {
261
+ return std::abs(a.first) < std::abs(b.first);
262
+ });
315
263
 
316
- if (use_dic) {
317
- // “un‐compress” each pattern back to original IDs
318
- std::vector<int> unmapped;
319
- unmapped.reserve(prefix.size());
320
- for (int cid : prefix) {
321
- int abs_id = std::abs(cid);
322
- int o = inv_item_dic[abs_id];
323
- unmapped.push_back(cid < 0 ? -o : o);
324
- }
325
- out.push_back(std::move(unmapped));
326
- } else {
327
- // just store the raw prefix
328
- out.push_back(prefix);
329
- }
264
+ for (size_t k = 0; k < cand.size(); ++k) {
265
+ int item = cand[k].first;
266
+ prefix.push_back(item);
330
267
 
331
- // 5) project on the *first* occurrence of `item`
332
- std::vector<std::vector<int>> proj;
333
- proj.reserve(db.size());
334
- for (auto const& seq : db) {
335
- auto it = std::find(seq.begin(), seq.end(), item);
336
- if (it != seq.end() && ++it != seq.end()) {
337
- proj.emplace_back(it, seq.end());
338
- }
268
+ if (use_dic) {
269
+ std::vector<int> unmapped;
270
+ unmapped.reserve(prefix.size());
271
+ for (size_t i = 0; i < prefix.size(); ++i) {
272
+ int cid = prefix[i];
273
+ int abs_id = std::abs(cid);
274
+ int o = inv_item_dic[abs_id];
275
+ unmapped.push_back(cid < 0 ? -o : o);
339
276
  }
277
+ out.push_back(unmapped);
278
+ } else {
279
+ out.push_back(prefix);
280
+ }
340
281
 
341
- if (!proj.empty()) {
342
- Freq_miner_list(proj, prefix, minsup, out);
282
+ std::vector<std::vector<int> > proj;
283
+ proj.reserve(db.size());
284
+ for (size_t s = 0; s < db.size(); ++s) {
285
+ const std::vector<int>& svec = db[s];
286
+ std::vector<int>::const_iterator it =
287
+ std::find(svec.begin(), svec.end(), item);
288
+ if (it != svec.end()) {
289
+ ++it;
290
+ if (it != svec.end()) proj.push_back(std::vector<int>(it, svec.end()));
343
291
  }
344
-
345
- prefix.pop_back();
346
292
  }
293
+
294
+ if (!proj.empty()) Freq_miner_list(proj, prefix, minsup, out);
295
+ prefix.pop_back();
347
296
  }
297
+ }
348
298
 
349
- } // namespace largebm
299
+ } // namespace largebm