effspm 0.1.11__cp313-cp313-macosx_10_13_universal2.whl → 0.2.1__cp313-cp313-macosx_10_13_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of effspm might be problematic. Click here for more details.

Files changed (48) hide show
  1. effspm/__init__.py +3 -3
  2. effspm/_effspm.cpp +437 -13
  3. effspm/_effspm.cpython-313-darwin.so +0 -0
  4. effspm/btminer/src/freq_miner.cpp +3 -0
  5. effspm/btminer/src/load_inst.cpp +10 -4
  6. effspm/btminer/src/load_inst.hpp +2 -0
  7. effspm/btminer/src/utility.cpp +31 -33
  8. effspm/btminer/src/utility.hpp +9 -13
  9. effspm/htminer/src/build_mdd.cpp +192 -0
  10. effspm/htminer/src/build_mdd.hpp +64 -0
  11. effspm/htminer/src/freq_miner.cpp +350 -0
  12. effspm/htminer/src/freq_miner.hpp +60 -0
  13. effspm/htminer/src/load_inst.cpp +381 -0
  14. effspm/htminer/src/load_inst.hpp +23 -0
  15. effspm/htminer/src/main.cpp +96 -0
  16. effspm/htminer/src/utility.cpp +72 -0
  17. effspm/htminer/src/utility.hpp +77 -0
  18. effspm/largebm/src/build_mdd.cpp +137 -0
  19. effspm/largebm/src/build_mdd.hpp +47 -0
  20. effspm/largebm/src/freq_miner.cpp +342 -0
  21. effspm/largebm/src/freq_miner.hpp +48 -0
  22. effspm/largebm/src/load_inst.cpp +235 -0
  23. effspm/largebm/src/load_inst.hpp +45 -0
  24. effspm/largebm/src/main.cpp +95 -0
  25. effspm/largebm/src/utility.cpp +45 -0
  26. effspm/largebm/src/utility.hpp +18 -0
  27. effspm/largehm/src/build_mdd.cpp +173 -0
  28. effspm/largehm/src/build_mdd.hpp +93 -0
  29. effspm/largehm/src/freq_miner.cpp +441 -0
  30. effspm/largehm/src/freq_miner.hpp +77 -0
  31. effspm/largehm/src/load_inst.cpp +357 -0
  32. effspm/largehm/src/load_inst.hpp +64 -0
  33. effspm/largehm/src/main.cpp +95 -0
  34. effspm/largehm/src/utility.cpp +38 -0
  35. effspm/largehm/src/utility.hpp +29 -0
  36. effspm/largepp/src/freq_miner.cpp +170 -0
  37. effspm/largepp/src/freq_miner.hpp +43 -0
  38. effspm/largepp/src/load_inst.cpp +219 -0
  39. effspm/largepp/src/load_inst.hpp +28 -0
  40. effspm/largepp/src/main.cpp +108 -0
  41. effspm/largepp/src/utility.cpp +33 -0
  42. effspm/largepp/src/utility.hpp +20 -0
  43. {effspm-0.1.11.dist-info → effspm-0.2.1.dist-info}/METADATA +1 -1
  44. effspm-0.2.1.dist-info/RECORD +59 -0
  45. {effspm-0.1.11.dist-info → effspm-0.2.1.dist-info}/WHEEL +1 -1
  46. effspm-0.1.11.dist-info/RECORD +0 -25
  47. {effspm-0.1.11.dist-info → effspm-0.2.1.dist-info}/licenses/LICENSE +0 -0
  48. {effspm-0.1.11.dist-info → effspm-0.2.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,77 @@
1
+ #pragma once
2
+
3
+ #include <vector>
4
+ #include <ctime>
5
+ #include <string>
6
+ #include "build_mdd.hpp"
7
+ #include "freq_miner.hpp"
8
+ #include "load_inst.hpp"
9
+
10
+ namespace htminer {
11
+
12
+ // ─── Global flags and counters ─────────────────────────────────────────────
13
+ /// Controls whether to mine in “list” mode (unused for HTMiner, but declared)
14
+ extern bool use_list;
15
+ /// If true, only build MDD and exit (don’t actually mine)
16
+ extern bool just_build;
17
+ /// If true, print each pattern to stdout as it’s found
18
+ extern bool b_disp;
19
+ /// If true, write each pattern to file (see out_file)
20
+ extern bool b_write;
21
+ /// If true, use a dictionary‐file mapping items → new IDs
22
+ extern bool use_dic;
23
+ /// If true, preprocess input (create dictionary) instead of mining
24
+ extern bool pre_pro;
25
+
26
+ /// Time limit (in seconds) for mining before forced exit
27
+ extern unsigned int time_limit;
28
+ /// Output filename (if b_write is true)
29
+ extern std::string out_file;
30
+ /// Clock tick when mining started
31
+ extern std::clock_t start_time;
32
+
33
+ // ─── Data‐set‐level globals ─────────────────────────────────────────────────
34
+ /// The input sequences (each sequence is a vector of integers)
35
+ extern std::vector<std::vector<int>> items;
36
+ /// Number of sequences (items.size())
37
+ extern unsigned long long N;
38
+ /// Number of distinct items (max absolute item ID)
39
+ extern unsigned long long L;
40
+ /// Minimum support threshold (absolute count, not fraction)
41
+ extern unsigned long long theta;
42
+ /// Maximum sequence length across all items
43
+ extern unsigned int M;
44
+ /// Total number of “entries” (sum of all sequence lengths)
45
+ extern unsigned long long E;
46
+
47
+ // ─── Per‐pattern DFS stacks ─────────────────────────────────────────────────
48
+ /// DFS stack of “in‐memory” patterns (each Pattern holds its own ilist/slist, freq, str_pnt, etc.)
49
+ extern std::vector<Pattern> DFS;
50
+
51
+ extern std::vector<std::vector<int>> collectedPatterns;
52
+ // ─── Collected output ───────────────────────────────────────────────────────
53
+ /// Empties both the DFS pattern stack and the collectedPatterns vector (original note: called at the start of each run)
54
+ inline void ClearCollected() {
55
+ DFS.clear(); // drop any patterns still on the DFS stack
56
+ collectedPatterns.clear(); // drop previously collected output sequences
57
+ }
58
+ /// Returns a const reference to a vector of patterns, each pattern stored as a vector<int>
59
+ /// (presumably this is `collectedPatterns`; the definition is not in this header — confirm in utility.cpp)
60
+ const std::vector<std::vector<int>>& GetCollected();
61
+
62
+ // ─── Helper functions ───────────────────────────────────────────────────────
63
+ /// Given a clock‐tick difference, return elapsed seconds as a float
64
+ float give_time(std::clock_t kk);
65
+
66
+ /// Check whether a candidate can extend its parent pattern:
67
+ /// cur_arc = current Arc node ID in MDD
68
+ /// str_pnt = string‐pointer of the existing pattern
69
+ /// start = starting index within the MDD arc‐list
70
+ /// strpnt_vec = parent’s “string pointers” vector
71
+ /// Returns true if `cur_arc` is a valid child of `str_pnt` from position `start`.
72
+ bool check_parent(unsigned int cur_arc,
73
+ unsigned int str_pnt,
74
+ unsigned int start,
75
+ std::vector<unsigned int>& strpnt_vec);
76
+
77
+ } // namespace htminer
@@ -0,0 +1,137 @@
1
+ // File: effspm/largebm/src/build_mdd.cpp
2
+
3
+ #include <vector>
4
+ #include <iostream>
5
+ #include <unordered_map>
6
+ #include "load_inst.hpp"
7
+ #include "build_mdd.hpp"
8
+ #include "freq_miner.hpp"
9
+ #include "utility.hpp"
10
+
11
+ namespace largebm {
12
+
13
+ // Forward declaration for Add_arc
14
+ int Add_arc(int item, unsigned long long int last_arc, int& itmset,
15
+ std::unordered_map<int, unsigned long long int>& ancest_map);
16
+
17
+ // Global MDD tree and other globals (declared in headers)
18
+ std::vector<Arc> Tree;
19
+ // Inserts one sequence into Tree: each element is attached (via Add_arc) beneath the node returned for the previous element, starting at node 0.
20
+ void Build_MDD(std::vector<int>& items) {
21
+ std::unordered_map<int, unsigned long long int> last_node_of_item; // per-sequence map handed to Add_arc as ancest_map
22
+ int itemset_counter = 0; // advanced by Add_arc whenever it sees a negative item
23
+ unsigned long long int parent_arc = 0; // node under which the next element is inserted
24
+
25
+ for (const int signed_item : items) {
26
+ parent_arc = Add_arc(signed_item, parent_arc, itemset_counter, last_node_of_item);
27
+ }
28
+ }
29
+
30
+ // Adds `item` beneath node `last_arc` of Tree (reusing a child that already holds the same item), updates frequency counters, records the node in ancest_map and returns its index.
31
+ int Add_arc(int item, unsigned long long int last_arc, int& itmset,
32
+ std::unordered_map<int, unsigned long long int>& ancest_map) {
33
+
34
+ unsigned idx = std::abs(item) - 1; // 0-based DFS slot for |item|; NOTE(review): item == 0 would wrap this unsigned value
35
+
36
+ // ─── DEBUG (disabled trace output) ────────────────────────
37
+ // std::cout << "[Add_arc] item=" << item
38
+ // << " idx=" << idx
39
+ // << " last_arc=" << last_arc
40
+ // << " Tree.size=" << Tree.size()
41
+ // << " DFS.size=" << DFS.size()
42
+ // << std::endl;
43
+
44
+ // Grow DFS so that slot idx exists; each new slot k is built as Pattern(-(k + 1))
45
+ if (idx >= DFS.size()) {
46
+ // std::cout << "[Add_arc] • resizing DFS to " << (idx + 1) << std::endl;
47
+ DFS.reserve(idx + 1);
48
+ while (DFS.size() <= idx) {
49
+ DFS.emplace_back(-static_cast<int>(DFS.size()) - 1); // Pattern(-id)
50
+ }
51
+ }
52
+
53
+ unsigned long long int anct; // node previously recorded for |item| in ancest_map, or 0 when there is none
54
+ auto p = ancest_map.find(std::abs(item));
55
+ if (p == ancest_map.end()) {
56
+ anct = 0;
57
+ } else {
58
+ anct = p->second;
59
+ }
60
+
61
+ if (item < 0) { // a negative item advances the itemset counter (presumably it opens a new itemset — confirm)
62
+ ++itmset;
63
+ }
64
+
65
+ // Diagnostic hook only: everything inside this branch is commented out, so it currently does nothing
66
+ if (last_arc >= Tree.size()) {
67
+ // std::cout << "[Add_arc] !!! last_arc OOB last_arc="
68
+ // << last_arc << " Tree.size=" << Tree.size()
69
+ // << std::endl;
70
+ // Execution deliberately continues; the guarded accesses below handle the out-of-range case
71
+ }
72
+
73
+ unsigned long long int last_sibl = 0; // first child of last_arc; stays 0 when last_arc has no child or is out of range
74
+ if (last_arc < Tree.size()) {
75
+ last_sibl = Tree[last_arc].chld;
76
+ }
77
+
78
+ if (last_sibl == 0) {
79
+ // No child yet: append a new node and make it the first child of last_arc
80
+ Tree.emplace_back(item, itmset, anct);
81
+ last_sibl = Tree.size() - 1;
82
+
83
+ if (last_arc < Tree.size()) { // size was just grown by emplace_back, so last_arc == old size also passes here
84
+ Tree[last_arc].chld = last_sibl;
85
+ }
86
+ if (anct == 0) {
87
+ // No earlier node recorded for this item: register the new node in the item's DFS entry
88
+ // std::cout << "[Add_arc] • DFS access at index=" << (std::abs(item) - 1)
89
+ // << " DFS.size=" << DFS.size() << std::endl;
90
+ DFS[std::abs(item) - 1].str_pnt.push_back(last_sibl);
91
+ }
92
+
93
+ } else {
94
+
95
+ // Walk the sibling chain until a node with the same item is found; append a new sibling if the chain ends first
96
+ while (true) {
97
+ if (last_sibl >= Tree.size()) {
98
+ // std::cout << "[Add_arc] !!! last_sibl OOB last_sibl="
99
+ // << last_sibl << " Tree.size=" << Tree.size()
100
+ // << std::endl;
101
+ break;
102
+ }
103
+ if (Tree[last_sibl].item == item) {
104
+ break;
105
+ }
106
+ if (Tree[last_sibl].sibl == 0) {
107
+ Tree.emplace_back(item, itmset, anct);
108
+ Tree[last_sibl].sibl = Tree.size() - 1;
109
+ last_sibl = Tree.size() - 1;
110
+ if (anct == 0) {
111
+ // std::cout << "[Add_arc] • DFS access at index=" << (std::abs(item) - 1)
112
+ // << " DFS.size=" << DFS.size() << std::endl;
113
+ DFS[std::abs(item) - 1].str_pnt.push_back(last_sibl);
114
+ }
115
+ break;
116
+ }
117
+ last_sibl = Tree[last_sibl].sibl;
118
+ }
119
+ }
120
+
121
+ if (anct == 0) { // counted once per call in which ancest_map had no (non-zero) entry for |item|
122
+ // std::cout << "[Add_arc] • increment DFS.freq at index=" << (std::abs(item) - 1)
123
+ // << " DFS.size=" << DFS.size() << std::endl;
124
+ DFS[std::abs(item) - 1].freq++;
125
+ }
126
+
127
+ if (last_sibl < Tree.size()) {
128
+ // std::cout << "[Add_arc] • increment Tree.freq at node=" << last_sibl
129
+ // << " Tree.size=" << Tree.size() << std::endl;
130
+ Tree[last_sibl].freq++;
131
+ }
132
+
133
+ ancest_map[std::abs(item)] = last_sibl; // remember the node now holding |item| for later calls using this map
134
+ return last_sibl; // NOTE(review): last_sibl is unsigned long long but the return type is int — narrows for very large trees
135
+ }
136
+
137
+ } // namespace largebm
@@ -0,0 +1,47 @@
1
+ #pragma once
2
+
3
+ #include<vector>
4
+ #include <cmath>
5
+ #include "load_inst.hpp"
6
+
7
+ namespace largebm {
8
+ void Build_MDD(std::vector<int>& items);
9
+ /// One node of the MDD kept in `Tree`; chld/sibl/anct are indices into that vector.
10
+ class Arc {
11
+ public:
12
+ // Every member has a default initializer so that no constructor leaves a field indeterminate.
13
+ unsigned long long int chld = 0; // index of the first child (Add_arc treats 0 as "no child")
14
+ unsigned long long int sibl = 0; // index of the next sibling (Add_arc treats 0 as "end of chain")
15
+ unsigned long long int freq = 0; // counter incremented by Add_arc each time the node is reached
16
+ unsigned long long int anct = 0; // index passed in as the ancestor link (0 when none)
17
+ int itmset = 0; // itemset counter value supplied at construction
18
+ int item = 0; // signed item ID stored in this node
19
+
20
+ Arc(int _itm, int _itmset, unsigned long long int _anc) {
21
+ itmset = _itmset;
22
+ anct = _anc;
23
+ item = _itm;
24
+ freq = 0;
25
+ chld = 0;
26
+ sibl = 0;
27
+ }
28
+
29
+ Arc(int _itm, int _anc) { // itmset is not supplied here; it now defaults to 0 instead of being left uninitialised
30
+ item = _itm;
31
+ anct = _anc;
32
+ freq = 0;
33
+ chld = 0;
34
+ sibl = 0;
35
+ }
36
+
37
+ Arc() { // item, itmset and anct now default to 0 instead of being left uninitialised
38
+ freq = 0;
39
+ chld = 0;
40
+ sibl = 0;
41
+ }
42
+
43
+
44
+ };
45
+
46
+ extern std::vector<Arc> Tree;
47
+ }
@@ -0,0 +1,342 @@
1
+ // File: effspm/largebm/src/freq_miner.cpp
2
+
3
+ #include <vector>
4
+ #include <algorithm>
5
+ #include <iostream>
6
+ #include <fstream>
7
+ #include <ctime>
8
+ #include <unordered_map>
9
+ #include <unordered_set>
10
+
11
+ #include "freq_miner.hpp" // must come before load_inst.hpp
12
+ #include "load_inst.hpp"
13
+ #include "utility.hpp"
14
+ #include "build_mdd.hpp"
15
+
16
+ namespace largebm {
17
+
18
+ // File-local helpers (static, so internal linkage); both are defined later in this file
19
+ static void Out_patt(const std::vector<int>& seq, unsigned long long freq);
20
+ static void Extend_patt(Pattern& patt);
21
+
22
+ // Globals (declared once; types must match freq_miner.hpp)
23
+ unsigned long long int num_patt = 0;
24
+ std::vector<bool> ilist;
25
+ std::vector<bool> slist;
26
+ std::vector<int> DFS_numfound;
27
+ Pattern _patt;
28
+ // Mining entry point: list mode delegates to Freq_miner_list; otherwise seeds every DFS entry with the frequent items and expands DFS until it is empty or time_limit is reached.
29
+ void Freq_miner() {
30
+ std::vector<int> list; // signed IDs of the items whose DFS entry reaches theta
31
+
32
+ if (use_list) {
33
+ // List-based routine: mine `items` directly from an empty prefix into `collected`
34
+ std::vector<int> empty_pref;
35
+ Freq_miner_list(items, empty_pref, theta, collected);
36
+ return;
37
+ }
38
+
39
+ // MDD-based initialization; NOTE(review): indexes DFS[0..L) — assumes DFS.size() >= L, confirm against the loader
40
+ for (int i = 0; i < static_cast<int>(L); ++i) {
41
+ if (DFS[i].freq >= theta) {
42
+ list.push_back(-i - 1);
43
+ if (itmset_exists) {
44
+ list.push_back(i + 1);
45
+ }
46
+ }
47
+ }
48
+ for (size_t i = 0; i < DFS.size(); ++i) {
49
+ DFS[i].list = list; // every starting pattern receives the same candidate list
50
+ }
51
+
52
+ // Expand the pattern on top of the stack while it is frequent; otherwise discard it
53
+ while (!DFS.empty() && give_time(clock() - start_time) < time_limit) {
53
+ if (DFS.back().freq >= theta) {
54
+ Extend_patt(DFS.back());
55
+ } else {
56
+ DFS.pop_back();
57
+ }
58
+ }
59
+ }
60
+ // Takes over `_pattern` (called with DFS.back(), which is popped), tallies its candidate one-item extensions over Tree, and pushes those reaching theta back onto DFS.
61
+ void Extend_patt(Pattern& _pattern) {
62
+ swap(_patt, _pattern); // move the pattern into the global scratch slot before its DFS entry is popped
63
+ DFS.pop_back();
64
+ // Candidate masks over item IDs: slist is set from negative entries of _patt.list, ilist from positive ones
65
+ slist = std::vector<bool>(L, false);
66
+ bool ilist_nempty = false;
67
+
68
+ if (itmset_exists) {
69
+ ilist = std::vector<bool>(L, false);
70
+ for (auto it = _patt.list.begin(); it != _patt.list.end(); ++it) {
71
+ if (*it < 0) {
72
+ slist[-(*it) - 1] = true;
73
+ } else {
74
+ ilist[(*it) - 1] = true;
75
+ ilist_nempty = true;
76
+ }
77
+ }
78
+ } else {
79
+ for (auto it = _patt.list.begin(); it != _patt.list.end(); ++it) {
80
+ slist[-(*it) - 1] = true;
81
+ }
82
+ }
83
+ // itmset_size = 1 + number of trailing positive entries of seq; last_neg = index of the last non-positive entry
84
+ int itmset_size = 1;
85
+ int last_neg = static_cast<int>(_patt.seq.size()) - 1;
86
+ while (_patt.seq[last_neg] > 0) { // NOTE(review): no lower bound on last_neg — assumes seq contains a non-positive entry
87
+ --last_neg;
88
+ ++itmset_size;
89
+ }
90
+ // pot_patt[0, L) collects slist candidates; pot_patt[L, 2L) collects ilist candidates (allocated only when ilist is non-empty)
91
+ std::vector<Pattern> pot_patt(L + (ilist_nempty ? L : 0));
92
+ std::vector<unsigned long long int> DFS_patt_init;
93
+ std::vector<unsigned long long int> DFS_patt;
94
+ if (ilist_nempty) {
95
+ DFS_numfound.clear();
96
+ }
97
+ std::vector<unsigned long long int> last_strpnt(L, 0);
98
+ // For every Tree node recorded in _patt.str_pnt, scan the nodes below it in two phases
99
+ for (unsigned long long int pnt = 0; pnt < _patt.str_pnt.size(); ++pnt) {
100
+ DFS_patt_init.push_back(_patt.str_pnt[pnt]);
101
+ while (!DFS_patt_init.empty()) { // phase 1: children reached from the start node through positive-item nodes only
102
+ unsigned long long int cur_sibl = Tree[DFS_patt_init.back()].chld;
103
+ DFS_patt_init.pop_back();
104
+ while (cur_sibl != 0) {
105
+ int cur_itm = Tree[cur_sibl].item;
106
+ if (cur_itm < 0) {
107
+ cur_itm = -cur_itm;
108
+ if (slist[cur_itm - 1]) {
109
+ pot_patt[cur_itm - 1].freq += Tree[cur_sibl].freq;
110
+ if (Tree[cur_sibl].chld != 0) {
111
+ pot_patt[cur_itm - 1].str_pnt.push_back(cur_sibl);
112
+ }
113
+ }
114
+ if (Tree[cur_sibl].chld != static_cast<unsigned long long>(-1)) { // NOTE(review): Arc sets chld to 0 for "no child"; this compares against the max value and looks always true — confirm intent
115
+ DFS_patt.push_back(cur_sibl);
116
+ if (ilist_nempty) {
117
+ if (cur_itm == -_patt.seq[last_neg]) {
118
+ DFS_numfound.push_back(1);
119
+ } else {
120
+ DFS_numfound.push_back(0);
121
+ }
122
+ }
123
+ }
124
+ } else {
125
+ if (ilist[cur_itm - 1]) {
126
+ pot_patt[cur_itm + L - 1].freq += Tree[cur_sibl].freq;
127
+ if (Tree[cur_sibl].chld != 0) {
128
+ pot_patt[cur_itm + L - 1].str_pnt.push_back(cur_sibl);
129
+ }
130
+ }
131
+ if (Tree[cur_sibl].chld != static_cast<unsigned long long>(-1)) {
132
+ DFS_patt_init.push_back(cur_sibl);
133
+ }
134
+ }
135
+ cur_sibl = Tree[cur_sibl].sibl;
136
+ }
137
+ }
138
+ if (ilist_nempty) { // remember how many ilist pointers exist so far; passed to check_parent as its `start` argument
139
+ for (int i = 0; i < static_cast<int>(L); ++i) {
140
+ if (ilist[i]) {
141
+ last_strpnt[i] = pot_patt[i + L].str_pnt.size();
142
+ }
143
+ }
144
+ }
145
+ while (!DFS_patt.empty()) { // phase 2: nodes below the negative-item nodes queued in phase 1
146
+ unsigned long long int cur_sibl = Tree[DFS_patt.back()].chld;
147
+ DFS_patt.pop_back();
148
+ int num_found = 0; // value popped from DFS_numfound for this node; compared with itmset_size below
149
+ if (ilist_nempty) {
150
+ num_found = DFS_numfound.back();
151
+ DFS_numfound.pop_back();
152
+ }
153
+ while (cur_sibl != 0) {
154
+ int cur_itm = Tree[cur_sibl].item;
155
+ if (cur_itm > 0) {
156
+ if (num_found == itmset_size &&
157
+ ilist[cur_itm - 1] &&
158
+ (Tree[Tree[cur_sibl].anct].itmset < Tree[_patt.str_pnt[pnt]].itmset ||
159
+ !check_parent(cur_sibl, _patt.str_pnt[pnt],
160
+ last_strpnt[cur_itm - 1],
161
+ pot_patt[cur_itm + L - 1].str_pnt))) {
162
+ pot_patt[cur_itm + L - 1].freq += Tree[cur_sibl].freq;
163
+ if (Tree[cur_sibl].chld != 0) {
164
+ pot_patt[cur_itm + L - 1].str_pnt.push_back(cur_sibl);
165
+ }
166
+ }
167
+ if (slist[cur_itm - 1] &&
168
+ Tree[Tree[cur_sibl].anct].itmset <= Tree[_patt.str_pnt[pnt]].itmset) {
169
+ pot_patt[cur_itm - 1].freq += Tree[cur_sibl].freq;
170
+ if (Tree[cur_sibl].chld != 0) {
171
+ pot_patt[cur_itm - 1].str_pnt.push_back(cur_sibl);
172
+ }
173
+ }
174
+ if (Tree[cur_sibl].chld != static_cast<unsigned long long>(-1)) {
175
+ DFS_patt.push_back(cur_sibl);
176
+ if (ilist_nempty) {
177
+ if (num_found < itmset_size &&
178
+ cur_itm == std::abs(_patt.seq[last_neg + num_found])) {
179
+ DFS_numfound.push_back(num_found + 1);
180
+ } else {
181
+ DFS_numfound.push_back(num_found);
182
+ }
183
+ }
184
+ }
185
+ } else {
186
+ cur_itm = -cur_itm;
187
+ if (slist[cur_itm - 1] &&
188
+ Tree[Tree[cur_sibl].anct].itmset <= Tree[_patt.str_pnt[pnt]].itmset) {
189
+ pot_patt[cur_itm - 1].freq += Tree[cur_sibl].freq;
190
+ if (Tree[cur_sibl].chld != 0) {
191
+ pot_patt[cur_itm - 1].str_pnt.push_back(cur_sibl);
192
+ }
193
+ }
194
+ if (Tree[cur_sibl].chld != static_cast<unsigned long long>(-1)) {
195
+ DFS_patt.push_back(cur_sibl);
196
+ if (ilist_nempty) {
197
+ if (cur_itm == -_patt.seq[last_neg]) {
198
+ DFS_numfound.push_back(1);
199
+ } else {
200
+ DFS_numfound.push_back(0);
201
+ }
202
+ }
203
+ }
204
+ }
205
+ cur_sibl = Tree[cur_sibl].sibl;
206
+ }
207
+ }
208
+ }
209
+ // Keep only the candidates whose accumulated frequency reaches theta; these become the lists of the new patterns
210
+ std::vector<int> ilistp;
211
+ std::vector<int> slistp;
212
+ for (auto it = _patt.list.begin(); it != _patt.list.end(); ++it) {
213
+ int idx = (*it < 0) ? (-(*it) - 1) : ((*it) - 1 + static_cast<int>(L));
214
+ if (*it > 0 && pot_patt[idx].freq >= theta) {
215
+ ilistp.push_back(*it);
216
+ } else if (*it < 0 && pot_patt[-(*it) - 1].freq >= theta) {
217
+ if (itmset_exists) {
218
+ slistp.push_back(-(*it));
219
+ }
220
+ ilistp.push_back(*it);
221
+ slistp.push_back(*it);
222
+ }
223
+ }
224
+ // Turn each surviving candidate into a new pattern on the DFS stack (seq = current seq + the item)
225
+ for (auto it = ilistp.begin(); it != ilistp.end(); ++it) {
226
+ int p;
227
+ if (*it < 0) {
228
+ p = -(*it) - 1;
229
+ } else {
230
+ p = (*it) - 1 + static_cast<int>(L);
231
+ }
232
+
233
+ DFS.emplace_back();
234
+ swap(DFS.back(), pot_patt[p]); // take the tallied freq/str_pnt without copying
235
+ DFS.back().seq = _patt.seq;
236
+ DFS.back().seq.push_back(*it);
237
+ if (*it < 0) {
238
+ DFS.back().list = slistp;
239
+ } else {
240
+ DFS.back().list = ilistp;
241
+ }
242
+ if (b_disp || b_write) {
243
+ Out_patt(DFS.back().seq, DFS.back().freq);
244
+ }
245
+ ++num_patt;
246
+ }
247
+ }
248
+ // Emits one pattern: prints it when b_disp is set, appends it to out_file when b_write is set, and always appends seq to `collected`.
249
+ void Out_patt(const std::vector<int>& seq, unsigned long long freq) {
250
+ if (b_disp || b_write) {
251
+ std::ofstream file_o;
252
+ if (b_write) {
253
+ file_o.open(out_file, std::ios::app); // the file is reopened in append mode on every call
254
+ }
255
+ for (int v : seq) {
256
+ if (b_disp) std::cout << v << ' ';
257
+ if (b_write) file_o << v << ' ';
258
+ }
259
+ if (b_disp) std::cout << '\n';
260
+ if (b_write) file_o << '\n';
261
+
262
+ if (b_disp) {
263
+ std::cout << "************** Freq: " << freq << '\n';
264
+ }
265
+ if (b_write) {
266
+ file_o << "************** Freq: " << freq << '\n';
267
+ file_o.close();
268
+ }
269
+ }
270
+ collected.push_back(seq); // NOTE(review): the visible caller (Extend_patt) only calls this when b_disp || b_write, so nothing is collected otherwise — confirm intent
271
+ }
272
+ // Recursive list-mode miner: appends to `out` every prefix extension whose item occurs in at least `minsup` sequences of `db`, then recurses on the projected database.
273
+ void Freq_miner_list(const std::vector<std::vector<int>>& db,
274
+ std::vector<int>& prefix,
275
+ unsigned long long minsup,
276
+ std::vector<std::vector<int>>& out) {
277
+ // 1) count per-item support, at most once per sequence (signed values are counted separately: x and -x are distinct keys)
278
+ std::unordered_map<int, unsigned long long> freq;
279
+ for (auto const& seq : db) {
280
+ std::unordered_set<int> seen;
281
+ for (int x : seq) {
282
+ if (seen.insert(x).second) {
283
+ ++freq[x];
284
+ }
285
+ }
286
+ }
287
+
288
+ // 2) keep the items whose support reaches minsup
289
+ std::vector<std::pair<int, unsigned long long>> cand;
290
+ cand.reserve(freq.size());
291
+ for (auto& p : freq) {
292
+ if (p.second >= minsup) {
293
+ cand.emplace_back(p.first, p.second);
294
+ }
295
+ }
296
+
297
+ // 3) sort by absolute item ID (std::sort is not stable, so the relative order of x and -x is unspecified)
298
+ std::sort(cand.begin(), cand.end(),
299
+ [](const std::pair<int, unsigned long long>& a,
300
+ const std::pair<int, unsigned long long>& b) {
301
+ return std::abs(a.first) < std::abs(b.first);
302
+ });
303
+
304
+ // 4) depth-first: extend the shared prefix with each candidate, emit it, recurse, then restore the prefix
305
+ for (auto const& pr : cand) {
306
+ int item = pr.first;
307
+ prefix.push_back(item);
308
+
309
+ if (use_dic) {
310
+ // translate every element back through inv_item_dic, keeping its sign
311
+ std::vector<int> unmapped;
312
+ unmapped.reserve(prefix.size());
313
+ for (int cid : prefix) {
314
+ int abs_id = std::abs(cid);
315
+ int o = inv_item_dic[abs_id];
316
+ unmapped.push_back(cid < 0 ? -o : o);
317
+ }
318
+ out.push_back(std::move(unmapped));
319
+ } else {
320
+ // store a copy of the prefix as it is
321
+ out.push_back(prefix);
322
+ }
323
+
324
+ // 5) projection: for each sequence, keep the non-empty suffix after the first exact (signed) occurrence of `item`
325
+ std::vector<std::vector<int>> proj;
326
+ proj.reserve(db.size());
327
+ for (auto const& seq : db) {
328
+ auto it = std::find(seq.begin(), seq.end(), item);
329
+ if (it != seq.end() && ++it != seq.end()) {
330
+ proj.emplace_back(it, seq.end());
331
+ }
332
+ }
333
+
334
+ if (!proj.empty()) {
335
+ Freq_miner_list(proj, prefix, minsup, out);
336
+ }
337
+
338
+ prefix.pop_back();
339
+ }
340
+ }
341
+
342
+ } // namespace largebm
@@ -0,0 +1,48 @@
1
+ #pragma once
2
+
3
+ #include "load_inst.hpp"
4
+ #include "build_mdd.hpp"
5
+
6
+ namespace largebm {
7
+
8
+ void Freq_miner();
9
+ // recursive helper for the list‐based mode
10
+ void Freq_miner_list(const std::vector<std::vector<int>>& db,
11
+ std::vector<int>& prefix,
12
+ unsigned long long theta,
13
+ std::vector<std::vector<int>>& out);
14
+ class Pattern { // one node of the mining search: a sequence plus where it ends in Tree and how often it occurs
15
+ public:
16
+ // Containers are spelled std::vector so this header does not depend on a using-directive from another header.
17
+ std::vector<int> seq; // signed item IDs making up the pattern
18
+ std::vector<unsigned long long int> str_pnt; // Tree node indices (used as Tree[...] subscripts in freq_miner.cpp)
19
+ std::vector<int> list; // signed item IDs still considered as extensions of this pattern
20
+
21
+ unsigned long long int freq; // support counter; set to 0 by every constructor
22
+
23
+ Pattern(std::vector<int>& _seq, int item) { // swaps _seq into seq (the caller's vector is left holding the previous, empty seq), then appends item
24
+ seq.swap(_seq);
25
+ seq.push_back(item);
26
+ freq = 0;
27
+ }
28
+
29
+ Pattern(int item) { // single-item pattern
30
+ seq.push_back(item);
31
+ freq = 0;
32
+ }
33
+
34
+ Pattern() { // empty pattern
35
+ freq = 0;
36
+ }
37
+
38
+
39
+ };
40
+
41
+ extern unsigned long long int num_patt;
42
+ extern std::vector<bool> ilist;
43
+ extern std::vector<bool> slist;
44
+ extern std::vector<int> DFS_numfound;
45
+ extern Pattern _patt;
46
+
47
+
48
+ }