effspm 0.2.7__cp39-cp39-win_amd64.whl → 0.3.3__cp39-cp39-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. effspm/_effspm.cp39-win_amd64.pyd +0 -0
  2. effspm/_effspm.cpp +961 -210
  3. effspm/btminer/src/build_mdd.cpp +42 -17
  4. effspm/btminer/src/build_mdd.hpp +13 -19
  5. effspm/btminer/src/freq_miner.cpp +134 -49
  6. effspm/btminer/src/freq_miner.hpp +16 -0
  7. effspm/btminer/src/load_inst.cpp +211 -126
  8. effspm/btminer/src/load_inst.hpp +22 -4
  9. effspm/btminer/src/main.cpp +83 -0
  10. effspm/btminer/src/utility.cpp +26 -41
  11. effspm/btminer/src/utility.hpp +6 -30
  12. effspm/freq_miner.hpp +2 -1
  13. effspm/htminer/src/build_mdd.cpp +46 -124
  14. effspm/htminer/src/build_mdd.hpp +56 -49
  15. effspm/htminer/src/freq_miner.cpp +341 -307
  16. effspm/htminer/src/freq_miner.hpp +39 -40
  17. effspm/htminer/src/load_inst.cpp +287 -336
  18. effspm/htminer/src/load_inst.hpp +23 -6
  19. effspm/htminer/src/main.cpp +97 -0
  20. effspm/htminer/src/utility.cpp +38 -57
  21. effspm/htminer/src/utility.hpp +9 -64
  22. effspm/largebm/src/build_mdd.cpp +69 -110
  23. effspm/largebm/src/build_mdd.hpp +22 -37
  24. effspm/largebm/src/freq_miner.cpp +241 -291
  25. effspm/largebm/src/freq_miner.hpp +25 -36
  26. effspm/largebm/src/load_inst.cpp +20 -26
  27. effspm/largebm/src/load_inst.hpp +24 -34
  28. effspm/largebm/src/main.cpp +95 -0
  29. effspm/largebm/src/utility.cpp +11 -21
  30. effspm/largebm/src/utility.hpp +7 -10
  31. effspm/largehm/src/build_mdd.cpp +75 -110
  32. effspm/largehm/src/build_mdd.hpp +53 -73
  33. effspm/largehm/src/freq_miner.cpp +134 -191
  34. effspm/largehm/src/freq_miner.hpp +37 -60
  35. effspm/largehm/src/load_inst.cpp +137 -174
  36. effspm/largehm/src/load_inst.hpp +13 -50
  37. effspm/largehm/src/main.cpp +95 -0
  38. effspm/largehm/src/utility.cpp +46 -28
  39. effspm/largehm/src/utility.hpp +18 -16
  40. effspm/largepp/src/freq_miner.cpp +184 -156
  41. effspm/largepp/src/freq_miner.hpp +11 -36
  42. effspm/largepp/src/load_inst.cpp +32 -12
  43. effspm/largepp/src/load_inst.hpp +15 -9
  44. effspm/largepp/src/main.cpp +108 -0
  45. effspm/largepp/src/pattern.hpp +31 -0
  46. effspm/load_inst.cpp +8 -8
  47. effspm/load_inst.hpp +1 -1
  48. effspm/main.cpp +103 -0
  49. {effspm-0.2.7.dist-info → effspm-0.3.3.dist-info}/METADATA +1 -1
  50. effspm-0.3.3.dist-info/RECORD +60 -0
  51. effspm-0.2.7.dist-info/RECORD +0 -53
  52. {effspm-0.2.7.dist-info → effspm-0.3.3.dist-info}/WHEEL +0 -0
  53. {effspm-0.2.7.dist-info → effspm-0.3.3.dist-info}/licenses/LICENSE +0 -0
  54. {effspm-0.2.7.dist-info → effspm-0.3.3.dist-info}/top_level.txt +0 -0
@@ -1,26 +1,46 @@
1
1
  #include <vector>
2
2
  #include <iostream>
3
3
  #include <unordered_map>
4
- #include "load_inst.hpp"
4
+ #include <cstdlib>
5
+ #include <cmath>
6
+
7
+ #include "load_inst.hpp" // ← has: extern unsigned long long E;
5
8
  #include "build_mdd.hpp"
6
9
  #include "freq_miner.hpp"
7
10
  #include "utility.hpp"
8
11
 
9
12
  namespace btminer {
10
13
 
11
- int Add_arc(int item, int last_arc, int& itmset, std::unordered_map<int, int>& ancest_map);
12
- std::vector<Arc> Tree;
14
+ using std::vector;
15
+ using std::unordered_map;
16
+ using std::abs;
17
+
18
+ static int Add_arc(int item,
19
+ int last_arc,
20
+ int& itmset,
21
+ unordered_map<int, int>& ancest_map);
22
+
23
+ vector<Arc> Tree; // professor had this global
24
+
25
+ void Build_MDD(vector<int>& items) {
26
+ unordered_map<int, int> ancest_map;
27
+
28
+ int last_arc = 0;
29
+ int itmset = 0;
13
30
 
14
- void Build_MDD(std::vector<int>& items) {
15
- std::unordered_map<int, int> ancest_map;
16
- int last_arc = 0, itmset = 0;
17
- for (auto it = items.begin(); it != items.end(); ++it)
18
- last_arc = Add_arc(*it, last_arc, itmset, ancest_map);
31
+ for (int item : items) {
32
+ ++E; // ✅ count this entry, just like prefix-projection does
33
+ last_arc = Add_arc(item, last_arc, itmset, ancest_map);
34
+ }
19
35
  }
20
36
 
21
- int Add_arc(int item, int last_arc, int& itmset, std::unordered_map<int, int>& ancest_map) {
37
+ static int Add_arc(int item,
38
+ int last_arc,
39
+ int& itmset,
40
+ unordered_map<int, int>& ancest_map) {
41
+
22
42
  int anct;
23
- auto p = ancest_map.find(abs(item));
43
+ auto p = ancest_map.find(std::abs(item));
24
44
  if (p == ancest_map.end())
25
45
  anct = 0;
26
46
  else
@@ -32,19 +52,23 @@ int Add_arc(int item, int last_arc, int& itmset, std::unordered_map<int, int>& a
32
52
  int last_sibl = Tree[last_arc].chld;
33
53
 
34
54
  if (last_sibl == -1) {
55
+ // create child
35
56
  Tree.emplace_back(item, itmset, anct);
36
- last_sibl = Tree.size() - 1;
57
+ last_sibl = static_cast<int>(Tree.size()) - 1;
37
58
  Tree[last_arc].chld = last_sibl;
59
+
38
60
  if (anct == 0)
39
- DFS[abs(item) - 1].str_pnt.push_back(last_sibl);
61
+ DFS[std::abs(item) - 1].str_pnt.push_back(last_sibl);
62
+
40
63
  } else {
64
+ // walk siblings
41
65
  while (Tree[last_sibl].item != item) {
42
66
  if (Tree[last_sibl].sibl == -1) {
43
67
  Tree.emplace_back(item, itmset, anct);
44
- Tree[last_sibl].sibl = Tree.size() - 1;
45
- last_sibl = Tree.size() - 1;
68
+ Tree[last_sibl].sibl = static_cast<int>(Tree.size()) - 1;
69
+ last_sibl = static_cast<int>(Tree.size()) - 1;
46
70
  if (anct == 0)
47
- DFS[abs(item) - 1].str_pnt.push_back(last_sibl);
71
+ DFS[std::abs(item) - 1].str_pnt.push_back(last_sibl);
48
72
  break;
49
73
  }
50
74
  last_sibl = Tree[last_sibl].sibl;
@@ -52,10 +76,11 @@ int Add_arc(int item, int last_arc, int& itmset, std::unordered_map<int, int>& a
52
76
  }
53
77
 
54
78
  if (anct == 0)
55
- ++DFS[abs(item) - 1].freq;
79
+ ++DFS[std::abs(item) - 1].freq;
56
80
 
57
81
  ++Tree[last_sibl].freq;
58
- ancest_map[abs(item)] = last_sibl;
82
+
83
+ ancest_map[std::abs(item)] = last_sibl;
59
84
 
60
85
  return last_sibl;
61
86
  }
@@ -6,7 +6,9 @@
6
6
 
7
7
  namespace btminer {
8
8
 
9
- void Build_MDD(std::vector<int>& items);
9
+ using std::vector;
10
+
11
+ void Build_MDD(vector<int>& items);
10
12
 
11
13
  class Arc {
12
14
  public:
@@ -17,24 +19,16 @@ public:
17
19
  int itmset;
18
20
  int item;
19
21
 
20
- Arc(int _itm, int _itmset, int _anc) {
21
- itmset = _itmset;
22
- anct = _anc;
23
- item = _itm;
24
- }
25
-
26
- Arc(int _itm, int _anc) {
27
- item = _itm;
28
- anct = _anc;
29
- }
30
-
31
- Arc() {
32
- chld = -1;
33
- sibl = -1;
34
- freq = 0;
35
- }
22
+ Arc(int _itm, int _itmset, int _anc)
23
+ : chld(-1), sibl(-1), freq(0), anct(_anc), itmset(_itmset), item(_itm) {}
24
+
25
+ // sometimes professor used this shorter ctor
26
+ Arc(int _itm, int _anc)
27
+ : chld(-1), sibl(-1), freq(0), anct(_anc), itmset(0), item(_itm) {}
28
+
29
+ Arc() = default;
36
30
  };
37
31
 
38
- extern std::vector<Arc> Tree;
32
+ extern vector<Arc> Tree;
39
33
 
40
- }
34
+ } // namespace btminer
@@ -1,27 +1,59 @@
1
+ // effspm/btminer/src/freq_miner.cpp
1
2
  #include <iostream>
3
+ #include <fstream> // for ofstream
2
4
  #include <time.h>
3
- #include <vector>
4
- #include <fstream>
5
- #include <cmath>
6
5
  #include "freq_miner.hpp"
7
6
  #include "build_mdd.hpp"
8
7
  #include "utility.hpp"
8
+ #include "load_inst.hpp"
9
9
 
10
10
  namespace btminer {
11
11
 
12
- void Out_patt(std::vector<int>& seq, int freq);
13
- void Extend_patt(Pattern _patt);
12
+ using namespace std;
14
13
 
14
+ // professor logic needs these forward decls
15
+ static void Out_patt(vector<int>& seq, int freq);
16
+ static void Extend_patt(Pattern _patt);
17
+
18
+ // ✅ define ONLY num_patt here
15
19
  int num_patt = 0;
16
20
 
21
+ // ✅ here we ONLY REFER to DFS (real one lives in load_inst.cpp)
22
+ extern std::vector<Pattern> DFS;
23
+
24
+ // ✅ NEW: actual storage for collected BT patterns
25
+ std::vector<std::vector<int>> collectedPatterns;
26
+
27
+ // ------------------------------------------------------------
28
+ // helper API
29
+ // ------------------------------------------------------------
30
+ void ClearCollected() {
31
+ collectedPatterns.clear();
32
+ // optional: keep some capacity
33
+ collectedPatterns.reserve(512);
34
+ }
35
+
36
+ const std::vector<std::vector<int>>& GetCollected() {
37
+ return collectedPatterns;
38
+ }
39
+
40
+ // ------------------------------------------------------------
41
+ // main entry
42
+ // ------------------------------------------------------------
17
43
  void Freq_miner() {
18
- std::vector<int> islist;
44
+
45
+ // every run should start clean
46
+ ClearCollected();
47
+ num_patt = 0;
48
+
49
+ vector<int> islist;
50
+
19
51
  for (int i = 0; i < L; ++i) {
20
52
  if (DFS[i].freq >= theta)
21
53
  islist.push_back(i);
22
54
  }
23
55
 
24
- for (int i = 0; i < DFS.size(); ++i) {
56
+ for (int i = 0; i < static_cast<int>(DFS.size()); ++i) {
25
57
  DFS[i].ilist = islist;
26
58
  DFS[i].slist = islist;
27
59
  }
@@ -34,13 +66,20 @@ void Freq_miner() {
34
66
  }
35
67
  }
36
68
 
37
- void Extend_patt(Pattern _patt) {
69
+ // ------------------------------------------------------------
70
+ // recursive extension
71
+ // ------------------------------------------------------------
72
+ static void Extend_patt(Pattern _patt) {
73
+
38
74
  DFS.pop_back();
39
- std::vector<bool> slist(L, 0);
40
- std::vector<bool> ilist(L, 0);
41
75
 
42
- for (auto it : _patt.slist) slist[it] = 1;
43
- for (auto it : _patt.ilist) ilist[it] = 1;
76
+ vector<bool> slist(L, 0);
77
+ vector<bool> ilist(L, 0);
78
+
79
+ for (vector<int>::iterator it = _patt.slist.begin(); it != _patt.slist.end(); ++it)
80
+ slist[*it] = 1;
81
+ for (vector<int>::iterator it = _patt.ilist.begin(); it != _patt.ilist.end(); ++it)
82
+ ilist[*it] = 1;
44
83
 
45
84
  int itmset_size = 1;
46
85
  int last_neg = _patt.seq.size() - 1;
@@ -49,10 +88,13 @@ void Extend_patt(Pattern _patt) {
49
88
  ++itmset_size;
50
89
  }
51
90
 
52
- std::vector<Pattern> pot_patt(2 * L);
53
- std::vector<int> DFS_patt_init, DFS_patt, DFS_numfound, last_strpnt(L, 0);
91
+ vector<Pattern> pot_patt(2 * L);
92
+ vector<int> DFS_patt_init;
93
+ vector<int> DFS_patt;
94
+ vector<int> DFS_numfound;
95
+ vector<int> last_strpnt(L, 0);
54
96
 
55
- for (int pnt = 0; pnt < _patt.str_pnt.size(); ++pnt) {
97
+ for (int pnt = 0; pnt < static_cast<int>(_patt.str_pnt.size()); ++pnt) {
56
98
  DFS_patt_init.push_back(_patt.str_pnt[pnt]);
57
99
  while (!DFS_patt_init.empty()) {
58
100
  int cur_sibl = Tree[DFS_patt_init.back()].chld;
@@ -67,9 +109,15 @@ void Extend_patt(Pattern _patt) {
67
109
  }
68
110
  if (Tree[cur_sibl].chld != -1) {
69
111
  DFS_patt.push_back(cur_sibl);
70
- DFS_numfound.push_back(cur_itm == -_patt.seq[last_neg] ? 1 : 0);
112
+ if (!_patt.ilist.empty()) {
113
+ if (cur_itm == -_patt.seq[last_neg])
114
+ DFS_numfound.push_back(1);
115
+ else
116
+ DFS_numfound.push_back(0);
117
+ }
71
118
  }
72
- } else {
119
+ }
120
+ else {
73
121
  if (ilist[cur_itm - 1]) {
74
122
  pot_patt[cur_itm - 1].freq += Tree[cur_sibl].freq;
75
123
  pot_patt[cur_itm - 1].str_pnt.push_back(cur_sibl);
@@ -81,8 +129,8 @@ void Extend_patt(Pattern _patt) {
81
129
  }
82
130
  }
83
131
 
84
- for (auto it : _patt.ilist)
85
- last_strpnt[it] = pot_patt[it].str_pnt.size();
132
+ for (vector<int>::iterator it = _patt.ilist.begin(); it != _patt.ilist.end(); ++it)
133
+ last_strpnt[*it] = pot_patt[*it].str_pnt.size();
86
134
 
87
135
  while (!DFS_patt.empty()) {
88
136
  int cur_sibl = Tree[DFS_patt.back()].chld;
@@ -92,9 +140,13 @@ void Extend_patt(Pattern _patt) {
92
140
  while (cur_sibl != -1) {
93
141
  int cur_itm = Tree[cur_sibl].item;
94
142
  if (cur_itm > 0) {
95
- if (num_found == itmset_size && ilist[cur_itm - 1] &&
143
+ if (num_found == itmset_size &&
144
+ ilist[cur_itm - 1] &&
96
145
  (Tree[Tree[cur_sibl].anct].itmset < Tree[_patt.str_pnt[pnt]].itmset ||
97
- !check_parent(cur_sibl, _patt.str_pnt[pnt], last_strpnt[cur_itm - 1], pot_patt[cur_itm - 1].str_pnt))) {
146
+ !check_parent(cur_sibl,
147
+ _patt.str_pnt[pnt],
148
+ last_strpnt[cur_itm - 1],
149
+ pot_patt[cur_itm - 1].str_pnt))) {
98
150
  pot_patt[cur_itm - 1].freq += Tree[cur_sibl].freq;
99
151
  pot_patt[cur_itm - 1].str_pnt.push_back(cur_sibl);
100
152
  }
@@ -105,10 +157,14 @@ void Extend_patt(Pattern _patt) {
105
157
  if (Tree[cur_sibl].chld != -1) {
106
158
  DFS_patt.push_back(cur_sibl);
107
159
  if (!_patt.ilist.empty()) {
108
- DFS_numfound.push_back((num_found < itmset_size && cur_itm == abs(_patt.seq[last_neg + num_found])) ? num_found + 1 : num_found);
160
+ if (num_found < itmset_size && cur_itm == abs(_patt.seq[last_neg + num_found]))
161
+ DFS_numfound.push_back(num_found + 1);
162
+ else
163
+ DFS_numfound.push_back(num_found);
109
164
  }
110
165
  }
111
- } else {
166
+ }
167
+ else {
112
168
  cur_itm = -cur_itm;
113
169
  if (slist[cur_itm - 1] && Tree[Tree[cur_sibl].anct].itmset <= Tree[_patt.str_pnt[pnt]].itmset) {
114
170
  pot_patt[cur_itm + L - 1].freq += Tree[cur_sibl].freq;
@@ -117,7 +173,10 @@ void Extend_patt(Pattern _patt) {
117
173
  if (Tree[cur_sibl].chld != -1) {
118
174
  DFS_patt.push_back(cur_sibl);
119
175
  if (!_patt.ilist.empty()) {
120
- DFS_numfound.push_back(cur_itm == -_patt.seq[last_neg] ? 1 : 0);
176
+ if (cur_itm == -_patt.seq[last_neg])
177
+ DFS_numfound.push_back(1);
178
+ else
179
+ DFS_numfound.push_back(0);
121
180
  }
122
181
  }
123
182
  }
@@ -126,52 +185,78 @@ void Extend_patt(Pattern _patt) {
126
185
  }
127
186
  }
128
187
 
129
- std::vector<int> slistp, ilistp;
130
- for (auto it : _patt.ilist) if (pot_patt[it].freq >= theta) ilistp.push_back(it);
131
- for (auto it : _patt.slist) if (pot_patt[it + L].freq >= theta) slistp.push_back(it);
188
+ vector<int> slistp;
189
+ vector<int> ilistp;
190
+
191
+ for (vector<int>::iterator it = _patt.ilist.begin(); it != _patt.ilist.end(); ++it) {
192
+ if (pot_patt[*it].freq >= theta)
193
+ ilistp.push_back(*it);
194
+ }
132
195
 
133
- for (auto it : ilistp) {
134
- pot_patt[it].str_pnt.shrink_to_fit();
135
- DFS.push_back(pot_patt[it]);
196
+ for (vector<int>::iterator it = _patt.slist.begin(); it != _patt.slist.end(); ++it) {
197
+ if (pot_patt[(*it) + L].freq >= theta)
198
+ slistp.push_back(*it);
199
+ }
200
+
201
+ // ----- positive extensions -----
202
+ for (vector<int>::iterator it = ilistp.begin(); it != ilistp.end(); ++it) {
203
+ pot_patt[*it].str_pnt.shrink_to_fit();
204
+ DFS.push_back(pot_patt[*it]);
136
205
  DFS.back().seq = _patt.seq;
137
- DFS.back().seq.push_back(it + 1);
206
+ DFS.back().seq.push_back((*it) + 1);
138
207
  DFS.back().seq.shrink_to_fit();
139
208
  DFS.back().slist = slistp;
140
209
  DFS.back().ilist = ilistp;
141
- if (b_disp || b_write) Out_patt(DFS.back().seq, DFS.back().freq);
210
+
211
+ // print/write + collect
212
+ Out_patt(DFS.back().seq, DFS.back().freq);
142
213
  ++num_patt;
143
214
  }
144
215
 
145
- for (auto it : slistp) {
146
- pot_patt[it + L].str_pnt.shrink_to_fit();
147
- DFS.push_back(pot_patt[it + L]);
216
+ // ----- itemset (negative) extensions -----
217
+ for (vector<int>::iterator it = slistp.begin(); it != slistp.end(); ++it) {
218
+ pot_patt[(*it) + L].str_pnt.shrink_to_fit();
219
+ DFS.push_back(pot_patt[(*it) + L]);
148
220
  DFS.back().seq = _patt.seq;
149
- DFS.back().seq.push_back(-it - 1);
221
+ DFS.back().seq.push_back(-(*it) - 1);
150
222
  DFS.back().seq.shrink_to_fit();
151
223
  DFS.back().slist = slistp;
152
224
  DFS.back().ilist = slistp;
153
- if (b_disp || b_write) Out_patt(DFS.back().seq, DFS.back().freq);
225
+
226
+ // print/write + collect
227
+ Out_patt(DFS.back().seq, DFS.back().freq);
154
228
  ++num_patt;
155
229
  }
156
230
  }
157
231
 
158
- void Out_patt(std::vector<int>& seq, int freq) {
159
-
160
- btminer::collected.push_back(seq); // make pattern visible to Python
232
+ // ------------------------------------------------------------
233
+ // final pattern printer + collector
234
+ // ------------------------------------------------------------
235
+ static void Out_patt(vector<int>& seq, int freq) {
236
+
237
+ // 1) ALWAYS collect (so Python can read it)
238
+ collectedPatterns.push_back(seq);
161
239
 
162
- std::ofstream file_o;
163
- if (b_write) file_o.open(out_file, std::ios::app);
240
+ // 2) existing behavior: print / write
241
+ ofstream file_o;
242
+ if (b_write)
243
+ file_o.open(out_file, std::ios::app);
164
244
 
165
- for (int ii = 0; ii < seq.size(); ii++) {
166
- if (b_disp) std::cout << seq[ii] << " ";
167
- if (b_write) file_o << seq[ii] << " ";
245
+ for (int ii = 0; ii < static_cast<int>(seq.size()); ii++) {
246
+ if (b_disp)
247
+ cout << seq[ii] << " ";
248
+ if (b_write)
249
+ file_o << seq[ii] << " ";
168
250
  }
169
- if (b_disp) std::cout << std::endl;
170
- if (b_write) file_o << std::endl;
251
+ if (b_disp)
252
+ cout << endl;
253
+ if (b_write)
254
+ file_o << endl;
171
255
 
172
- if (b_disp) std::cout << "************** Freq: " << freq << std::endl;
256
+ if (b_disp)
257
+ cout << "************** Freq: " << freq << endl;
173
258
  if (b_write) {
174
- file_o << "************** Freq: " << freq << std::endl;
259
+ file_o << "************** Freq: " << freq << endl;
175
260
  file_o.close();
176
261
  }
177
262
  }
@@ -8,6 +8,10 @@ namespace btminer {
8
8
 
9
9
  void Freq_miner();
10
10
 
11
+ /**
12
+ * One pattern in the DFS stack.
13
+ * (same as professor)
14
+ */
11
15
  class Pattern {
12
16
  public:
13
17
  std::vector<int> seq;
@@ -32,8 +36,20 @@ public:
32
36
  }
33
37
  };
34
38
 
39
+ // ----- existing globals -----
35
40
  extern int num_patt;
36
41
  extern int num_max_patt;
37
42
  extern std::vector<Pattern> DFS;
38
43
 
44
+ // ----- NEW: collected patterns for Python binding -----
45
+
46
+ // stores every pattern exactly as mined: [-68, -36, -5, ...]
47
+ extern std::vector<std::vector<int>> collectedPatterns;
48
+
49
+ // clear before every run
50
+ void ClearCollected();
51
+
52
+ // read-only access (Python binding will use this)
53
+ const std::vector<std::vector<int>>& GetCollected();
54
+
39
55
  } // namespace btminer