effspm 0.1.5__cp310-cp310-win_amd64.whl → 0.3.0__cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. effspm/__init__.py +9 -2
  2. effspm/_core.cpp +91 -13
  3. effspm/_effspm.cp310-win_amd64.pyd +0 -0
  4. effspm/_effspm.cpp +679 -0
  5. effspm/btminer/src/build_mdd.cpp +88 -0
  6. effspm/btminer/src/build_mdd.hpp +34 -0
  7. effspm/btminer/src/freq_miner.cpp +264 -0
  8. effspm/btminer/src/freq_miner.hpp +55 -0
  9. effspm/btminer/src/load_inst.cpp +275 -0
  10. effspm/btminer/src/load_inst.hpp +43 -0
  11. effspm/btminer/src/utility.cpp +50 -0
  12. effspm/btminer/src/utility.hpp +16 -0
  13. effspm/freq_miner.hpp +7 -1
  14. effspm/htminer/src/build_mdd.cpp +139 -0
  15. effspm/htminer/src/build_mdd.hpp +64 -0
  16. effspm/htminer/src/freq_miner.cpp +350 -0
  17. effspm/htminer/src/freq_miner.hpp +60 -0
  18. effspm/htminer/src/load_inst.cpp +394 -0
  19. effspm/htminer/src/load_inst.hpp +23 -0
  20. effspm/htminer/src/utility.cpp +72 -0
  21. effspm/htminer/src/utility.hpp +77 -0
  22. effspm/largebm/src/build_mdd.cpp +96 -0
  23. effspm/largebm/src/build_mdd.hpp +32 -0
  24. effspm/largebm/src/freq_miner.cpp +299 -0
  25. effspm/largebm/src/freq_miner.hpp +37 -0
  26. effspm/largebm/src/load_inst.cpp +224 -0
  27. effspm/largebm/src/load_inst.hpp +35 -0
  28. effspm/largebm/src/utility.cpp +35 -0
  29. effspm/largebm/src/utility.hpp +15 -0
  30. effspm/largehm/src/build_mdd.cpp +174 -0
  31. effspm/largehm/src/build_mdd.hpp +93 -0
  32. effspm/largehm/src/freq_miner.cpp +429 -0
  33. effspm/largehm/src/freq_miner.hpp +77 -0
  34. effspm/largehm/src/load_inst.cpp +375 -0
  35. effspm/largehm/src/load_inst.hpp +64 -0
  36. effspm/largehm/src/utility.cpp +38 -0
  37. effspm/largehm/src/utility.hpp +29 -0
  38. effspm/largepp/src/freq_miner.cpp +198 -0
  39. effspm/largepp/src/freq_miner.hpp +18 -0
  40. effspm/largepp/src/load_inst.cpp +238 -0
  41. effspm/largepp/src/load_inst.hpp +34 -0
  42. effspm/largepp/src/pattern.hpp +31 -0
  43. effspm/largepp/src/utility.cpp +34 -0
  44. effspm/largepp/src/utility.hpp +21 -0
  45. effspm/load_inst.hpp +18 -12
  46. effspm-0.3.0.dist-info/METADATA +237 -0
  47. effspm-0.3.0.dist-info/RECORD +54 -0
  48. {effspm-0.1.5.dist-info → effspm-0.3.0.dist-info}/WHEEL +1 -1
  49. effspm/_core.cp310-win_amd64.pyd +0 -0
  50. effspm-0.1.5.dist-info/METADATA +0 -38
  51. effspm-0.1.5.dist-info/RECORD +0 -14
  52. {effspm-0.1.5.dist-info → effspm-0.3.0.dist-info}/licenses/LICENSE +0 -0
  53. {effspm-0.1.5.dist-info → effspm-0.3.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,88 @@
1
+ #include <vector>
2
+ #include <iostream>
3
+ #include <unordered_map>
4
+ #include <cstdlib>
5
+ #include <cmath>
6
+
7
+ #include "load_inst.hpp" // ← has: extern unsigned long long E;
8
+ #include "build_mdd.hpp"
9
+ #include "freq_miner.hpp"
10
+ #include "utility.hpp"
11
+
12
+ namespace btminer {
13
+
14
+ using std::vector;
15
+ using std::unordered_map;
16
+ using std::abs;
17
+
18
+ static int Add_arc(int item,
19
+ int last_arc,
20
+ int& itmset,
21
+ unordered_map<int, int>& ancest_map);
22
+
23
+ vector<Arc> Tree; // professor had this global
24
+
25
+ void Build_MDD(vector<int>& items) {
26
+ unordered_map<int, int> ancest_map;
27
+
28
+ int last_arc = 0;
29
+ int itmset = 0;
30
+
31
+ for (int item : items) {
32
+ ++E; // ✅ count this entry, just like prefix-projection does
33
+ last_arc = Add_arc(item, last_arc, itmset, ancest_map);
34
+ }
35
+ }
36
+
37
+ static int Add_arc(int item,
38
+ int last_arc,
39
+ int& itmset,
40
+ unordered_map<int, int>& ancest_map) {
41
+
42
+ int anct;
43
+ auto p = ancest_map.find(std::abs(item));
44
+ if (p == ancest_map.end())
45
+ anct = 0;
46
+ else
47
+ anct = p->second;
48
+
49
+ if (item < 0)
50
+ ++itmset;
51
+
52
+ int last_sibl = Tree[last_arc].chld;
53
+
54
+ if (last_sibl == -1) {
55
+ // create child
56
+ Tree.emplace_back(item, itmset, anct);
57
+ last_sibl = static_cast<int>(Tree.size()) - 1;
58
+ Tree[last_arc].chld = last_sibl;
59
+
60
+ if (anct == 0)
61
+ DFS[std::abs(item) - 1].str_pnt.push_back(last_sibl);
62
+
63
+ } else {
64
+ // walk siblings
65
+ while (Tree[last_sibl].item != item) {
66
+ if (Tree[last_sibl].sibl == -1) {
67
+ Tree.emplace_back(item, itmset, anct);
68
+ Tree[last_sibl].sibl = static_cast<int>(Tree.size()) - 1;
69
+ last_sibl = static_cast<int>(Tree.size()) - 1;
70
+ if (anct == 0)
71
+ DFS[std::abs(item) - 1].str_pnt.push_back(last_sibl);
72
+ break;
73
+ }
74
+ last_sibl = Tree[last_sibl].sibl;
75
+ }
76
+ }
77
+
78
+ if (anct == 0)
79
+ ++DFS[std::abs(item) - 1].freq;
80
+
81
+ ++Tree[last_sibl].freq;
82
+
83
+ ancest_map[std::abs(item)] = last_sibl;
84
+
85
+ return last_sibl;
86
+ }
87
+
88
+ } // namespace btminer
@@ -0,0 +1,34 @@
1
+ #pragma once
2
+
3
+ #include <vector>
4
+ #include <cmath>
5
+ #include "load_inst.hpp"
6
+
7
+ namespace btminer {
8
+
9
+ using std::vector;
10
+
11
+ void Build_MDD(vector<int>& items);
12
+
13
/**
 * One arc of the tree stored in the global `Tree` vector.
 *
 * Links are indices into `Tree`; -1 means "no such arc". Every field has a
 * default member initializer so that a default-constructed Arc never carries
 * indeterminate values.
 */
class Arc {
public:
    int chld = -1;   // index of the first child arc, -1 if none
    int sibl = -1;   // index of the next sibling arc, -1 if none
    int freq = 0;    // occurrence count accumulated during insertion
    int anct = 0;    // arc index of the previous occurrence of the same item (0 = none)
    int itmset = 0;  // itemset counter value recorded when the arc was created
    int item = 0;    // signed item value stored on this arc

    // Full constructor: item, itemset counter and ancestor arc.
    Arc(int _itm, int _itmset, int _anc)
        : anct(_anc), itmset(_itmset), item(_itm) {}

    // Shorter constructor: itemset counter defaults to 0.
    Arc(int _itm, int _anc)
        : anct(_anc), item(_itm) {}

    Arc() = default;
};
31
+
32
+ extern vector<Arc> Tree;
33
+
34
+ } // namespace btminer
@@ -0,0 +1,264 @@
1
+ // effspm/btminer/src/freq_miner.cpp
2
+ #include <iostream>
3
+ #include <fstream> // for ofstream
4
+ #include <time.h>
5
+ #include "freq_miner.hpp"
6
+ #include "build_mdd.hpp"
7
+ #include "utility.hpp"
8
+ #include "load_inst.hpp"
9
+
10
+ namespace btminer {
11
+
12
+ using namespace std;
13
+
14
+ // Forward declarations of the file-local helpers defined later in this file.
15
+ static void Out_patt(vector<int>& seq, int freq);
16
+ static void Extend_patt(Pattern _patt);
17
+
18
+ // Pattern counter: reset by Freq_miner() and incremented once per pattern emitted in Extend_patt().
19
+ int num_patt = 0;
20
+
21
+ // Declaration only: DFS is not defined in this file (the original note says its definition lives in load_inst.cpp).
22
+ extern std::vector<Pattern> DFS;
23
+
24
// Storage for every pattern emitted during a mining run; filled by Out_patt().
std::vector<std::vector<int>> collectedPatterns;

// ------------------------------------------------------------
// collected-pattern accessors
// ------------------------------------------------------------

/// Drops all collected patterns and makes sure room for 512 entries is reserved.
void ClearCollected() {
    const std::vector<std::vector<int>>::size_type kInitialCapacity = 512;
    collectedPatterns.clear();
    collectedPatterns.reserve(kInitialCapacity);
}

/// Read-only view of the patterns collected so far.
const std::vector<std::vector<int>>& GetCollected() {
    const std::vector<std::vector<int>>& view = collectedPatterns;
    return view;
}
39
+
40
+ // ------------------------------------------------------------
41
+ // main entry
42
+ // ------------------------------------------------------------
43
+ void Freq_miner() {
44
+
45
+ // every run should start clean
46
+ ClearCollected();
47
+ num_patt = 0;
48
+
49
+ vector<int> islist;
50
+
51
+ for (int i = 0; i < L; ++i) {
52
+ if (DFS[i].freq >= theta)
53
+ islist.push_back(i);
54
+ }
55
+
56
+ for (int i = 0; i < static_cast<int>(DFS.size()); ++i) {
57
+ DFS[i].ilist = islist;
58
+ DFS[i].slist = islist;
59
+ }
60
+
61
+ while (!DFS.empty() && give_time(clock() - start_time) < time_limit) {
62
+ if (DFS.back().freq >= theta)
63
+ Extend_patt(DFS.back());
64
+ else
65
+ DFS.pop_back();
66
+ }
67
+ }
68
+
69
+ // ------------------------------------------------------------
70
+ // recursive extension
71
+ // ------------------------------------------------------------
72
+ static void Extend_patt(Pattern _patt) {
73
+
74
+ DFS.pop_back();
75
+
76
+ vector<bool> slist(L, 0);
77
+ vector<bool> ilist(L, 0);
78
+
79
+ for (vector<int>::iterator it = _patt.slist.begin(); it != _patt.slist.end(); ++it)
80
+ slist[*it] = 1;
81
+ for (vector<int>::iterator it = _patt.ilist.begin(); it != _patt.ilist.end(); ++it)
82
+ ilist[*it] = 1;
83
+
84
+ int itmset_size = 1;
85
+ int last_neg = _patt.seq.size() - 1;
86
+ while (_patt.seq[last_neg] > 0) {
87
+ --last_neg;
88
+ ++itmset_size;
89
+ }
90
+
91
+ vector<Pattern> pot_patt(2 * L);
92
+ vector<int> DFS_patt_init;
93
+ vector<int> DFS_patt;
94
+ vector<int> DFS_numfound;
95
+ vector<int> last_strpnt(L, 0);
96
+
97
+ for (int pnt = 0; pnt < static_cast<int>(_patt.str_pnt.size()); ++pnt) {
98
+ DFS_patt_init.push_back(_patt.str_pnt[pnt]);
99
+ while (!DFS_patt_init.empty()) {
100
+ int cur_sibl = Tree[DFS_patt_init.back()].chld;
101
+ DFS_patt_init.pop_back();
102
+ while (cur_sibl != -1) {
103
+ int cur_itm = Tree[cur_sibl].item;
104
+ if (cur_itm < 0) {
105
+ cur_itm = -cur_itm;
106
+ if (slist[cur_itm - 1]) {
107
+ pot_patt[cur_itm + L - 1].freq += Tree[cur_sibl].freq;
108
+ pot_patt[cur_itm + L - 1].str_pnt.push_back(cur_sibl);
109
+ }
110
+ if (Tree[cur_sibl].chld != -1) {
111
+ DFS_patt.push_back(cur_sibl);
112
+ if (!_patt.ilist.empty()) {
113
+ if (cur_itm == -_patt.seq[last_neg])
114
+ DFS_numfound.push_back(1);
115
+ else
116
+ DFS_numfound.push_back(0);
117
+ }
118
+ }
119
+ }
120
+ else {
121
+ if (ilist[cur_itm - 1]) {
122
+ pot_patt[cur_itm - 1].freq += Tree[cur_sibl].freq;
123
+ pot_patt[cur_itm - 1].str_pnt.push_back(cur_sibl);
124
+ }
125
+ if (Tree[cur_sibl].chld != -1)
126
+ DFS_patt_init.push_back(cur_sibl);
127
+ }
128
+ cur_sibl = Tree[cur_sibl].sibl;
129
+ }
130
+ }
131
+
132
+ for (vector<int>::iterator it = _patt.ilist.begin(); it != _patt.ilist.end(); ++it)
133
+ last_strpnt[*it] = pot_patt[*it].str_pnt.size();
134
+
135
+ while (!DFS_patt.empty()) {
136
+ int cur_sibl = Tree[DFS_patt.back()].chld;
137
+ int num_found = DFS_numfound.back();
138
+ DFS_patt.pop_back();
139
+ DFS_numfound.pop_back();
140
+ while (cur_sibl != -1) {
141
+ int cur_itm = Tree[cur_sibl].item;
142
+ if (cur_itm > 0) {
143
+ if (num_found == itmset_size &&
144
+ ilist[cur_itm - 1] &&
145
+ (Tree[Tree[cur_sibl].anct].itmset < Tree[_patt.str_pnt[pnt]].itmset ||
146
+ !check_parent(cur_sibl,
147
+ _patt.str_pnt[pnt],
148
+ last_strpnt[cur_itm - 1],
149
+ pot_patt[cur_itm - 1].str_pnt))) {
150
+ pot_patt[cur_itm - 1].freq += Tree[cur_sibl].freq;
151
+ pot_patt[cur_itm - 1].str_pnt.push_back(cur_sibl);
152
+ }
153
+ if (slist[cur_itm - 1] && Tree[Tree[cur_sibl].anct].itmset <= Tree[_patt.str_pnt[pnt]].itmset) {
154
+ pot_patt[cur_itm + L - 1].freq += Tree[cur_sibl].freq;
155
+ pot_patt[cur_itm + L - 1].str_pnt.push_back(cur_sibl);
156
+ }
157
+ if (Tree[cur_sibl].chld != -1) {
158
+ DFS_patt.push_back(cur_sibl);
159
+ if (!_patt.ilist.empty()) {
160
+ if (num_found < itmset_size && cur_itm == abs(_patt.seq[last_neg + num_found]))
161
+ DFS_numfound.push_back(num_found + 1);
162
+ else
163
+ DFS_numfound.push_back(num_found);
164
+ }
165
+ }
166
+ }
167
+ else {
168
+ cur_itm = -cur_itm;
169
+ if (slist[cur_itm - 1] && Tree[Tree[cur_sibl].anct].itmset <= Tree[_patt.str_pnt[pnt]].itmset) {
170
+ pot_patt[cur_itm + L - 1].freq += Tree[cur_sibl].freq;
171
+ pot_patt[cur_itm + L - 1].str_pnt.push_back(cur_sibl);
172
+ }
173
+ if (Tree[cur_sibl].chld != -1) {
174
+ DFS_patt.push_back(cur_sibl);
175
+ if (!_patt.ilist.empty()) {
176
+ if (cur_itm == -_patt.seq[last_neg])
177
+ DFS_numfound.push_back(1);
178
+ else
179
+ DFS_numfound.push_back(0);
180
+ }
181
+ }
182
+ }
183
+ cur_sibl = Tree[cur_sibl].sibl;
184
+ }
185
+ }
186
+ }
187
+
188
+ vector<int> slistp;
189
+ vector<int> ilistp;
190
+
191
+ for (vector<int>::iterator it = _patt.ilist.begin(); it != _patt.ilist.end(); ++it) {
192
+ if (pot_patt[*it].freq >= theta)
193
+ ilistp.push_back(*it);
194
+ }
195
+
196
+ for (vector<int>::iterator it = _patt.slist.begin(); it != _patt.slist.end(); ++it) {
197
+ if (pot_patt[(*it) + L].freq >= theta)
198
+ slistp.push_back(*it);
199
+ }
200
+
201
+ // ----- positive extensions -----
202
+ for (vector<int>::iterator it = ilistp.begin(); it != ilistp.end(); ++it) {
203
+ pot_patt[*it].str_pnt.shrink_to_fit();
204
+ DFS.push_back(pot_patt[*it]);
205
+ DFS.back().seq = _patt.seq;
206
+ DFS.back().seq.push_back((*it) + 1);
207
+ DFS.back().seq.shrink_to_fit();
208
+ DFS.back().slist = slistp;
209
+ DFS.back().ilist = ilistp;
210
+
211
+ // print/write + collect
212
+ Out_patt(DFS.back().seq, DFS.back().freq);
213
+ ++num_patt;
214
+ }
215
+
216
+ // ----- itemset (negative) extensions -----
217
+ for (vector<int>::iterator it = slistp.begin(); it != slistp.end(); ++it) {
218
+ pot_patt[(*it) + L].str_pnt.shrink_to_fit();
219
+ DFS.push_back(pot_patt[(*it) + L]);
220
+ DFS.back().seq = _patt.seq;
221
+ DFS.back().seq.push_back(-(*it) - 1);
222
+ DFS.back().seq.shrink_to_fit();
223
+ DFS.back().slist = slistp;
224
+ DFS.back().ilist = slistp;
225
+
226
+ // print/write + collect
227
+ Out_patt(DFS.back().seq, DFS.back().freq);
228
+ ++num_patt;
229
+ }
230
+ }
231
+
232
+ // ------------------------------------------------------------
233
+ // final pattern printer + collector
234
+ // ------------------------------------------------------------
235
+ static void Out_patt(vector<int>& seq, int freq) {
236
+
237
+ // 1) ALWAYS collect (so Python can read it)
238
+ collectedPatterns.push_back(seq);
239
+
240
+ // 2) existing behavior: print / write
241
+ ofstream file_o;
242
+ if (b_write)
243
+ file_o.open(out_file, std::ios::app);
244
+
245
+ for (int ii = 0; ii < static_cast<int>(seq.size()); ii++) {
246
+ if (b_disp)
247
+ cout << seq[ii] << " ";
248
+ if (b_write)
249
+ file_o << seq[ii] << " ";
250
+ }
251
+ if (b_disp)
252
+ cout << endl;
253
+ if (b_write)
254
+ file_o << endl;
255
+
256
+ if (b_disp)
257
+ cout << "************** Freq: " << freq << endl;
258
+ if (b_write) {
259
+ file_o << "************** Freq: " << freq << endl;
260
+ file_o.close();
261
+ }
262
+ }
263
+
264
+ } // namespace btminer
@@ -0,0 +1,55 @@
1
+ #pragma once
2
+
3
+ #include <vector>
4
+ #include "load_inst.hpp"
5
+ #include "build_mdd.hpp"
6
+
7
+ namespace btminer {
8
+
9
+ void Freq_miner();
10
+
11
/**
 * One entry of the DFS stack: a sequence of items plus the bookkeeping the
 * miner needs to extend it.
 */
class Pattern {
public:
    std::vector<int> seq;      // items of the pattern, in order
    std::vector<int> str_pnt;  // arc indices associated with this pattern
    std::vector<int> slist;    // item indices allowed as negative-item extensions
    std::vector<int> ilist;    // item indices allowed as positive-item extensions
    int freq = 0;              // accumulated frequency

    // Takes over the contents of `_seq` (which is left empty) and appends `item`.
    Pattern(std::vector<int>& _seq, int item) : freq(0) {
        _seq.swap(seq);
        seq.push_back(item);
    }

    // Single-item pattern.
    Pattern(int item) : seq(1, item), freq(0) {}

    // Empty pattern with zero frequency.
    Pattern() : freq(0) {}
};
38
+
39
+ // ----- existing globals -----
40
+ extern int num_patt;
41
+ extern int num_max_patt;
42
+ extern std::vector<Pattern> DFS;
43
+
44
+ // ----- NEW: collected patterns for Python binding -----
45
+
46
+ // stores every pattern exactly as mined: [-68, -36, -5, ...]
47
+ extern std::vector<std::vector<int>> collectedPatterns;
48
+
49
+ // clear before every run
50
+ void ClearCollected();
51
+
52
+ // read-only access (Python binding will use this)
53
+ const std::vector<std::vector<int>>& GetCollected();
54
+
55
+ } // namespace btminer