effspm 0.2.6__cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. effspm/__init__.py +11 -0
  2. effspm/_core.cpp +106 -0
  3. effspm/_effspm.cpp +609 -0
  4. effspm/_effspm.cpython-312-x86_64-linux-gnu.so +0 -0
  5. effspm/btminer/src/build_mdd.cpp +63 -0
  6. effspm/btminer/src/build_mdd.hpp +40 -0
  7. effspm/btminer/src/freq_miner.cpp +179 -0
  8. effspm/btminer/src/freq_miner.hpp +39 -0
  9. effspm/btminer/src/load_inst.cpp +200 -0
  10. effspm/btminer/src/load_inst.hpp +25 -0
  11. effspm/btminer/src/utility.cpp +65 -0
  12. effspm/btminer/src/utility.hpp +40 -0
  13. effspm/freq_miner.cpp +143 -0
  14. effspm/freq_miner.hpp +48 -0
  15. effspm/htminer/src/build_mdd.cpp +192 -0
  16. effspm/htminer/src/build_mdd.hpp +64 -0
  17. effspm/htminer/src/freq_miner.cpp +350 -0
  18. effspm/htminer/src/freq_miner.hpp +60 -0
  19. effspm/htminer/src/load_inst.cpp +394 -0
  20. effspm/htminer/src/load_inst.hpp +23 -0
  21. effspm/htminer/src/utility.cpp +72 -0
  22. effspm/htminer/src/utility.hpp +77 -0
  23. effspm/largebm/src/build_mdd.cpp +137 -0
  24. effspm/largebm/src/build_mdd.hpp +47 -0
  25. effspm/largebm/src/freq_miner.cpp +349 -0
  26. effspm/largebm/src/freq_miner.hpp +48 -0
  27. effspm/largebm/src/load_inst.cpp +230 -0
  28. effspm/largebm/src/load_inst.hpp +45 -0
  29. effspm/largebm/src/utility.cpp +45 -0
  30. effspm/largebm/src/utility.hpp +18 -0
  31. effspm/largehm/src/build_mdd.cpp +174 -0
  32. effspm/largehm/src/build_mdd.hpp +93 -0
  33. effspm/largehm/src/freq_miner.cpp +445 -0
  34. effspm/largehm/src/freq_miner.hpp +77 -0
  35. effspm/largehm/src/load_inst.cpp +357 -0
  36. effspm/largehm/src/load_inst.hpp +64 -0
  37. effspm/largehm/src/utility.cpp +38 -0
  38. effspm/largehm/src/utility.hpp +29 -0
  39. effspm/largepp/src/freq_miner.cpp +170 -0
  40. effspm/largepp/src/freq_miner.hpp +43 -0
  41. effspm/largepp/src/load_inst.cpp +219 -0
  42. effspm/largepp/src/load_inst.hpp +28 -0
  43. effspm/largepp/src/utility.cpp +34 -0
  44. effspm/largepp/src/utility.hpp +21 -0
  45. effspm/load_inst.cpp +252 -0
  46. effspm/load_inst.hpp +31 -0
  47. effspm/utility.cpp +55 -0
  48. effspm/utility.hpp +29 -0
  49. effspm-0.2.6.dist-info/METADATA +237 -0
  50. effspm-0.2.6.dist-info/RECORD +53 -0
  51. effspm-0.2.6.dist-info/WHEEL +6 -0
  52. effspm-0.2.6.dist-info/licenses/LICENSE +201 -0
  53. effspm-0.2.6.dist-info/top_level.txt +1 -0
effspm/freq_miner.cpp ADDED
@@ -0,0 +1,143 @@
1
+ #include <iostream>
2
+ #include <time.h>
3
+ #include <fstream>
4
+ #include <cmath>
5
+ #include "freq_miner.hpp"
6
+ #include "utility.hpp"
7
+
8
+
9
+
10
+ // Forward declarations from the original code:
11
+ void Extend_patt(Pattern& _patt);
12
+
13
+ // Miner-wide state: num_patt is bumped once per pattern handed to Out_patt; _patt holds the pattern Extend_patt is currently extending (it is swapped in from the DFS stack).
14
+ unsigned long long int num_patt = 0;
15
+ Pattern _patt;
16
+
17
+ // Main miner function from original:
18
+ void Freq_miner() {
19
+ vector<int> islist;
20
+ if (use_list) {
21
+ for (int i = 0; i < L; ++i) {
22
+ if (DFS[i].freq >= theta)
23
+ islist.push_back(i);
24
+ }
25
+ for (int i = 0; i < DFS.size(); ++i) {
26
+ DFS[i].ilist = islist;
27
+ DFS[i].slist = islist;
28
+ }
29
+ }
30
+
31
+ while (!DFS.empty() && give_time(clock() - start_time) < time_limit) {
32
+ if (DFS.back().freq >= theta)
33
+ Extend_patt(DFS.back());
34
+ else
35
+ DFS.pop_back();
36
+ }
37
+ }
38
+
39
+ // The recursive extension from original:
40
+ void Extend_patt(Pattern& _pattern) {
41
+ swap(_patt, _pattern);
42
+ DFS.pop_back();
43
+
44
+ vector<bool> slist;
45
+ vector<bool> ilist;
46
+
47
+ if (use_list) {
48
+ slist = vector<bool>(L, 0);
49
+ ilist = vector<bool>(L, 0);
50
+ for (int idx : _patt.slist) slist[idx] = 1;
51
+ for (int idx : _patt.ilist) ilist[idx] = 1;
52
+ }
53
+
54
+ vector<Pattern> pot_patt(L * 2);
55
+
56
+ int last_neg = _patt.seq.size() - 1;
57
+ while (_patt.seq[last_neg] > 0) --last_neg;
58
+
59
+ for (int i = 0; i < _patt.str_pnt.size(); ++i) {
60
+ vector<bool> found(L * 2, 0);
61
+ unsigned int seq = _patt.seq_ID[i];
62
+ unsigned int j = _patt.str_pnt[i] + 1;
63
+ // positive extensions
64
+ while (j < items[seq].size() && items[seq][j] > 0) {
65
+ int cur_itm = items[seq][j];
66
+ if (!use_list || ilist[cur_itm - 1]) {
67
+ pot_patt[cur_itm - 1].seq_ID.push_back(seq);
68
+ pot_patt[cur_itm - 1].str_pnt.push_back(j);
69
+ ++pot_patt[cur_itm - 1].freq;
70
+ found[cur_itm - 1] = 1;
71
+ }
72
+ ++j;
73
+ }
74
+ // negative and cross-itemset extensions...
75
+ int num_itmfnd = 0;
76
+ for (int k = j; k < items[seq].size(); ++k) {
77
+ int cur_itm = abs(items[seq][k]);
78
+ if (items[seq][k] < 0) num_itmfnd = 0;
79
+ if ((!use_list || slist[cur_itm - 1]) && !found[L + cur_itm - 1]) {
80
+ pot_patt[L + cur_itm - 1].seq_ID.push_back(seq);
81
+ pot_patt[L + cur_itm - 1].str_pnt.push_back(k);
82
+ ++pot_patt[L + cur_itm - 1].freq;
83
+ found[L + cur_itm - 1] = 1;
84
+ }
85
+ if (num_itmfnd == _patt.seq.size() - last_neg) {
86
+ if ((!use_list || ilist[cur_itm - 1]) && !found[cur_itm - 1]) {
87
+ pot_patt[cur_itm - 1].seq_ID.push_back(seq);
88
+ pot_patt[cur_itm - 1].str_pnt.push_back(k);
89
+ ++pot_patt[cur_itm - 1].freq;
90
+ found[cur_itm - 1] = 1;
91
+ }
92
+ } else if (cur_itm == abs(_patt.seq[last_neg + num_itmfnd])) {
93
+ ++num_itmfnd;
94
+ }
95
+ }
96
+ }
97
+
98
+ // Now generate new DFS states
99
+ if (use_list) {
100
+ // itemset extensions
101
+ vector<int> slistp, ilistp;
102
+ for (int idx : _patt.ilist)
103
+ if (pot_patt[idx].freq >= theta) ilistp.push_back(idx);
104
+ for (int idx : _patt.slist)
105
+ if (pot_patt[idx + L].freq >= theta) slistp.push_back(idx);
106
+
107
+ for (int idx : ilistp) {
108
+ DFS.emplace_back();
109
+ swap(DFS.back(), pot_patt[idx]);
110
+ DFS.back().seq = _patt.seq;
111
+ DFS.back().seq.push_back(idx + 1);
112
+ DFS.back().slist = slistp;
113
+ DFS.back().ilist = ilistp;
114
+ Out_patt(DFS.back().seq, DFS.back().freq);
115
+ ++num_patt;
116
+ }
117
+ for (int idx : slistp) {
118
+ DFS.emplace_back();
119
+ swap(DFS.back(), pot_patt[idx + L]);
120
+ DFS.back().seq = _patt.seq;
121
+ DFS.back().seq.push_back(-idx - 1);
122
+ DFS.back().slist = slistp;
123
+ DFS.back().ilist = slistp;
124
+ Out_patt(DFS.back().seq, DFS.back().freq);
125
+ ++num_patt;
126
+ }
127
+ } else {
128
+ // no list optimization
129
+ for (int i = 0; i < 2 * L; ++i) {
130
+ if (pot_patt[i].freq >= theta) {
131
+ DFS.emplace_back();
132
+ swap(DFS.back(), pot_patt[i]);
133
+ DFS.back().seq = _patt.seq;
134
+ if (i >= L)
135
+ DFS.back().seq.push_back(-(i - L + 1));
136
+ else
137
+ DFS.back().seq.push_back(i + 1);
138
+ Out_patt(DFS.back().seq, DFS.back().freq);
139
+ ++num_patt;
140
+ }
141
+ }
142
+ }
143
+ }
effspm/freq_miner.hpp ADDED
@@ -0,0 +1,48 @@
1
+ #pragma once
2
+ #include <vector>
3
+ #include "load_inst.hpp"
4
+ #include <cstdlib>
5
+ #include <cmath>
6
+ #include <cstddef> // for std::size_t
7
+
8
+ using namespace std;
9
+ void Freq_miner();
10
+ void Out_patt(std::vector<int>& seq, unsigned int freq);
11
+
12
+
13
// A candidate sequential pattern together with the bookkeeping the miner
// keeps for it.
class Pattern {
public:

    // The pattern itself: item ids, where the sign of an entry is significant
    // to the miner (Extend_patt appends positive and negative ids).
    std::vector<int> seq;
    // str_pnt[i] is a position inside the data sequence numbered seq_ID[i];
    // the two vectors are filled in lock-step.
    std::vector<unsigned int> str_pnt;
    std::vector<unsigned int> seq_ID;

    // Zero-based item indices still eligible for the two kinds of extension
    // (only consulted when the list optimization is enabled).
    std::vector<int> slist;
    std::vector<int> ilist;

    // Number of occurrences recorded for this pattern.
    unsigned int freq = 0;

    // Builds the pattern `_seq` followed by `item`. `_seq` is only read.
    Pattern(const std::vector<int>& _seq, int item) {
        // One extra slot for `item`, so the final push_back never reallocates.
        seq.reserve(_seq.size() + 1);
        seq.assign(_seq.begin(), _seq.end());
        seq.push_back(item);
    }

    // Builds a single-item pattern.
    Pattern(int item) {
        seq.push_back(item);
    }

    // Builds an empty pattern with zero support.
    Pattern() = default;

};
45
+
46
+ extern vector<Pattern> DFS; //DFS queue of potential patterns to extend
47
+
48
+ extern unsigned long long int num_patt;
effspm/htminer/src/build_mdd.cpp ADDED
@@ -0,0 +1,192 @@
1
+ #include <vector>
2
+ #include <iostream>
3
+ #include "load_inst.hpp"
4
+ #include "build_mdd.hpp"
5
+ #include "freq_miner.hpp"
6
+ #include "utility.hpp"
7
+
8
+ namespace htminer {
9
+
10
+ // Forward declarations (unchanged)
11
+ int Add_arc(int item, unsigned int last_arc, int& itmset, std::vector<unsigned int>& ancest_map);
12
+ void Add_vec(std::vector<int>& items_lim, std::vector<unsigned int>& ancest_map, unsigned int last_arc, int itmset);
13
+
14
+ // Shared tree storage: Tree holds the Arc nodes that Add_arc links through their chld/sibl indices; CTree and VTree hold the vector-backed nodes appended by Add_vec.
15
+ std::vector<Arc> Tree;
16
+ std::vector<VArc> VTree;
17
+ std::vector<CArc> CTree;
18
+
19
+ void Build_MDD(std::vector<int>& items, std::vector<int>& items_lim) {
20
+ // DEBUG: entry into Build_MDD
21
+ // std::cerr << "[HTMiner::Build_MDD] called with items.size()=" << items.size()
22
+ // << " items_lim.size()=" << items_lim.size() << std::endl;
23
+
24
+ // // Prepare ancestor map of size L
25
+ std::vector<unsigned int> ancest_map(L, 0);
26
+
27
+ unsigned int last_arc = 0;
28
+ int itmset = 0;
29
+
30
+ // Iterate over items
31
+ for (size_t idx = 0; idx < items.size(); ++idx) {
32
+ int curr_item = items[idx];
33
+ // std::cerr << "[HTMiner::Build_MDD] processing items[" << idx
34
+ // << "]=" << curr_item << " last_arc=" << last_arc
35
+ // << " itmset=" << itmset << std::endl;
36
+
37
+ last_arc = Add_arc(curr_item, last_arc, itmset, ancest_map);
38
+
39
+ // std::cerr << "[HTMiner::Build_MDD] returned from Add_arc, new last_arc="
40
+ // << last_arc << " itmset=" << itmset << std::endl;
41
+ }
42
+
43
+ // If there are limited items, handle them
44
+ if (!items_lim.empty()) {
45
+ // std::cerr << "[HTMiner::Build_MDD] items_lim is not empty; size="
46
+ // << items_lim.size() << std::endl;
47
+ Add_vec(items_lim, ancest_map, last_arc, itmset);
48
+ // std::cerr << "[HTMiner::Build_MDD] returned from Add_vec" << std::endl;
49
+ } else {
50
+ // std::cerr << "[HTMiner::Build_MDD] items_lim is empty; skipping Add_vec" << std::endl;
51
+ }
52
+
53
+ // DEBUG: exit Build_MDD
54
+ // std::cerr << "[HTMiner::Build_MDD] exiting; Tree.size()=" << Tree.size()
55
+ // << " CTree.size()=" << CTree.size()
56
+ // << " VTree.size()=" << VTree.size() << std::endl;
57
+ //
58
+ }
59
+
60
+ int Add_arc(int item, unsigned int last_arc, int& itmset, std::vector<unsigned int>& ancest_map) {
61
+ unsigned int anct = ancest_map[std::abs(item) - 1];
62
+ if (item < 0) {
63
+ ++itmset;
64
+ // std::cerr << "[HTMiner::Add_arc] negative item detected; itmset incremented to "
65
+ // << itmset << std::endl;
66
+ }
67
+
68
+ unsigned int last_sibl = Tree[last_arc].chld;
69
+ // std::cerr << "[HTMiner::Add_arc] starting with last_sibl=" << last_sibl
70
+ // << " anct=" << anct << std::endl;
71
+
72
+ if (last_sibl == 0) {
73
+ Tree.emplace_back(item, itmset, anct);
74
+ last_sibl = static_cast<unsigned int>(Tree.size() - 1);
75
+ Tree[last_arc].chld = last_sibl;
76
+ // std::cerr << "[HTMiner::Add_arc] created new arc at index=" << last_sibl
77
+ // << " setting Tree[" << last_arc << "].chld=" << last_sibl << std::endl;
78
+ if (anct == 0) {
79
+ DFS[std::abs(item) - 1].str_pnt.push_back(last_sibl);
80
+ // std::cerr << "[HTMiner::Add_arc] appended to DFS[" << (std::abs(item) - 1)
81
+ // << "].str_pnt -> " << last_sibl << std::endl;
82
+ }
83
+ }
84
+ else {
85
+ // std::cerr << "[HTMiner::Add_arc] traversing siblings starting at " << last_sibl << std::endl;
86
+ while (Tree[last_sibl].item != item) {
87
+ if (Tree[last_sibl].sibl == 0) {
88
+ Tree.emplace_back(item, itmset, anct);
89
+ Tree[last_sibl].sibl = static_cast<unsigned int>(Tree.size() - 1);
90
+ last_sibl = static_cast<unsigned int>(Tree.size() - 1);
91
+ // std::cerr << "[HTMiner::Add_arc] created sibling arc at index=" << last_sibl
92
+ // << " setting Tree[" << (last_sibl - 1) << "].sibl=" << last_sibl << std::endl;
93
+ if (anct == 0) {
94
+ DFS[std::abs(item) - 1].str_pnt.push_back(last_sibl);
95
+ // std::cerr << "[HTMiner::Add_arc] appended to DFS[" << (std::abs(item) - 1)
96
+ // << "].str_pnt -> " << last_sibl << std::endl;
97
+ }
98
+ break;
99
+ }
100
+ last_sibl = Tree[last_sibl].sibl;
101
+ // std::cerr << "[HTMiner::Add_arc] moving to next sibling: " << last_sibl << std::endl;
102
+ }
103
+ }
104
+
105
+ if (anct == 0) {
106
+ ++DFS[std::abs(item) - 1].freq;
107
+ // std::cerr << "[HTMiner::Add_arc] incremented DFS[" << (std::abs(item) - 1)
108
+ // << "].freq -> " << DFS[std::abs(item) - 1].freq << std::endl;
109
+ }
110
+
111
+ ++Tree[last_sibl].freq;
112
+ // std::cerr << "[HTMiner::Add_arc] incremented Tree[" << last_sibl << "].freq -> "
113
+ // << Tree[last_sibl].freq << std::endl;
114
+
115
+ ancest_map[std::abs(item) - 1] = last_sibl;
116
+ // std::cerr << "[HTMiner::Add_arc] updated ancest_map[" << (std::abs(item) - 1)
117
+ // << "] -> " << last_sibl << std::endl;
118
+
119
+ return static_cast<int>(last_sibl);
120
+ }
121
+
122
+ void Add_vec(std::vector<int>& items_lim, std::vector<unsigned int>& ancest, unsigned int last_arc, int itmset) {
123
+ items_lim.shrink_to_fit();
124
+ // std::cerr << "[HTMiner::Add_vec] called with items_lim.size()=" << items_lim.size()
125
+ // << " last_arc=" << last_arc << " itmset=" << itmset << std::endl;
126
+
127
+ std::vector<bool> counted(L, false);
128
+
129
+ if (Tree[last_arc].itmset > 0) {
130
+ ancest.push_back(0);
131
+ ancest.shrink_to_fit();
132
+ // std::cerr << "[HTMiner::Add_vec] Tree[" << last_arc << "].itmset > 0; pushing 0 to ancest" << std::endl;
133
+
134
+ for (size_t i = 0; i < items_lim.size(); ++i) {
135
+ int cur_itm = std::abs(items_lim[i]);
136
+ if (ancest[cur_itm - 1] == 0 && !counted[cur_itm - 1]) {
137
+ if (i + 1 < static_cast<int>(items_lim.size())) {
138
+ VDFS[cur_itm - 1].str_pnt.push_back(-static_cast<int>(i) - 1);
139
+ VDFS[cur_itm - 1].seq_ID.push_back(static_cast<unsigned int>(CTree.size()));
140
+ // std::cerr << "[HTMiner::Add_vec] appended negative str_pnt to VDFS["
141
+ // << (cur_itm - 1) << "] -> " << (-static_cast<int>(i) - 1) << std::endl;
142
+ }
143
+ ++DFS[cur_itm - 1].freq;
144
+ counted[cur_itm - 1] = true;
145
+ // std::cerr << "[HTMiner::Add_vec] incremented DFS[" << (cur_itm - 1)
146
+ // << "].freq -> " << DFS[cur_itm - 1].freq << std::endl;
147
+ }
148
+ }
149
+
150
+ CTree.emplace_back(ancest, items_lim);
151
+ //std::cerr << "[HTMiner::Add_vec] added new CTree node; CTree.size()=" << CTree.size() << std::endl;
152
+
153
+ Tree[last_arc].chld = static_cast<unsigned int>(CTree.size() - 1);
154
+ Tree[last_arc].itmset = -itmset;
155
+ // std::cerr << "[HTMiner::Add_vec] updated Tree[" << last_arc
156
+ // << "].chld=" << Tree[last_arc].chld
157
+ // << " Tree[" << last_arc << "].itmset=" << Tree[last_arc].itmset << std::endl;
158
+ //
159
+ }
160
+ else {
161
+ std::vector<unsigned int>& ancest_ct = CTree[Tree[last_arc].chld].ancest;
162
+ // std::cerr << "[HTMiner::Add_vec] Tree[" << last_arc << "].itmset <= 0; using existing CTree node "
163
+ // << Tree[last_arc].chld << std::endl;
164
+
165
+ for (size_t i = 0; i < items_lim.size(); ++i) {
166
+ int cur_itm = std::abs(items_lim[i]);
167
+ if (!counted[cur_itm - 1] && ancest_ct[cur_itm - 1] == 0) {
168
+ if (i + 1 < static_cast<int>(items_lim.size())) {
169
+ VDFS[cur_itm - 1].str_pnt.push_back(static_cast<unsigned int>(i) + 1);
170
+ VDFS[cur_itm - 1].seq_ID.push_back(static_cast<unsigned int>(VTree.size()));
171
+ // std::cerr << "[HTMiner::Add_vec] appended positive str_pnt to VDFS["
172
+ // << (cur_itm - 1) << "] -> " << (static_cast<unsigned int>(i) + 1) << std::endl;
173
+ }
174
+ ++DFS[cur_itm - 1].freq;
175
+ counted[cur_itm - 1] = true;
176
+ // std::cerr << "[HTMiner::Add_vec] incremented DFS[" << (cur_itm - 1)
177
+ // << "].freq -> " << DFS[cur_itm - 1].freq << std::endl;
178
+ }
179
+ }
180
+
181
+ VTree.emplace_back(items_lim, ancest_ct.back());
182
+ // std::cerr << "[HTMiner::Add_vec] added new VTree node; VTree.size()=" << VTree.size() << std::endl;
183
+
184
+ CTree[Tree[last_arc].chld].ancest.back() = static_cast<unsigned int>(VTree.size());
185
+ // std::cerr << "[HTMiner::Add_vec] updated CTree[" << Tree[last_arc].chld
186
+ // << "].ancest.back()=" << CTree[Tree[last_arc].chld].ancest.back() << std::endl;
187
+ }
188
+
189
+ //std::cerr << "[HTMiner::Add_vec] exiting" << std::endl;
190
+ }
191
+
192
+ } // namespace htminer
effspm/htminer/src/build_mdd.hpp ADDED
@@ -0,0 +1,64 @@
1
+ #pragma once
2
+
3
+ #include <vector>
4
+ #include <cmath>
5
+ #include "load_inst.hpp"
6
+
7
+ namespace htminer {
8
+ void Build_MDD(std::vector<int>& items, std::vector<int>& items_lim);
9
+
10
// One node of the arc tree stored in the global `Tree` vector. Nodes refer to
// each other by index, and index 0 doubles as "none".
class Arc {
public:
    unsigned int chld = 0;   // index of the first child (0 = no child yet)
    unsigned int sibl = 0;   // index of the next sibling (0 = last sibling)
    unsigned int freq = 0;   // incremented each time the arc is reached
    unsigned int anct = 0;   // ancestor-map value captured at creation
    int itmset = 0;          // itemset counter captured at creation
    int item = 0;            // signed item id carried by this arc

    // Creates an unlinked arc with zero frequency.
    // `_itm` is taken as a signed value: callers pass negative item ids, so
    // it must not be routed through an unsigned parameter.
    Arc(int _itm, int _itmset, unsigned int _anc)
        : anct(_anc), itmset(_itmset), item(_itm) {}

    // Creates a fully zero-initialized arc (no member is left indeterminate).
    Arc() = default;
};
34
+
35
// A node that stores a run of items as a plain vector, chained to other
// VArc nodes through `sibl`.
class VArc {
public:
    unsigned int sibl = 0;   // link to the next node in the chain (0 by default)
    std::vector<int> seq;    // the stored items

    // Takes over the contents of `items` (which is left empty) and records
    // `_sib` as the chain link.
    VArc(std::vector<int>& items, unsigned int _sib) : sibl(_sib) {
        items.swap(seq);
    }

    // Creates an empty node with no link.
    VArc() = default;
};
49
+
50
// A node that stores a run of items together with a per-item ancestor vector.
class CArc {
public:
    std::vector<int> seq;              // the stored items
    std::vector<unsigned int> ancest;  // ancestor entries handed over at creation

    // Takes over the contents of both arguments; `_anc` and `items` are left
    // empty afterwards.
    CArc(std::vector<unsigned int>& _anc, std::vector<int>& items) {
        _anc.swap(ancest);
        items.swap(seq);
    }
};
60
+
61
+ extern std::vector<Arc> Tree;
62
+ extern std::vector<VArc> VTree;
63
+ extern std::vector<CArc> CTree;
64
+ }