effspm 0.2.8__cp39-cp39-win_amd64.whl → 0.3.3__cp39-cp39-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. effspm/_effspm.cp39-win_amd64.pyd +0 -0
  2. effspm/_effspm.cpp +961 -210
  3. effspm/btminer/src/build_mdd.cpp +42 -17
  4. effspm/btminer/src/build_mdd.hpp +13 -19
  5. effspm/btminer/src/freq_miner.cpp +134 -49
  6. effspm/btminer/src/freq_miner.hpp +16 -0
  7. effspm/btminer/src/load_inst.cpp +211 -126
  8. effspm/btminer/src/load_inst.hpp +22 -4
  9. effspm/btminer/src/main.cpp +83 -0
  10. effspm/btminer/src/utility.cpp +26 -41
  11. effspm/btminer/src/utility.hpp +6 -30
  12. effspm/freq_miner.hpp +2 -1
  13. effspm/htminer/src/build_mdd.cpp +46 -124
  14. effspm/htminer/src/build_mdd.hpp +56 -49
  15. effspm/htminer/src/freq_miner.cpp +341 -307
  16. effspm/htminer/src/freq_miner.hpp +39 -40
  17. effspm/htminer/src/load_inst.cpp +287 -336
  18. effspm/htminer/src/load_inst.hpp +23 -6
  19. effspm/htminer/src/main.cpp +97 -0
  20. effspm/htminer/src/utility.cpp +38 -57
  21. effspm/htminer/src/utility.hpp +9 -64
  22. effspm/largebm/src/build_mdd.cpp +69 -110
  23. effspm/largebm/src/build_mdd.hpp +22 -37
  24. effspm/largebm/src/freq_miner.cpp +241 -291
  25. effspm/largebm/src/freq_miner.hpp +25 -36
  26. effspm/largebm/src/load_inst.cpp +20 -26
  27. effspm/largebm/src/load_inst.hpp +24 -34
  28. effspm/largebm/src/main.cpp +95 -0
  29. effspm/largebm/src/utility.cpp +11 -21
  30. effspm/largebm/src/utility.hpp +7 -10
  31. effspm/largehm/src/build_mdd.cpp +75 -110
  32. effspm/largehm/src/build_mdd.hpp +53 -73
  33. effspm/largehm/src/freq_miner.cpp +134 -191
  34. effspm/largehm/src/freq_miner.hpp +37 -60
  35. effspm/largehm/src/load_inst.cpp +137 -174
  36. effspm/largehm/src/load_inst.hpp +13 -50
  37. effspm/largehm/src/main.cpp +95 -0
  38. effspm/largehm/src/utility.cpp +46 -28
  39. effspm/largehm/src/utility.hpp +18 -16
  40. effspm/largepp/src/freq_miner.cpp +184 -156
  41. effspm/largepp/src/freq_miner.hpp +11 -36
  42. effspm/largepp/src/load_inst.cpp +32 -12
  43. effspm/largepp/src/load_inst.hpp +15 -9
  44. effspm/largepp/src/main.cpp +108 -0
  45. effspm/largepp/src/pattern.hpp +31 -0
  46. effspm/load_inst.cpp +8 -8
  47. effspm/load_inst.hpp +1 -1
  48. effspm/main.cpp +103 -0
  49. {effspm-0.2.8.dist-info → effspm-0.3.3.dist-info}/METADATA +1 -1
  50. effspm-0.3.3.dist-info/RECORD +60 -0
  51. effspm-0.2.8.dist-info/RECORD +0 -53
  52. {effspm-0.2.8.dist-info → effspm-0.3.3.dist-info}/WHEEL +0 -0
  53. {effspm-0.2.8.dist-info → effspm-0.3.3.dist-info}/licenses/LICENSE +0 -0
  54. {effspm-0.2.8.dist-info → effspm-0.3.3.dist-info}/top_level.txt +0 -0
@@ -2,37 +2,55 @@
2
2
  #include "build_mdd.hpp"
3
3
  #include "load_inst.hpp"
4
4
  #include <iostream>
5
+
5
6
  namespace largehm {
6
- std::vector<std::vector<int>> collected;
7
- bool check_parent(unsigned long long int cur_anct, unsigned long long int str_pnt, unsigned long long int start, vector<unsigned long long int>& strpnt_vec) {
8
-
9
- vector<unsigned long long int> ancestors;
10
-
11
- while (abs(Tree[cur_anct].itmset) > abs(Tree[str_pnt].itmset)) {
12
- if (Tree[cur_anct].item > 0)
13
- ancestors.push_back(cur_anct);
14
- cur_anct = Tree[cur_anct].anct;
15
- }
16
-
17
- if (abs(Tree[cur_anct].itmset) == abs(Tree[str_pnt].itmset))
18
- return 1;
19
- else {
20
- for (vector<unsigned long long int>::reverse_iterator it = ancestors.rbegin(); it != ancestors.rend(); ++it) {
21
- for (unsigned int i = start; i < strpnt_vec.size(); ++i) {
22
- if (strpnt_vec[i] == *it)
23
- return 1;
24
- }
25
- }
26
- }
27
-
28
- return 0;
29
7
 
8
+ using namespace std;
9
+
10
+ // storage for mined patterns (each pattern = vector<int>)
11
+ std::vector<std::vector<int>> collectedPatterns;
12
+
13
+ bool check_parent(unsigned long long int cur_anct,
14
+ unsigned long long int str_pnt,
15
+ unsigned long long int start,
16
+ vector<unsigned long long int>& strpnt_vec) {
17
+
18
+ vector<unsigned long long int> ancestors;
19
+
20
+ while (abs(Tree[cur_anct].itmset) > abs(Tree[str_pnt].itmset)) {
21
+ if (Tree[cur_anct].item > 0)
22
+ ancestors.push_back(cur_anct);
23
+ cur_anct = Tree[cur_anct].anct;
24
+ }
25
+
26
+ if (abs(Tree[cur_anct].itmset) == abs(Tree[str_pnt].itmset))
27
+ return 1;
28
+ else {
29
+ for (vector<unsigned long long int>::reverse_iterator it = ancestors.rbegin();
30
+ it != ancestors.rend(); ++it) {
31
+ for (unsigned int i = start; i < strpnt_vec.size(); ++i) {
32
+ if (strpnt_vec[i] == *it)
33
+ return 1;
34
+ }
35
+ }
36
+ }
37
+
38
+ return 0;
30
39
  }
31
40
 
41
+ float give_time(clock_t kk) {
42
+ float ll = ((float)kk) / CLOCKS_PER_SEC;
43
+ return ll;
44
+ }
32
45
 
46
+ // clear vector used to return patterns to Python
47
+ void ClearCollected() {
48
+ collectedPatterns.clear();
49
+ }
50
+
51
+ // return reference so Python wrapper can build list[list[int]]
52
+ const std::vector<std::vector<int>>& GetCollected() {
53
+ return collectedPatterns;
54
+ }
33
55
 
34
- // float give_time(clock_t kk) {
35
- // float ll = ((float)kk) / CLOCKS_PER_SEC;
36
- // return ll;
37
- // }
38
- }
56
+ } // namespace largehm
@@ -6,24 +6,26 @@
6
6
  #include "build_mdd.hpp"
7
7
 
8
8
  namespace largehm {
9
- using namespace std;
10
9
 
11
- extern std::vector<std::vector<int>> collected;
10
+ using std::vector;
11
+ using std::string;
12
12
 
13
- // Helpers to clear and fetch collected patterns from Python:
14
- inline void ClearCollected() {
15
- collected.clear();
16
- }
17
- inline const std::vector<std::vector<int>>& GetCollected() {
18
- return collected;
19
- }
13
+ // time helper
14
+ float give_time(clock_t kk);
20
15
 
21
- // A small timer helper:
22
- inline float give_time(clock_t kk) {
23
- float ll = ((float)kk) / CLOCKS_PER_SEC;
24
- return ll;
25
- }
26
- bool check_parent(unsigned long long int cur_anct, unsigned long long int str_pnt, unsigned long long int start, vector<unsigned long long int>& strpnt_vec);
16
+ // ancestor-check helper
17
+ bool check_parent(unsigned long long int cur_anct,
18
+ unsigned long long int str_pnt,
19
+ unsigned long long int start,
20
+ vector<unsigned long long int>& strpnt_vec);
27
21
 
22
+ // pattern collection for Python wrapper
23
+ extern std::vector<std::vector<int>> collectedPatterns;
28
24
 
29
- }
25
+ // clear collected patterns between runs
26
+ void ClearCollected();
27
+
28
+ // get collected patterns after mining
29
+ const std::vector<std::vector<int>>& GetCollected();
30
+
31
+ } // namespace largehm
@@ -1,170 +1,198 @@
1
+ #include <algorithm>
2
+ #include <cstdlib>
3
+ #include <fstream>
1
4
  #include <iostream>
2
- #include <time.h>
5
+
3
6
  #include "freq_miner.hpp"
7
+ #include "pattern.hpp"
8
+ #include "load_inst.hpp"
4
9
  #include "utility.hpp"
5
10
 
6
- namespace largepp // ← BEGIN namespacing
7
- {
8
- void Out_patt(vector<int>& seq, unsigned int freq);
9
- void Extend_patt(Pattern& _patt);
10
-
11
- unsigned long long int num_patt = 0;
12
-
13
- Pattern _patt;
11
+ namespace largepp {
14
12
 
15
- void Freq_miner() {
13
+ using std::abs;
14
+ using std::cout;
15
+ using std::endl;
16
+ using std::ofstream;
17
+ using std::swap;
18
+ using std::vector;
16
19
 
17
- vector<int> islist;
20
+ static void Out_patt(vector<int>& seq, unsigned int freq);
21
+ static void Extend_patt(Pattern& _pattern);
18
22
 
19
- for (int i = 0; i < L; ++i) {
20
- if (DFS[i].freq >= theta)
21
- islist.push_back(i);
22
- }
23
-
24
- for (int i = 0; i < DFS.size(); ++i) {
25
- DFS[i].ilist = islist;
26
- DFS[i].slist = islist;
27
- }
28
-
29
- while (!DFS.empty() && give_time(clock() - start_time) < time_limit) {
30
- if (DFS.back().freq >= theta)
31
- Extend_patt(DFS.back());
32
- else
33
- DFS.pop_back();
34
- }
23
+ unsigned long long int num_patt = 0; // counter for emitted patterns
24
+ static Pattern _patt; // scratch pattern (for in-place extend)
35
25
 
26
+ /* ------------------------------------------------------------------ */
27
+ /* Driver */
28
+ /* ------------------------------------------------------------------ */
29
+ void Freq_miner()
30
+ {
31
+ // Build the candidate item list once (items that pass minsup at length-1)
32
+ vector<int> islist;
33
+ islist.reserve(L);
34
+ for (unsigned int i = 0; i < L; ++i) {
35
+ if (DFS[i].freq >= theta) islist.push_back(static_cast<int>(i));
36
+ }
37
+
38
+ // Seed each 1-length pattern’s extension lists
39
+ for (unsigned int i = 0; i < DFS.size(); ++i) {
40
+ DFS[i].ilist = islist;
41
+ DFS[i].slist = islist;
42
+ }
43
+
44
+ // DFS over the stack, extending only nodes whose current support ≥ theta
45
+ while (!DFS.empty() && give_time(std::clock() - start_time) < time_limit) {
46
+ if (DFS.back().freq >= theta) {
47
+ Extend_patt(DFS.back());
48
+ } else {
49
+ DFS.pop_back();
50
+ }
51
+ }
36
52
  }
37
53
 
38
- void Extend_patt(Pattern& _pattern) {
39
-
40
- swap(_patt, _pattern);
41
- DFS.pop_back();
42
-
43
- vector<bool> slist(L, 0);
44
- vector<bool> ilist(L, 0);
45
-
46
- for (vector<int>::iterator it = _patt.slist.begin(); it != _patt.slist.end(); ++it)
47
- slist[*it] = 1;
48
- for (vector<int>::iterator it = _patt.ilist.begin(); it != _patt.ilist.end(); ++it)
49
- ilist[*it] = 1;
50
-
51
- vector<Pattern> pot_patt(L * 2);
52
-
53
- int last_neg = _patt.seq.size() - 1;
54
- while (_patt.seq[last_neg] > 0)
55
- --last_neg;
56
-
57
- for (int i = 0; i < _patt.str_pnt.size(); ++i) {
58
-
59
- vector<bool> found(L * 2, 0);
60
-
61
- unsigned long long int seq = _patt.seq_ID[i];
62
- unsigned int j = _patt.str_pnt[i] + 1;
63
- while (j < items[seq].size() && items[seq][j] > 0) {
64
- int cur_itm = items[seq][j];
65
- if (ilist[cur_itm - 1]) {
66
- pot_patt[cur_itm - 1].seq_ID.push_back(seq);
67
- pot_patt[cur_itm - 1].str_pnt.push_back(j);
68
- ++pot_patt[cur_itm - 1].freq;
69
- found[cur_itm - 1] = 1;
70
- }
71
- ++j;
72
- }
73
-
74
- int num_itmfnd = 0;
75
- for (int k = j; k < items[seq].size(); ++k) {
76
- int cur_itm = abs(items[seq][k]);
77
- if (items[seq][k] < 0)
78
- num_itmfnd = 0;
79
- if (slist[cur_itm - 1] && !found[L + cur_itm - 1]) {
80
- pot_patt[L + cur_itm - 1].seq_ID.push_back(seq);
81
- pot_patt[L + cur_itm - 1].str_pnt.push_back(k);
82
- ++pot_patt[L + cur_itm - 1].freq;
83
- found[L + cur_itm - 1] = 1;
84
- }
85
- if (num_itmfnd == _patt.seq.size() - last_neg) {
86
- if (ilist[cur_itm - 1] && !found[cur_itm - 1]) {
87
- pot_patt[cur_itm - 1].seq_ID.push_back(seq);
88
- pot_patt[cur_itm - 1].str_pnt.push_back(k);
89
- ++pot_patt[cur_itm - 1].freq;
90
- found[cur_itm - 1] = 1;
91
- }
92
- }
93
- else if (cur_itm == abs(_patt.seq[last_neg + num_itmfnd]))
94
- ++num_itmfnd;
95
- }
96
- }
97
-
98
-
99
- vector<int> slistp;
100
- vector<int> ilistp;
101
-
102
- for (vector<int>::iterator it = _patt.ilist.begin(); it != _patt.ilist.end(); ++it) {
103
- if (pot_patt[*it].freq >= theta)
104
- ilistp.push_back(*it);
105
- }
106
-
107
- for (vector<int>::iterator it = _patt.slist.begin(); it != _patt.slist.end(); ++it) {
108
- if (pot_patt[(*it) + L].freq >= theta)
109
- slistp.push_back(*it);
110
- }
111
-
112
- for (vector<int>::iterator it = ilistp.begin(); it != ilistp.end(); ++it) {
113
- DFS.emplace_back();
114
- swap(DFS.back(), pot_patt[*it]);
115
- DFS.back().seq = _patt.seq;
116
- DFS.back().seq.push_back((*it) + 1);
117
- DFS.back().slist = slistp;
118
- DFS.back().ilist = ilistp;
119
- if (b_disp || b_write)
120
- Out_patt(DFS.back().seq, DFS.back().freq);
121
- ++num_patt;
122
- }
123
-
124
-
125
- for (vector<int>::iterator it = slistp.begin(); it != slistp.end(); ++it) {
126
- DFS.emplace_back();
127
- swap(DFS.back(), pot_patt[(*it) + L]);
128
- DFS.back().seq = _patt.seq;
129
- DFS.back().seq.push_back(-(*it) - 1);
130
- DFS.back().slist = slistp;
131
- DFS.back().ilist = slistp;
132
- if (b_disp || b_write)
133
- Out_patt(DFS.back().seq, DFS.back().freq);
134
- ++num_patt;
135
- }
136
-
54
+ /* ------------------------------------------------------------------ */
55
+ /* Extend_patt: given a frequent pattern, enumerate its i- and s-ext */
56
+ /* ------------------------------------------------------------------ */
57
+ static void Extend_patt(Pattern& _pattern)
58
+ {
59
+ swap(_patt, _pattern); // work on local scratch
60
+ DFS.pop_back(); // remove from stack
61
+
62
+ // Quick presence tables for allowed i-/s-extensions
63
+ vector<bool> slist(L, false);
64
+ vector<bool> ilist(L, false);
65
+ for (int idx : _patt.slist) slist[static_cast<size_t>(idx)] = true;
66
+ for (int idx : _patt.ilist) ilist[static_cast<size_t>(idx)] = true;
67
+
68
+ // Potential children buffers:
69
+ vector<Pattern> pot_patt(L * 2); // [0..L-1] = i-ext, [L..2L-1] = s-ext
70
+
71
+ // Find last negative from the end (boundary between itemsets)
72
+ int last_neg = static_cast<int>(_patt.seq.size()) - 1;
73
+ while (last_neg >= 0 && _patt.seq[static_cast<size_t>(last_neg)] > 0) --last_neg;
74
+
75
+ // Scan occurrences to build supports for all valid next-steps
76
+ for (size_t i = 0; i < _patt.str_pnt.size(); ++i) {
77
+ vector<bool> found(L * 2, false);
78
+
79
+ unsigned long long seq_id = _patt.seq_ID[i];
80
+ unsigned int j = _patt.str_pnt[i] + 1;
81
+
82
+ // 1) Same itemset (i-extension) forward until end-of-itemset (>0)
83
+ while (j < items[seq_id].size() && items[seq_id][j] > 0) {
84
+ int cur_itm = items[seq_id][j];
85
+ if (ilist[static_cast<size_t>(cur_itm - 1)]) {
86
+ pot_patt[static_cast<size_t>(cur_itm - 1)].seq_ID.push_back(seq_id);
87
+ pot_patt[static_cast<size_t>(cur_itm - 1)].str_pnt.push_back(j);
88
+ ++pot_patt[static_cast<size_t>(cur_itm - 1)].freq;
89
+ found[static_cast<size_t>(cur_itm - 1)] = true;
90
+ }
91
+ ++j;
92
+ }
93
+
94
+ // 2) Later itemsets (s-extension), plus special re-open i-ext rule
95
+ int num_itmfnd = 0;
96
+ for (size_t k = j; k < items[seq_id].size(); ++k) {
97
+ int cur = items[seq_id][k];
98
+ int cur_itm = abs(cur);
99
+
100
+ if (cur < 0) num_itmfnd = 0; // new itemset boundary seen
101
+
102
+ // s-extension: add cur_itm as new itemset element
103
+ if (slist[static_cast<size_t>(cur_itm - 1)] &&
104
+ !found[static_cast<size_t>(L + cur_itm - 1)]) {
105
+ pot_patt[static_cast<size_t>(L + cur_itm - 1)].seq_ID.push_back(seq_id);
106
+ pot_patt[static_cast<size_t>(L + cur_itm - 1)].str_pnt.push_back(k);
107
+ ++pot_patt[static_cast<size_t>(L + cur_itm - 1)].freq;
108
+ found[static_cast<size_t>(L + cur_itm - 1)] = true;
109
+ }
110
+
111
+ // once we've seen the suffix of the last itemset fully,
112
+ // allow i-extension again (across future itemsets)
113
+ if (num_itmfnd == static_cast<int>(_patt.seq.size()) - last_neg) {
114
+ if (ilist[static_cast<size_t>(cur_itm - 1)] &&
115
+ !found[static_cast<size_t>(cur_itm - 1)]) {
116
+ pot_patt[static_cast<size_t>(cur_itm - 1)].seq_ID.push_back(seq_id);
117
+ pot_patt[static_cast<size_t>(cur_itm - 1)].str_pnt.push_back(k);
118
+ ++pot_patt[static_cast<size_t>(cur_itm - 1)].freq;
119
+ found[static_cast<size_t>(cur_itm - 1)] = true;
120
+ }
121
+ } else if (last_neg + num_itmfnd >= 0 &&
122
+ cur_itm == abs(_patt.seq[static_cast<size_t>(last_neg + num_itmfnd)])) {
123
+ ++num_itmfnd;
124
+ }
125
+ }
126
+ }
127
+
128
+ // Filter children by support threshold
129
+ vector<int> ilistp;
130
+ vector<int> slistp;
131
+ ilistp.reserve(_patt.ilist.size());
132
+ slistp.reserve(_patt.slist.size());
133
+
134
+ for (int idx : _patt.ilist) {
135
+ if (pot_patt[static_cast<size_t>(idx)].freq >= theta)
136
+ ilistp.push_back(idx);
137
+ }
138
+ for (int idx : _patt.slist) {
139
+ if (pot_patt[static_cast<size_t>(idx + static_cast<int>(L))].freq >= theta)
140
+ slistp.push_back(idx);
141
+ }
142
+
143
+ // Push all i-extensions
144
+ for (int idx : ilistp) {
145
+ DFS.emplace_back();
146
+ swap(DFS.back(), pot_patt[static_cast<size_t>(idx)]);
147
+
148
+ DFS.back().seq = _patt.seq;
149
+ DFS.back().seq.push_back(idx + 1);
150
+
151
+ DFS.back().slist = slistp;
152
+ DFS.back().ilist = ilistp;
153
+
154
+ // ALWAYS emit (so collected fills even if !b_disp && !b_write)
155
+ Out_patt(DFS.back().seq, DFS.back().freq);
156
+ ++num_patt;
157
+ }
158
+
159
+ // Push all s-extensions
160
+ for (int idx : slistp) {
161
+ DFS.emplace_back();
162
+ swap(DFS.back(), pot_patt[static_cast<size_t>(idx + static_cast<int>(L))]);
163
+
164
+ DFS.back().seq = _patt.seq;
165
+ DFS.back().seq.push_back(-(idx + 1)); // negative encodes new itemset
166
+
167
+ DFS.back().slist = slistp;
168
+ DFS.back().ilist = slistp; // as in original code
169
+
170
+ // ALWAYS emit
171
+ Out_patt(DFS.back().seq, DFS.back().freq);
172
+ ++num_patt;
173
+ }
137
174
  }
138
175
 
139
-
140
- void Out_patt(vector<int>& seq, unsigned int freq) {
141
-
142
- largepp::collected.push_back(seq);
143
-
144
- ofstream file_o;
145
- if (b_write)
146
- file_o.open(out_file, std::ios::app);
147
-
148
- for (int ii = 0; ii < seq.size(); ii++) {
149
- if (b_disp)
150
- cout << seq[ii] << " ";
151
- if (b_write)
152
- file_o << seq[ii] << " ";
153
- }
154
- if (b_disp)
155
- cout << endl;
156
- if (b_write)
157
- file_o << endl;
158
-
159
- if (b_disp)
160
- cout << "************** Freq: " << freq << endl;
161
- if (b_write) {
162
- file_o << "************** Freq: " << freq << endl;
163
- file_o.close();
164
- }
165
- }
176
+ /* ------------------------------------------------------------------ */
177
+ /* Out_patt: append to buffer; optionally print/write */
178
+ /* ------------------------------------------------------------------ */
179
+ static void Out_patt(vector<int>& seq, unsigned int freq)
180
+ {
181
+ // Always append to in-memory results returned to Python
182
+ largepp::collected.push_back(seq);
183
+
184
+ ofstream file_o;
185
+ if (b_write) file_o.open(out_file, std::ios::app);
186
+
187
+ if (b_disp) {
188
+ for (int v : seq) cout << v << " ";
189
+ cout << "\n************** Freq: " << freq << endl;
190
+ }
191
+ if (b_write) {
192
+ for (int v : seq) file_o << v << " ";
193
+ file_o << "\n************** Freq: " << freq << "\n";
194
+ file_o.close();
195
+ }
166
196
  }
167
197
 
168
-
169
-
170
-
198
+ } // namespace largepp
@@ -1,43 +1,18 @@
1
1
  #pragma once
2
2
 
3
- #include "load_inst.hpp"
4
- namespace largepp // ← BEGIN namespacing
5
- {
6
- void Freq_miner();
7
-
8
- class Pattern {
9
- public:
10
-
11
- vector<int> seq;
12
- vector<unsigned int> str_pnt;
13
- vector<unsigned long long int> seq_ID;
14
-
15
- vector<int> slist;
16
- vector<int> ilist;
17
-
18
- unsigned long long int freq;
3
+ #include <vector>
4
+ #include <string>
19
5
 
20
- Pattern(vector<int>& _seq, int item) {
21
- seq.reserve(_seq.size());
22
- for (int i = 0; i < _seq.size(); ++i)
23
- seq.push_back(_seq[i]);
24
- seq.push_back(item);
25
- freq = 0;
26
- }
6
+ #include "pattern.hpp" // defines largepp::Pattern
7
+ #include "load_inst.hpp" // declares externs: items, L, theta, DFS, etc.
8
+ #include "utility.hpp" // flags, collected buffer, timers, helpers
27
9
 
10
+ namespace largepp {
28
11
 
29
- Pattern(int item) {
30
- seq.push_back(item);
31
- freq = 0;
32
- }
33
-
34
- Pattern() {
35
- freq = 0;
36
- }
37
-
38
- };
39
-
40
- extern vector<Pattern> DFS; //DFS queue of potential patterns to extend
12
+ // Public entry point
13
+ void Freq_miner();
41
14
 
15
+ // (defined in the .cpp)
42
16
  extern unsigned long long int num_patt;
43
- }
17
+
18
+ } // namespace largepp
@@ -2,6 +2,7 @@
2
2
  #include <sstream>
3
3
  #include <algorithm>
4
4
  #include <cmath>
5
+ #include <fstream>
5
6
  #include "load_inst.hpp"
6
7
  #include "freq_miner.hpp"
7
8
  #include "utility.hpp"
@@ -36,6 +37,7 @@ bool Load_instance(string& items_file, double thresh)
36
37
 
37
38
  cout << "\nPreprocess done in " << give_time(clock() - kk) << " seconds\n\n";
38
39
 
40
+ DFS.clear();
39
41
  DFS.reserve(L);
40
42
  for (unsigned int i = 0; i < L; ++i)
41
43
  DFS.emplace_back(-int(i) - 1);
@@ -48,10 +50,28 @@ bool Load_instance(string& items_file, double thresh)
48
50
  return false;
49
51
  else
50
52
  theta = (thresh < 1.0) ? ceil(thresh * N) : thresh;
51
-
52
- cout << "\nMDD Database built in " << give_time(clock() - kk) << " seconds\n\n";
53
- cout << "Found " << N << " sequence, with max line len " << M
54
- << ", and " << L << " items, and " << E << " enteries\n";
53
+ if (b_disp)
54
+ cout << "\nMDD Database built in " << give_time(clock() - kk) << " seconds\n\n";
55
+ if (b_disp)
56
+ cout << "Found " << N << " sequence, with max line len " << M
57
+ << ", and " << L << " items, and " << E << " enteries\n";
58
+
59
+ // ───────────────────────────────────────────────────────────
60
+ // DEBUG snapshot of seeds right after loading
61
+ // ───────────────────────────────────────────────────────────
62
+ {
63
+ unsigned long long seeds_ge_theta = 0, seeds_nonzero = 0, max_freq = 0;
64
+ for (size_t i = 0; i < DFS.size(); ++i) {
65
+ if (DFS[i].freq > 0) ++seeds_nonzero;
66
+ if (DFS[i].freq >= theta) ++seeds_ge_theta;
67
+ if (DFS[i].freq > max_freq) max_freq = DFS[i].freq;
68
+ }
69
+ // std::cout << " theta=" << theta
70
+ // << " | DFS.size=" << DFS.size()
71
+ // << " | seeds>=theta=" << seeds_ge_theta
72
+ // << " | seeds>0=" << seeds_nonzero
73
+ // << " | max_seed_freq=" << max_freq << "\n";
74
+ }
55
75
 
56
76
  return true;
57
77
  }
@@ -67,12 +87,12 @@ void Load_py(const pybind11::object& data, double thresh)
67
87
  int max_id = 0;
68
88
  M = 0; E = 0;
69
89
  for (auto& seq : items) {
70
- M = max<unsigned int>(M, seq.size());
90
+ M = max<unsigned int>(M, static_cast<unsigned int>(seq.size()));
71
91
  E += seq.size();
72
92
  for (int x : seq)
73
93
  max_id = max(max_id, abs(x));
74
94
  }
75
- L = max_id;
95
+ L = static_cast<unsigned int>(max_id);
76
96
  theta = (thresh < 1.0) ? ceil(thresh * N) : thresh;
77
97
 
78
98
  DFS.clear();
@@ -82,7 +102,7 @@ void Load_py(const pybind11::object& data, double thresh)
82
102
  }
83
103
 
84
104
  /* =================================================================
85
- * The professor’s original helpers — untouched
105
+ * The professor’s original helpers — untouched except minor safety
86
106
  * ================================================================= */
87
107
  static bool Preprocess(string& inst, double thresh)
88
108
  {
@@ -97,7 +117,7 @@ static bool Preprocess(string& inst, double thresh)
97
117
  string itm;
98
118
  while (word >> itm) {
99
119
  ditem = stoi(itm);
100
- L = max<unsigned int>(L, abs(ditem));
120
+ L = max<unsigned int>(L, static_cast<unsigned int>(abs(ditem)));
101
121
 
102
122
  if (freq.size() < L) {
103
123
  freq.resize(L, 0);
@@ -170,7 +190,7 @@ static void Load_items_pre(string& inst)
170
190
  }
171
191
  if (empty_seq) continue;
172
192
 
173
- ++N; E += size_m; M = max<unsigned int>(M, size_m);
193
+ ++N; E += size_m; M = max<unsigned int>(M, static_cast<unsigned int>(size_m));
174
194
  }
175
195
  }
176
196
 
@@ -193,8 +213,8 @@ static bool Load_items(string& inst)
193
213
 
194
214
  while (word >> itm) {
195
215
  ditem = stoi(itm);
196
- if (L < abs(ditem)) {
197
- L = abs(ditem);
216
+ if (L < static_cast<unsigned int>(abs(ditem))) {
217
+ L = static_cast<unsigned int>(abs(ditem));
198
218
  while (DFS.size() < L) {
199
219
  DFS.emplace_back(-int(DFS.size()) - 1);
200
220
  counted.push_back(0);
@@ -211,7 +231,7 @@ static bool Load_items(string& inst)
211
231
  ++size_m;
212
232
  }
213
233
  E += size_m;
214
- M = max<unsigned int>(M, size_m);
234
+ M = max<unsigned int>(M, static_cast<unsigned int>(size_m));
215
235
  }
216
236
  return true;
217
237
  }