effspm 0.1.5__cp310-cp310-win_amd64.whl → 0.3.0__cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. effspm/__init__.py +9 -2
  2. effspm/_core.cpp +91 -13
  3. effspm/_effspm.cp310-win_amd64.pyd +0 -0
  4. effspm/_effspm.cpp +679 -0
  5. effspm/btminer/src/build_mdd.cpp +88 -0
  6. effspm/btminer/src/build_mdd.hpp +34 -0
  7. effspm/btminer/src/freq_miner.cpp +264 -0
  8. effspm/btminer/src/freq_miner.hpp +55 -0
  9. effspm/btminer/src/load_inst.cpp +275 -0
  10. effspm/btminer/src/load_inst.hpp +43 -0
  11. effspm/btminer/src/utility.cpp +50 -0
  12. effspm/btminer/src/utility.hpp +16 -0
  13. effspm/freq_miner.hpp +7 -1
  14. effspm/htminer/src/build_mdd.cpp +139 -0
  15. effspm/htminer/src/build_mdd.hpp +64 -0
  16. effspm/htminer/src/freq_miner.cpp +350 -0
  17. effspm/htminer/src/freq_miner.hpp +60 -0
  18. effspm/htminer/src/load_inst.cpp +394 -0
  19. effspm/htminer/src/load_inst.hpp +23 -0
  20. effspm/htminer/src/utility.cpp +72 -0
  21. effspm/htminer/src/utility.hpp +77 -0
  22. effspm/largebm/src/build_mdd.cpp +96 -0
  23. effspm/largebm/src/build_mdd.hpp +32 -0
  24. effspm/largebm/src/freq_miner.cpp +299 -0
  25. effspm/largebm/src/freq_miner.hpp +37 -0
  26. effspm/largebm/src/load_inst.cpp +224 -0
  27. effspm/largebm/src/load_inst.hpp +35 -0
  28. effspm/largebm/src/utility.cpp +35 -0
  29. effspm/largebm/src/utility.hpp +15 -0
  30. effspm/largehm/src/build_mdd.cpp +174 -0
  31. effspm/largehm/src/build_mdd.hpp +93 -0
  32. effspm/largehm/src/freq_miner.cpp +429 -0
  33. effspm/largehm/src/freq_miner.hpp +77 -0
  34. effspm/largehm/src/load_inst.cpp +375 -0
  35. effspm/largehm/src/load_inst.hpp +64 -0
  36. effspm/largehm/src/utility.cpp +38 -0
  37. effspm/largehm/src/utility.hpp +29 -0
  38. effspm/largepp/src/freq_miner.cpp +198 -0
  39. effspm/largepp/src/freq_miner.hpp +18 -0
  40. effspm/largepp/src/load_inst.cpp +238 -0
  41. effspm/largepp/src/load_inst.hpp +34 -0
  42. effspm/largepp/src/pattern.hpp +31 -0
  43. effspm/largepp/src/utility.cpp +34 -0
  44. effspm/largepp/src/utility.hpp +21 -0
  45. effspm/load_inst.hpp +18 -12
  46. effspm-0.3.0.dist-info/METADATA +237 -0
  47. effspm-0.3.0.dist-info/RECORD +54 -0
  48. {effspm-0.1.5.dist-info → effspm-0.3.0.dist-info}/WHEEL +1 -1
  49. effspm/_core.cp310-win_amd64.pyd +0 -0
  50. effspm-0.1.5.dist-info/METADATA +0 -38
  51. effspm-0.1.5.dist-info/RECORD +0 -14
  52. {effspm-0.1.5.dist-info → effspm-0.3.0.dist-info}/licenses/LICENSE +0 -0
  53. {effspm-0.1.5.dist-info → effspm-0.3.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,198 @@
1
+ #include <algorithm>
2
+ #include <cstdlib>
3
+ #include <fstream>
4
+ #include <iostream>
5
+
6
+ #include "freq_miner.hpp"
7
+ #include "pattern.hpp"
8
+ #include "load_inst.hpp"
9
+ #include "utility.hpp"
10
+
11
+ namespace largepp {
12
+
13
+ using std::abs;
14
+ using std::cout;
15
+ using std::endl;
16
+ using std::ofstream;
17
+ using std::swap;
18
+ using std::vector;
19
+
20
+ static void Out_patt(vector<int>& seq, unsigned int freq); // stores one pattern in `collected`; prints / writes it when b_disp / b_write are set
21
+ static void Extend_patt(Pattern& _pattern); // takes the given stack entry off DFS and pushes its frequent one-item extensions
22
+
23
+ unsigned long long int num_patt = 0; // incremented once for every pattern Extend_patt emits
24
+ static Pattern _patt; // file-local scratch object; Extend_patt swaps the pattern being extended into it
25
+
26
+ /* ------------------------------------------------------------------ */
27
+ /* Driver */
28
+ /* ------------------------------------------------------------------ */
29
+ void Freq_miner()
30
+ {
31
+ // Build the candidate item list once (items that pass minsup at length-1)
32
+ vector<int> islist;
33
+ islist.reserve(L);
34
+ for (unsigned int i = 0; i < L; ++i) {
35
+ if (DFS[i].freq >= theta) islist.push_back(static_cast<int>(i));
36
+ }
37
+
38
+ // Seed each 1-length pattern’s extension lists
39
+ for (unsigned int i = 0; i < DFS.size(); ++i) {
40
+ DFS[i].ilist = islist;
41
+ DFS[i].slist = islist;
42
+ }
43
+
44
+ // DFS over the stack, extending only nodes whose current support ≥ theta
45
+ while (!DFS.empty() && give_time(std::clock() - start_time) < time_limit) {
46
+ if (DFS.back().freq >= theta) {
47
+ Extend_patt(DFS.back());
48
+ } else {
49
+ DFS.pop_back();
50
+ }
51
+ }
52
+ }
53
+
54
+ /* ------------------------------------------------------------------ */
55
+ /* Extend_patt: takes the pattern passed in (the top of DFS) off the stack, counts the support of each allowed i-/s-extension over its recorded occurrences, then pushes and emits every child whose support is >= theta */
56
+ /* ------------------------------------------------------------------ */
57
+ static void Extend_patt(Pattern& _pattern)
58
+ {
59
+ swap(_patt, _pattern); // move the pattern into the file-level scratch object _patt
60
+ DFS.pop_back(); // drop the top stack slot (Freq_miner passes DFS.back() as _pattern)
61
+
62
+ // Presence tables indexed by item index (item id - 1): which items may still be tried as s-/i-extensions
63
+ vector<bool> slist(L, false);
64
+ vector<bool> ilist(L, false);
65
+ for (int idx : _patt.slist) slist[static_cast<size_t>(idx)] = true;
66
+ for (int idx : _patt.ilist) ilist[static_cast<size_t>(idx)] = true;
67
+
68
+ // Candidate children, one slot per (extension kind, item index):
69
+ vector<Pattern> pot_patt(L * 2); // [0..L-1] = i-ext, [L..2L-1] = s-ext
70
+
71
+ // Walk back from the end of seq to the last non-positive entry; last_neg ends at -1 when every entry is positive
72
+ int last_neg = static_cast<int>(_patt.seq.size()) - 1;
73
+ while (last_neg >= 0 && _patt.seq[static_cast<size_t>(last_neg)] > 0) --last_neg;
74
+
75
+ // Visit every recorded occurrence (sequence id + position) of the pattern and count candidate extensions
76
+ for (size_t i = 0; i < _patt.str_pnt.size(); ++i) {
77
+ vector<bool> found(L * 2, false); // per-occurrence guard: step 2 counts each child at most once for this occurrence
78
+
79
+ unsigned long long seq_id = _patt.seq_ID[i];
80
+ unsigned int j = _patt.str_pnt[i] + 1; // first position after the recorded match position
81
+
82
+ // 1) i-extension inside the current itemset: walk forward while entries stay positive (a non-positive entry ends the run)
83
+ while (j < items[seq_id].size() && items[seq_id][j] > 0) {
84
+ int cur_itm = items[seq_id][j];
85
+ if (ilist[static_cast<size_t>(cur_itm - 1)]) {
86
+ pot_patt[static_cast<size_t>(cur_itm - 1)].seq_ID.push_back(seq_id);
87
+ pot_patt[static_cast<size_t>(cur_itm - 1)].str_pnt.push_back(j);
88
+ ++pot_patt[static_cast<size_t>(cur_itm - 1)].freq;
89
+ found[static_cast<size_t>(cur_itm - 1)] = true;
90
+ }
91
+ ++j;
92
+ }
93
+
94
+ // 2) Remaining positions: s-extensions, plus i-extensions found in a later itemset that contains the pattern's trailing itemset
95
+ int num_itmfnd = 0; // how many entries of seq[last_neg..end] have been matched since the last negative entry
96
+ for (size_t k = j; k < items[seq_id].size(); ++k) {
97
+ int cur = items[seq_id][k];
98
+ int cur_itm = abs(cur);
99
+
100
+ if (cur < 0) num_itmfnd = 0; // a negative entry starts a new itemset: restart the trailing-itemset match
101
+
102
+ // s-extension: record cur_itm as the start of a new itemset (first hit per occurrence only, via found)
103
+ if (slist[static_cast<size_t>(cur_itm - 1)] &&
104
+ !found[static_cast<size_t>(L + cur_itm - 1)]) {
105
+ pot_patt[static_cast<size_t>(L + cur_itm - 1)].seq_ID.push_back(seq_id);
106
+ pot_patt[static_cast<size_t>(L + cur_itm - 1)].str_pnt.push_back(k);
107
+ ++pot_patt[static_cast<size_t>(L + cur_itm - 1)].freq;
108
+ found[static_cast<size_t>(L + cur_itm - 1)] = true;
109
+ }
110
+
111
+ // every entry of the pattern's trailing itemset (seq[last_neg..end]) has been matched since the last boundary,
112
+ // so cur_itm can join that itemset: count it as an i-extension (first hit per occurrence only)
113
+ if (num_itmfnd == static_cast<int>(_patt.seq.size()) - last_neg) {
114
+ if (ilist[static_cast<size_t>(cur_itm - 1)] &&
115
+ !found[static_cast<size_t>(cur_itm - 1)]) {
116
+ pot_patt[static_cast<size_t>(cur_itm - 1)].seq_ID.push_back(seq_id);
117
+ pot_patt[static_cast<size_t>(cur_itm - 1)].str_pnt.push_back(k);
118
+ ++pot_patt[static_cast<size_t>(cur_itm - 1)].freq;
119
+ found[static_cast<size_t>(cur_itm - 1)] = true;
120
+ }
121
+ } else if (last_neg + num_itmfnd >= 0 && // otherwise keep matching the trailing itemset one entry at a time
122
+ cur_itm == abs(_patt.seq[static_cast<size_t>(last_neg + num_itmfnd)])) {
123
+ ++num_itmfnd;
124
+ }
125
+ }
126
+ }
127
+
128
+ // Keep only the extension items whose child reached the support threshold theta
129
+ vector<int> ilistp;
130
+ vector<int> slistp;
131
+ ilistp.reserve(_patt.ilist.size());
132
+ slistp.reserve(_patt.slist.size());
133
+
134
+ for (int idx : _patt.ilist) {
135
+ if (pot_patt[static_cast<size_t>(idx)].freq >= theta)
136
+ ilistp.push_back(idx);
137
+ }
138
+ for (int idx : _patt.slist) {
139
+ if (pot_patt[static_cast<size_t>(idx + static_cast<int>(L))].freq >= theta)
140
+ slistp.push_back(idx);
141
+ }
142
+
143
+ // Push every frequent i-extension; the item is appended to seq as a positive value
144
+ for (int idx : ilistp) {
145
+ DFS.emplace_back();
146
+ swap(DFS.back(), pot_patt[static_cast<size_t>(idx)]); // take over the occurrence lists and freq collected above
147
+
148
+ DFS.back().seq = _patt.seq;
149
+ DFS.back().seq.push_back(idx + 1); // idx is zero-based, item ids are one-based
150
+
151
+ DFS.back().slist = slistp;
152
+ DFS.back().ilist = ilistp;
153
+
154
+ // Emit unconditionally; Out_patt stores the pattern and decides itself whether to print or write it
155
+ Out_patt(DFS.back().seq, DFS.back().freq);
156
+ ++num_patt;
157
+ }
158
+
159
+ // Push every frequent s-extension; the item is appended to seq as a negative value
160
+ for (int idx : slistp) {
161
+ DFS.emplace_back();
162
+ swap(DFS.back(), pot_patt[static_cast<size_t>(idx + static_cast<int>(L))]); // s-ext slots live in the upper half of pot_patt
163
+
164
+ DFS.back().seq = _patt.seq;
165
+ DFS.back().seq.push_back(-(idx + 1)); // negative encodes new itemset
166
+
167
+ DFS.back().slist = slistp;
168
+ DFS.back().ilist = slistp; // i-candidates of an s-child are taken from the frequent s-items, not from ilistp
169
+
170
+ // Emit unconditionally, same as for i-extensions
171
+ Out_patt(DFS.back().seq, DFS.back().freq);
172
+ ++num_patt;
173
+ }
174
+ }
175
+
176
+ /* ------------------------------------------------------------------ */
177
+ /* Out_patt: append to buffer; optionally print/write */
178
+ /* ------------------------------------------------------------------ */
179
+ static void Out_patt(vector<int>& seq, unsigned int freq)
180
+ {
181
+ // Always append to in-memory results returned to Python
182
+ largepp::collected.push_back(seq);
183
+
184
+ ofstream file_o;
185
+ if (b_write) file_o.open(out_file, std::ios::app);
186
+
187
+ if (b_disp) {
188
+ for (int v : seq) cout << v << " ";
189
+ cout << "\n************** Freq: " << freq << endl;
190
+ }
191
+ if (b_write) {
192
+ for (int v : seq) file_o << v << " ";
193
+ file_o << "\n************** Freq: " << freq << "\n";
194
+ file_o.close();
195
+ }
196
+ }
197
+
198
+ } // namespace largepp
@@ -0,0 +1,18 @@
1
+ #pragma once
2
+
3
+ #include <vector>
4
+ #include <string>
5
+
6
+ #include "pattern.hpp" // defines largepp::Pattern
7
+ #include "load_inst.hpp" // declares externs: items, L, theta, DFS, etc.
8
+ #include "utility.hpp" // flags, collected buffer, timers, helpers
9
+
10
+ namespace largepp {
11
+
12
+ // Public entry point: runs the depth-first mining loop over the DFS stack until it is empty or time_limit is reached
13
+ void Freq_miner();
14
+
15
+ // Number of patterns emitted so far (defined in freq_miner.cpp)
16
+ extern unsigned long long int num_patt;
17
+
18
+ } // namespace largepp
@@ -0,0 +1,238 @@
1
+ #include <iostream>
2
+ #include <sstream>
3
+ #include <algorithm>
4
+ #include <cmath>
5
+ #include <fstream>
6
+ #include "load_inst.hpp"
7
+ #include "freq_miner.hpp"
8
+ #include "utility.hpp"
9
+
10
+ namespace largepp { // ─── BEGIN namespace ─────────────────────
11
+ using namespace std;
12
+
13
+ /* ------------------------------------------------------------------
14
+ * Global definitions (match the externs in load_inst.hpp)
15
+ * ---------------------------------------------------------------- */
16
+ unsigned int M = 0, L = 0; // M: longest sequence length seen; L: largest item id (or the reduced item count after Preprocess)
17
+ unsigned long long N = 0, E = 0; // N: number of sequences; E: total number of entries over all sequences
18
+ double theta = 0.01; // support threshold; the loaders overwrite it with ceil(thresh * N) or with thresh itself
19
+ vector<vector<int>> items; // the loaded sequences, one inner vector per sequence
20
+ vector<Pattern> DFS; // one seed pattern per item after loading; used as the work stack by Freq_miner
21
+ vector<int> item_dic; // filled by Preprocess: original item index -> reduced item id, or -1 when below theta
22
+
23
+ /* Forward declarations for the file-local loader helpers defined below */
24
+ static bool Load_items(string& inst); // single-pass loader used when pre_pro is off
25
+ static void Load_items_pre(string& inst); // second pass used after Preprocess; remaps ids through item_dic
26
+ static bool Preprocess(string& inst, double thresh); // first pass: per-item line counts, theta and item_dic
27
+
28
+ /* ==================================================================
29
+ * MAIN ENTRY — load from disk
30
+ * ================================================================= */
31
+ bool Load_instance(string& items_file, double thresh)
32
+ {
33
+ clock_t kk = clock();
34
+
35
+ if (pre_pro) {
36
+ if (!Preprocess(items_file, thresh)) return false;
37
+
38
+ cout << "\nPreprocess done in " << give_time(clock() - kk) << " seconds\n\n";
39
+
40
+ DFS.clear();
41
+ DFS.reserve(L);
42
+ for (unsigned int i = 0; i < L; ++i)
43
+ DFS.emplace_back(-int(i) - 1);
44
+
45
+ kk = clock();
46
+ Load_items_pre(items_file);
47
+ N = items.size();
48
+ }
49
+ else if (!Load_items(items_file))
50
+ return false;
51
+ else
52
+ theta = (thresh < 1.0) ? ceil(thresh * N) : thresh;
53
+
54
+ cout << "\nMDD Database built in " << give_time(clock() - kk) << " seconds\n\n";
55
+ cout << "Found " << N << " sequence, with max line len " << M
56
+ << ", and " << L << " items, and " << E << " enteries\n";
57
+
58
+ // ───────────────────────────────────────────────────────────
59
+ // DEBUG snapshot of seeds right after loading
60
+ // ───────────────────────────────────────────────────────────
61
+ {
62
+ unsigned long long seeds_ge_theta = 0, seeds_nonzero = 0, max_freq = 0;
63
+ for (size_t i = 0; i < DFS.size(); ++i) {
64
+ if (DFS[i].freq > 0) ++seeds_nonzero;
65
+ if (DFS[i].freq >= theta) ++seeds_ge_theta;
66
+ if (DFS[i].freq > max_freq) max_freq = DFS[i].freq;
67
+ }
68
+ // std::cout << " theta=" << theta
69
+ // << " | DFS.size=" << DFS.size()
70
+ // << " | seeds>=theta=" << seeds_ge_theta
71
+ // << " | seeds>0=" << seeds_nonzero
72
+ // << " | max_seed_freq=" << max_freq << "\n";
73
+ }
74
+
75
+ return true;
76
+ }
77
+
78
+ /* ==================================================================
79
+ * ALT ENTRY — load directly from a Python list of lists
80
+ * ================================================================= */
81
+ void Load_py(const pybind11::object& data, double thresh)
82
+ {
83
+ items = data.cast<vector<vector<int>>>();
84
+ N = items.size();
85
+
86
+ int max_id = 0;
87
+ M = 0; E = 0;
88
+ for (auto& seq : items) {
89
+ M = max<unsigned int>(M, static_cast<unsigned int>(seq.size()));
90
+ E += seq.size();
91
+ for (int x : seq)
92
+ max_id = max(max_id, abs(x));
93
+ }
94
+ L = static_cast<unsigned int>(max_id);
95
+ theta = (thresh < 1.0) ? ceil(thresh * N) : thresh;
96
+
97
+ DFS.clear();
98
+ DFS.reserve(L);
99
+ for (unsigned int i = 0; i < L; ++i)
100
+ DFS.emplace_back(-int(i) - 1);
101
+ }
102
+
103
+ /* =================================================================
104
+ * The professor’s original helpers — untouched except minor safety
105
+ * ================================================================= */
106
+ static bool Preprocess(string& inst, double thresh)
107
+ {
108
+ ifstream file(inst);
109
+ vector<unsigned long long> freq(1000000), counted(1000000, 0);
110
+
111
+ if (file.good()) {
112
+ string line; int ditem;
113
+ while (getline(file, line) && give_time(clock() - start_time) < time_limit) {
114
+ ++N;
115
+ istringstream word(line);
116
+ string itm;
117
+ while (word >> itm) {
118
+ ditem = stoi(itm);
119
+ L = max<unsigned int>(L, static_cast<unsigned int>(abs(ditem)));
120
+
121
+ if (freq.size() < L) {
122
+ freq.resize(L, 0);
123
+ counted.resize(L, 0);
124
+ }
125
+ if (counted[abs(ditem) - 1] != N) {
126
+ ++freq[abs(ditem) - 1];
127
+ counted[abs(ditem) - 1] = N;
128
+ }
129
+ }
130
+ }
131
+ } else {
132
+ cout << "!!!!!! No such file exists: " << inst << " !!!!!!\n";
133
+ return false;
134
+ }
135
+
136
+ theta = (thresh < 1.0) ? ceil(thresh * N) : thresh;
137
+
138
+ int real_L = 0;
139
+ item_dic.assign(L, -1);
140
+ for (unsigned int i = 0; i < L; ++i)
141
+ if (freq[i] >= theta) item_dic[i] = ++real_L;
142
+
143
+ cout << "Original number of items: " << L
144
+ << " Reduced to: " << real_L << '\n';
145
+
146
+ L = real_L;
147
+ N = 0;
148
+ return true;
149
+ }
150
+
151
+ static void Load_items_pre(string& inst)
152
+ {
153
+ ifstream file(inst);
154
+
155
+ if (!file.good()) return;
156
+ string line; int size_m, ditem; bool empty_seq = false;
157
+
158
+ while (getline(file, line) && give_time(clock() - start_time) < time_limit) {
159
+ vector<bool> counted(L, 0);
160
+ istringstream word(line);
161
+
162
+ if (!empty_seq) items.emplace_back();
163
+ string itm; size_m = 0; bool sgn = false; empty_seq = true;
164
+
165
+ while (word >> itm) {
166
+ ditem = stoi(itm);
167
+
168
+ if (item_dic[abs(ditem) - 1] == -1) {
169
+ if (!sgn) sgn = ditem < 0;
170
+ continue;
171
+ } else {
172
+ ditem = (ditem > 0)
173
+ ? item_dic[ditem - 1]
174
+ : -item_dic[-ditem - 1];
175
+ }
176
+ empty_seq = false;
177
+
178
+ if (sgn) { if (ditem > 0) ditem = -ditem; sgn = false; }
179
+
180
+ items.back().push_back(ditem);
181
+
182
+ if (!counted[abs(ditem) - 1] && !just_build) {
183
+ DFS[abs(ditem) - 1].seq_ID.push_back(items.size() - 1);
184
+ DFS[abs(ditem) - 1].str_pnt.push_back(items.back().size() - 1);
185
+ ++DFS[abs(ditem) - 1].freq;
186
+ counted[abs(ditem) - 1] = true;
187
+ }
188
+ ++size_m;
189
+ }
190
+ if (empty_seq) continue;
191
+
192
+ ++N; E += size_m; M = max<unsigned int>(M, static_cast<unsigned int>(size_m));
193
+ }
194
+ }
195
+
196
+ static bool Load_items(string& inst)
197
+ {
198
+ ifstream file(inst);
199
+ if (!file.good()) {
200
+ cout << "!!!!!! No such file exists: " << inst << " !!!!!!\n";
201
+ return false;
202
+ }
203
+
204
+ string line; int size_m, ditem;
205
+ while (getline(file, line) && give_time(clock() - start_time) < time_limit) {
206
+ ++N;
207
+ vector<bool> counted(L, 0);
208
+ istringstream word(line);
209
+
210
+ items.emplace_back();
211
+ string itm; size_m = 0;
212
+
213
+ while (word >> itm) {
214
+ ditem = stoi(itm);
215
+ if (L < static_cast<unsigned int>(abs(ditem))) {
216
+ L = static_cast<unsigned int>(abs(ditem));
217
+ while (DFS.size() < L) {
218
+ DFS.emplace_back(-int(DFS.size()) - 1);
219
+ counted.push_back(0);
220
+ }
221
+ }
222
+ items.back().push_back(ditem);
223
+
224
+ if (!counted[abs(ditem) - 1] && !just_build) {
225
+ DFS[abs(ditem) - 1].seq_ID.push_back(items.size() - 1);
226
+ DFS[abs(ditem) - 1].str_pnt.push_back(items.back().size() - 1);
227
+ ++DFS[abs(ditem) - 1].freq;
228
+ counted[abs(ditem) - 1] = true;
229
+ }
230
+ ++size_m;
231
+ }
232
+ E += size_m;
233
+ M = max<unsigned int>(M, static_cast<unsigned int>(size_m));
234
+ }
235
+ return true;
236
+ }
237
+
238
+ } // namespace largepp // ─── END namespace ──────────────────────
@@ -0,0 +1,34 @@
1
+ #pragma once
2
+
3
+ #include <vector>
4
+ #include <string>
5
+ #include <fstream>
6
+ #include <map>
7
+ #include <pybind11/pybind11.h>
8
+
9
+ #include "largepp/src/pattern.hpp" // ← ensure Pattern is a complete type here
10
+
11
+ namespace largepp {
12
+ using namespace std; // NOTE(review): makes std names visible inside largepp for every file that includes this header
13
+
14
+ // Public entry points
15
+ bool Load_instance(std::string& items_file, double thresh); // load from a file; returns false if it cannot be opened
16
+ void Load_py(const pybind11::object& py_data, double thresh); // load from a Python list of integer lists
17
+
18
+ // Shared state: items, M, L, N, E, theta and DFS are defined in load_inst.cpp; the flags, out_file, time_limit and start_time in utility.cpp
19
+ extern std::vector<std::vector<int>> items; // loaded sequences
20
+ extern std::string out_file; // file that patterns are appended to when b_write is set
21
+
22
+ extern bool b_disp, b_write, use_dic, just_build, ovr_count, pre_pro;
23
+ extern bool use_list;
24
+
25
+ extern unsigned int M, L, time_limit; // longest sequence length, number of items, time budget in seconds
26
+ extern unsigned long long N; // number of sequences
27
+ extern double theta; // support threshold used by the miner
28
+ extern unsigned long long E; // total number of entries over all sequences
29
+ extern std::clock_t start_time; // clock value that elapsed time is measured from
30
+
31
+ // Seed patterns after loading; Freq_miner then uses it as a stack (back / pop_back) of patterns to extend
32
+ extern std::vector<largepp::Pattern> DFS;
33
+
34
+ } // namespace largepp
@@ -0,0 +1,31 @@
1
+ #pragma once
2
+ #include <vector>
3
+
4
+ namespace largepp {
5
+
6
/**
 * One node of the pattern search.
 *
 * Holds the pattern itself, the places where it was matched, the items that
 * may still be appended to it, and its support.
 */
class Pattern {
public:
    std::vector<int> seq;                    // the pattern's item sequence
    std::vector<unsigned int> str_pnt;       // match position inside each supporting sequence
    std::vector<unsigned long long> seq_ID;  // index of each supporting sequence (parallel to str_pnt)

    std::vector<int> slist;  // item indices still allowed as s-extensions
    std::vector<int> ilist;  // item indices still allowed as i-extensions

    unsigned long long freq = 0;  // support counted so far

    /// Empty pattern with zero support.
    Pattern() = default;

    /// Pattern consisting of the single entry `item`.
    explicit Pattern(int item) : seq(1, item) {}

    /// Pattern made of `_seq` followed by `item`. Takes the prefix by const
    /// reference so const vectors and temporaries are accepted as well.
    Pattern(const std::vector<int>& _seq, int item) {
        seq.reserve(_seq.size() + 1);
        seq.insert(seq.end(), _seq.begin(), _seq.end());
        seq.push_back(item);
    }
};
30
+
31
+ } // namespace largepp
@@ -0,0 +1,34 @@
1
+ #include "utility.hpp"
2
+ #include <string>
3
+
4
+ namespace largepp {
5
+
6
+ // ─── instantiate the globals declared in the header ─────────────
7
+ bool b_disp = false; // when set, Out_patt prints every pattern to stdout
8
+ bool b_write = false; // when set, Out_patt appends every pattern to out_file
9
+ bool use_dic = false; // not read by the largepp loader or miner sources
10
+ bool just_build = false; // when set, the file loaders skip filling the seeds' occurrence lists
11
+ bool ovr_count = false; // not read by the largepp loader or miner sources
12
+ bool pre_pro = false; // when set, Load_instance runs Preprocess followed by Load_items_pre
13
+ bool use_list = true; // large-prefix flag the binder toggles
14
+ unsigned int time_limit = 36000; // compared against give_time(...), i.e. a budget in seconds
15
+ std::string out_file; // path opened in append mode by Out_patt when b_write is set
16
+ std::vector<std::vector<int>> collected; // every pattern passed to Out_patt is appended here
17
+
18
+
19
+ std::clock_t start_time = 0; // clock value that the loaders and the miner measure elapsed time from
20
+
21
+ // ─── helper implementations ─────────────────────────────────────
22
+ void ClearCollected() { collected.clear(); }
23
+
24
+ const std::vector<std::vector<int>>& GetCollected()
25
+ {
26
+ return collected;
27
+ }
28
+
29
// Convert a std::clock_t tick count into seconds.
double give_time(std::clock_t ticks)
{
    const double tick_count = static_cast<double>(ticks);
    return tick_count / CLOCKS_PER_SEC;
}
33
+
34
+ } // namespace largepp
@@ -0,0 +1,21 @@
1
+ #pragma once
2
+ #include <vector>
3
+ #include <ctime>
4
+ #include <string>
5
+
6
+ namespace largepp {
7
+
8
+ // Flag and option globals (declarations only; the definitions and initial values are in utility.cpp)
9
+ extern bool b_disp, b_write, use_dic, just_build, ovr_count, pre_pro;
10
+ extern bool use_list; // defined as true in utility.cpp
11
+ extern unsigned int time_limit; // time budget, compared against give_time() seconds
12
+
13
+ // Buffer that Out_patt appends every mined pattern to; exposed through GetCollected()
14
+ extern std::vector<std::vector<int>> collected;
15
+
16
+ // Helper functions shared by the largepp sources
17
+ void ClearCollected(); // empties the collected buffer
18
+ const std::vector<std::vector<int>>& GetCollected(); // returns a const reference to the collected buffer
19
+ double give_time(std::clock_t ticks); // converts clock ticks to seconds (ticks / CLOCKS_PER_SEC)
20
+
21
+ } // namespace largepp
effspm/load_inst.hpp CHANGED
@@ -1,25 +1,31 @@
1
+ // effspm/load_inst.hpp
1
2
  #pragma once
2
3
 
3
- #include<vector>
4
- #include<string>
4
+ #include <vector>
5
+ #include <string>
5
6
  #include <fstream>
6
7
  #include <map>
7
- // Should work because "effspm" is in include_dirs
8
-
8
+ #include <ctime> // for clock_t
9
9
 
10
10
  using namespace std;
11
11
 
12
- bool Load_instance(string &items_file, double thresh);
13
-
14
- extern vector<vector<int>> items;
12
+ // ------------------------------------------------------------
13
+ // forward declare Pattern (defined in freq_miner.hpp)
14
+ struct Pattern;
15
15
 
16
- extern string out_file;
17
16
 
18
- extern bool b_disp, b_write, use_dic, use_list, pre_pro;
17
+ // Main entrypoint: load your file on disk into 'items', build DFS, theta, etc.
18
+ bool Load_instance(string &items_file, double thresh);
19
19
 
20
- extern unsigned int M, L, time_limit;
20
+ // storage & globals shared between the C++-CLI & Python bindings
21
+ extern vector<vector<int>> items;
22
+ extern vector<Pattern> DFS; // now Pattern is known
23
+ extern vector<int> item_dic;
21
24
 
22
- extern unsigned long long int N, theta;
25
+ extern string out_file;
26
+ extern bool b_disp, b_write, use_dic, use_list, pre_pro;
23
27
 
24
- extern clock_t start_time;
28
+ extern unsigned int M, L, time_limit;
29
+ extern unsigned long long N, E, theta; // E = total number of entries
25
30
 
31
+ extern clock_t start_time;