effspm 0.1.5__cp310-cp310-win_amd64.whl → 0.3.0__cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. effspm/__init__.py +9 -2
  2. effspm/_core.cpp +91 -13
  3. effspm/_effspm.cp310-win_amd64.pyd +0 -0
  4. effspm/_effspm.cpp +679 -0
  5. effspm/btminer/src/build_mdd.cpp +88 -0
  6. effspm/btminer/src/build_mdd.hpp +34 -0
  7. effspm/btminer/src/freq_miner.cpp +264 -0
  8. effspm/btminer/src/freq_miner.hpp +55 -0
  9. effspm/btminer/src/load_inst.cpp +275 -0
  10. effspm/btminer/src/load_inst.hpp +43 -0
  11. effspm/btminer/src/utility.cpp +50 -0
  12. effspm/btminer/src/utility.hpp +16 -0
  13. effspm/freq_miner.hpp +7 -1
  14. effspm/htminer/src/build_mdd.cpp +139 -0
  15. effspm/htminer/src/build_mdd.hpp +64 -0
  16. effspm/htminer/src/freq_miner.cpp +350 -0
  17. effspm/htminer/src/freq_miner.hpp +60 -0
  18. effspm/htminer/src/load_inst.cpp +394 -0
  19. effspm/htminer/src/load_inst.hpp +23 -0
  20. effspm/htminer/src/utility.cpp +72 -0
  21. effspm/htminer/src/utility.hpp +77 -0
  22. effspm/largebm/src/build_mdd.cpp +96 -0
  23. effspm/largebm/src/build_mdd.hpp +32 -0
  24. effspm/largebm/src/freq_miner.cpp +299 -0
  25. effspm/largebm/src/freq_miner.hpp +37 -0
  26. effspm/largebm/src/load_inst.cpp +224 -0
  27. effspm/largebm/src/load_inst.hpp +35 -0
  28. effspm/largebm/src/utility.cpp +35 -0
  29. effspm/largebm/src/utility.hpp +15 -0
  30. effspm/largehm/src/build_mdd.cpp +174 -0
  31. effspm/largehm/src/build_mdd.hpp +93 -0
  32. effspm/largehm/src/freq_miner.cpp +429 -0
  33. effspm/largehm/src/freq_miner.hpp +77 -0
  34. effspm/largehm/src/load_inst.cpp +375 -0
  35. effspm/largehm/src/load_inst.hpp +64 -0
  36. effspm/largehm/src/utility.cpp +38 -0
  37. effspm/largehm/src/utility.hpp +29 -0
  38. effspm/largepp/src/freq_miner.cpp +198 -0
  39. effspm/largepp/src/freq_miner.hpp +18 -0
  40. effspm/largepp/src/load_inst.cpp +238 -0
  41. effspm/largepp/src/load_inst.hpp +34 -0
  42. effspm/largepp/src/pattern.hpp +31 -0
  43. effspm/largepp/src/utility.cpp +34 -0
  44. effspm/largepp/src/utility.hpp +21 -0
  45. effspm/load_inst.hpp +18 -12
  46. effspm-0.3.0.dist-info/METADATA +237 -0
  47. effspm-0.3.0.dist-info/RECORD +54 -0
  48. {effspm-0.1.5.dist-info → effspm-0.3.0.dist-info}/WHEEL +1 -1
  49. effspm/_core.cp310-win_amd64.pyd +0 -0
  50. effspm-0.1.5.dist-info/METADATA +0 -38
  51. effspm-0.1.5.dist-info/RECORD +0 -14
  52. {effspm-0.1.5.dist-info → effspm-0.3.0.dist-info}/licenses/LICENSE +0 -0
  53. {effspm-0.1.5.dist-info → effspm-0.3.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,299 @@
1
+ #include <algorithm>
2
+ #include <fstream>
3
+ #include <iostream>
4
+ #include <unordered_map>
5
+ #include <unordered_set>
6
+ #include <cstdlib> // ensure std::abs(int)
7
+ #include "freq_miner.hpp"
8
+ #include "load_inst.hpp"
9
+ #include "utility.hpp"
10
+ #include "build_mdd.hpp"
11
+
12
+ namespace largebm {
13
+
14
// Miner state shared across this translation unit (declared extern in freq_miner.hpp).
+ unsigned long long int num_patt = 0;  // incremented once per pattern pushed in Extend_patt; reset by Freq_miner
15
+ std::vector<bool> ilist;  // rebuilt in Extend_patt: ilist[v - 1] is set for each positive entry v of the candidate list
16
+ std::vector<bool> slist;  // rebuilt in Extend_patt: slist[-v - 1] is set for each negative entry v of the candidate list
17
+ std::vector<int> DFS_numfound;  // counter stack pushed/popped in step with Extend_patt's local DFS_patt stack
18
+ Pattern _patt;  // pattern currently being extended (Extend_patt swaps the top of DFS into it)
19
+
20
// File-local helpers, defined further down in this file.
+ static void Out_patt(const std::vector<int>& seq, unsigned long long freq);
21
+ static void Extend_patt(Pattern& patt);
22
+
23
+ void Freq_miner() {
24
+ collected.clear();
25
+ num_patt = 0;
26
+
27
+ if (static_cast<int>(DFS.size()) < static_cast<int>(L)) {
28
+ DFS.resize(L);
29
+ }
30
+
31
+ std::vector<int> list;
32
+
33
+ if (use_list) {
34
+ std::vector<int> empty_pref;
35
+ Freq_miner_list(items, empty_pref, theta, collected);
36
+ return;
37
+ }
38
+
39
+ // seed candidates by DFS[i].freq
40
+ for (int i = 0; i < static_cast<int>(L); ++i) {
41
+ if (DFS[i].freq >= theta) {
42
+ list.push_back(-i - 1);
43
+ if (itmset_exists) list.push_back(i + 1);
44
+ }
45
+ }
46
+
47
+ for (size_t i = 0; i < DFS.size(); ++i) {
48
+ DFS[i].list = list;
49
+ }
50
+
51
+ while (!DFS.empty() && give_time(std::clock() - start_time) < time_limit) {
52
+ if (DFS.back().freq >= theta) {
53
+ Extend_patt(DFS.back());
54
+ } else {
55
+ DFS.pop_back();
56
+ }
57
+ }
58
+ }
59
+
60
// Extend_patt: takes the pattern on top of the DFS stack, removes it from the stack and
// pushes every extension of it whose accumulated frequency reaches theta.
//
// Outline of what the body does:
//   1. Swap the pattern into the global _patt and pop it from DFS.
//   2. Rebuild slist / ilist from _patt.list (negative entries -> slist, positive -> ilist).
//   3. For every start node in _patt.str_pnt, walk the Tree below it in two phases and
//      accumulate, per candidate item, a frequency and the list of nodes (str_pnt) reached.
//      Slots [0, L) of pot_patt are filled under slist checks, slots [L, 2L) under ilist checks.
//   4. Filter _patt.list by theta and push one new Pattern per surviving entry, reporting
//      each through Out_patt and counting it in num_patt.
//
// NOTE(review): _patt.seq is indexed without an emptiness check below; this presumably relies
// on callers only passing patterns with a non-empty seq ending in an itemset led by a
// negative item — confirm.
+ static void Extend_patt(Pattern& _pattern) {
61
+ std::swap(_patt, _pattern);  // keep the pattern alive in _patt before its DFS slot is popped
62
+ DFS.pop_back();
63
+
64
+ slist = std::vector<bool>(L, false);
65
+ bool ilist_nempty = false;  // true once any positive candidate is seen
66
+
67
+ if (itmset_exists) {
68
+ ilist = std::vector<bool>(L, false);
69
+ for (size_t i = 0; i < _patt.list.size(); ++i) {
70
+ int v = _patt.list[i];
71
+ if (v < 0) slist[-v - 1] = true;
72
+ else { ilist[v - 1] = true; ilist_nempty = true; }
73
+ }
74
+ } else {
// Without positive items in the input every candidate is treated as negative here.
75
+ for (size_t i = 0; i < _patt.list.size(); ++i) {
76
+ int v = _patt.list[i];
77
+ slist[-v - 1] = true;
78
+ }
79
+ }
80
+
// Scan back over trailing positive entries of seq: last_neg ends on the last non-positive
// entry and itmset_size is the number of entries from there to the end.
81
+ int itmset_size = 1;
82
+ int last_neg = static_cast<int>(_patt.seq.size()) - 1;
83
+ while (_patt.seq[last_neg] > 0) {
84
+ --last_neg;
85
+ ++itmset_size;
86
+ }
87
+
88
+ std::vector<Pattern> pot_patt(L + (ilist_nempty ? L : 0));  // accumulators: [item - 1] and, if needed, [item + L - 1]
89
+ std::vector<unsigned long long> DFS_patt_init;  // phase-1 worklist of Tree node indices
90
+ std::vector<unsigned long long> DFS_patt;  // phase-2 worklist of Tree node indices
91
+ if (ilist_nempty) DFS_numfound.clear();
92
+ std::vector<unsigned long long> last_strpnt(L, 0);  // per item: size of its [item + L] str_pnt after phase 1
93
+
94
+ for (unsigned long long pnt = 0; pnt < _patt.str_pnt.size(); ++pnt) {
// Phase 1: start from the pattern's node; children with positive items are followed
// further within this phase, children with negative items are queued for phase 2.
95
+ DFS_patt_init.push_back(_patt.str_pnt[pnt]);
96
+ while (!DFS_patt_init.empty()) {
97
+ unsigned long long cur_sibl = Tree[DFS_patt_init.back()].chld;
98
+ DFS_patt_init.pop_back();
99
+ while (cur_sibl != 0) {  // a sibl/chld index of 0 ends the chain
100
+ int cur_itm = Tree[cur_sibl].item;
101
+ if (cur_itm < 0) {
102
+ cur_itm = -cur_itm;
103
+ if (slist[cur_itm - 1]) {
104
+ pot_patt[cur_itm - 1].freq += Tree[cur_sibl].freq;
105
+ if (Tree[cur_sibl].chld != 0)
106
+ pot_patt[cur_itm - 1].str_pnt.push_back(cur_sibl);
107
+ }
108
+ if (Tree[cur_sibl].chld != static_cast<unsigned long long>(-1)) {  // nodes whose chld is all-ones are not descended into
109
+ DFS_patt.push_back(cur_sibl);
110
+ if (ilist_nempty) {
111
+ DFS_numfound.push_back(cur_itm == -_patt.seq[last_neg] ? 1 : 0);
112
+ }
113
+ }
114
+ } else {
115
+ if (ilist[cur_itm - 1]) {
116
+ pot_patt[cur_itm + L - 1].freq += Tree[cur_sibl].freq;
117
+ if (Tree[cur_sibl].chld != 0)
118
+ pot_patt[cur_itm + L - 1].str_pnt.push_back(cur_sibl);
119
+ }
120
+ if (Tree[cur_sibl].chld != static_cast<unsigned long long>(-1))
121
+ DFS_patt_init.push_back(cur_sibl);
122
+ }
123
+ cur_sibl = Tree[cur_sibl].sibl;
124
+ }
125
+ }
// Remember how many nodes phase 1 recorded per positive candidate; check_parent below
// only searches entries added after this mark.
126
+ if (ilist_nempty) {
127
+ for (int i = 0; i < static_cast<int>(L); ++i) {
128
+ if (ilist[i]) last_strpnt[i] = pot_patt[i + L].str_pnt.size();
129
+ }
130
+ }
// Phase 2: walk everything below the nodes queued in phase 1. num_found travels with each
// queued node (via DFS_numfound) and counts matched entries of the pattern's last itemset.
131
+ while (!DFS_patt.empty()) {
132
+ unsigned long long cur_sibl = Tree[DFS_patt.back()].chld;
133
+ DFS_patt.pop_back();
134
+ int num_found = 0;
135
+ if (ilist_nempty) { num_found = DFS_numfound.back(); DFS_numfound.pop_back(); }
136
+ while (cur_sibl != 0) {
137
+ int cur_itm = Tree[cur_sibl].item;
138
+ if (cur_itm > 0) {
139
+ if (num_found == itmset_size &&
140
+ ilist[cur_itm - 1] &&
141
+ (Tree[Tree[cur_sibl].anct].itmset < Tree[_patt.str_pnt[pnt]].itmset ||
142
+ !check_parent(cur_sibl, _patt.str_pnt[pnt],
143
+ last_strpnt[cur_itm - 1],
144
+ pot_patt[cur_itm + L - 1].str_pnt))) {
145
+ pot_patt[cur_itm + L - 1].freq += Tree[cur_sibl].freq;
146
+ if (Tree[cur_sibl].chld != 0)
147
+ pot_patt[cur_itm + L - 1].str_pnt.push_back(cur_sibl);
148
+ }
149
+ if (slist[cur_itm - 1] &&
150
+ Tree[Tree[cur_sibl].anct].itmset <= Tree[_patt.str_pnt[pnt]].itmset) {
151
+ pot_patt[cur_itm - 1].freq += Tree[cur_sibl].freq;
152
+ if (Tree[cur_sibl].chld != 0)
153
+ pot_patt[cur_itm - 1].str_pnt.push_back(cur_sibl);
154
+ }
155
+ if (Tree[cur_sibl].chld != static_cast<unsigned long long>(-1)) {
156
+ DFS_patt.push_back(cur_sibl);
157
+ if (ilist_nempty) {
// Advance the match counter only when this item is the next expected entry of the last itemset.
158
+ if (num_found < itmset_size &&
159
+ cur_itm == std::abs(_patt.seq[last_neg + num_found])) {
160
+ DFS_numfound.push_back(num_found + 1);
161
+ } else {
162
+ DFS_numfound.push_back(num_found);
163
+ }
164
+ }
165
+ }
166
+ } else {
167
+ cur_itm = -cur_itm;
168
+ if (slist[cur_itm - 1] &&
169
+ Tree[Tree[cur_sibl].anct].itmset <= Tree[_patt.str_pnt[pnt]].itmset) {
170
+ pot_patt[cur_itm - 1].freq += Tree[cur_sibl].freq;
171
+ if (Tree[cur_sibl].chld != 0)
172
+ pot_patt[cur_itm - 1].str_pnt.push_back(cur_sibl);
173
+ }
174
+ if (Tree[cur_sibl].chld != static_cast<unsigned long long>(-1)) {
175
+ DFS_patt.push_back(cur_sibl);
176
+ if (ilist_nempty) {
177
+ DFS_numfound.push_back(cur_itm == -_patt.seq[last_neg] ? 1 : 0);  // a negative item restarts the match count
178
+ }
179
+ }
180
+ }
181
+ cur_sibl = Tree[cur_sibl].sibl;
182
+ }
183
+ }
184
+ }
185
+
// Build the candidate lists handed to the children: ilistp is given to patterns extended
// by a positive entry, slistp to patterns extended by a negative entry.
186
+ std::vector<int> ilistp;
187
+ std::vector<int> slistp;
188
+ for (size_t i = 0; i < _patt.list.size(); ++i) {
189
+ int v = _patt.list[i];
190
+ int idx = (v < 0) ? (-v - 1) : (v - 1 + static_cast<int>(L));
191
+ if (v > 0 && pot_patt[idx].freq >= theta) {
192
+ ilistp.push_back(v);
193
+ } else if (v < 0 && pot_patt[-v - 1].freq >= theta) {
194
+ if (itmset_exists) slistp.push_back(-v);
195
+ ilistp.push_back(v);
196
+ slistp.push_back(v);
197
+ }
198
+ }
199
+
// Every entry of ilistp passed the theta filter above; push it as a new pattern and report it.
200
+ for (size_t i = 0; i < ilistp.size(); ++i) {
201
+ int v = ilistp[i];
202
+ int p = (v < 0) ? (-v - 1) : (v - 1 + static_cast<int>(L));
203
+ DFS.emplace_back();
204
+ std::swap(DFS.back(), pot_patt[p]);  // takes over the accumulated freq and str_pnt
205
+ DFS.back().seq = _patt.seq;
206
+ DFS.back().seq.push_back(v);
207
+ DFS.back().list = (v < 0) ? slistp : ilistp;
208
+ Out_patt(DFS.back().seq, DFS.back().freq);
209
+ ++num_patt;
210
+ }
211
+ }
212
+
213
+ static void Out_patt(const std::vector<int>& seq, unsigned long long freq) {
214
+ if (!(b_disp || b_write)) {
215
+ collected.push_back(seq);
216
+ return;
217
+ }
218
+ std::ofstream file_o;
219
+ if (b_write) file_o.open(out_file, std::ios::app);
220
+
221
+ for (size_t i = 0; i < seq.size(); ++i) {
222
+ int v = seq[i];
223
+ if (b_disp) std::cout << v << ' ';
224
+ if (b_write) file_o << v << ' ';
225
+ }
226
+ if (b_disp) std::cout << '\n';
227
+ if (b_write) file_o << '\n';
228
+
229
+ if (b_disp) std::cout << "************** Freq: " << freq << '\n';
230
+ if (b_write) {
231
+ file_o << "************** Freq: " << freq << '\n';
232
+ file_o.close();
233
+ }
234
+ collected.push_back(seq);
235
+ }
236
+
237
+ void Freq_miner_list(const std::vector<std::vector<int>>& db,
238
+ std::vector<int>& prefix,
239
+ unsigned long long minsup,
240
+ std::vector<std::vector<int>>& out) {
241
+ std::unordered_map<int, unsigned long long> freq;
242
+ for (size_t sidx = 0; sidx < db.size(); ++sidx) {
243
+ const std::vector<int>& seq = db[sidx];
244
+ std::unordered_set<int> seen;
245
+ for (size_t i = 0; i < seq.size(); ++i) {
246
+ int x = seq[i];
247
+ if (seen.insert(x).second) ++freq[x];
248
+ }
249
+ }
250
+
251
+ std::vector<std::pair<int, unsigned long long> > cand;
252
+ cand.reserve(freq.size());
253
+ for (std::unordered_map<int, unsigned long long>::iterator it = freq.begin();
254
+ it != freq.end(); ++it) {
255
+ if (it->second >= minsup) cand.push_back(*it);
256
+ }
257
+
258
+ std::sort(cand.begin(), cand.end(),
259
+ [](const std::pair<int, unsigned long long>& a,
260
+ const std::pair<int, unsigned long long>& b) {
261
+ return std::abs(a.first) < std::abs(b.first);
262
+ });
263
+
264
+ for (size_t k = 0; k < cand.size(); ++k) {
265
+ int item = cand[k].first;
266
+ prefix.push_back(item);
267
+
268
+ if (use_dic) {
269
+ std::vector<int> unmapped;
270
+ unmapped.reserve(prefix.size());
271
+ for (size_t i = 0; i < prefix.size(); ++i) {
272
+ int cid = prefix[i];
273
+ int abs_id = std::abs(cid);
274
+ int o = inv_item_dic[abs_id];
275
+ unmapped.push_back(cid < 0 ? -o : o);
276
+ }
277
+ out.push_back(unmapped);
278
+ } else {
279
+ out.push_back(prefix);
280
+ }
281
+
282
+ std::vector<std::vector<int> > proj;
283
+ proj.reserve(db.size());
284
+ for (size_t s = 0; s < db.size(); ++s) {
285
+ const std::vector<int>& svec = db[s];
286
+ std::vector<int>::const_iterator it =
287
+ std::find(svec.begin(), svec.end(), item);
288
+ if (it != svec.end()) {
289
+ ++it;
290
+ if (it != svec.end()) proj.push_back(std::vector<int>(it, svec.end()));
291
+ }
292
+ }
293
+
294
+ if (!proj.empty()) Freq_miner_list(proj, prefix, minsup, out);
295
+ prefix.pop_back();
296
+ }
297
+ }
298
+
299
+ } // namespace largebm
@@ -0,0 +1,37 @@
1
+ #pragma once
2
+ #include <vector>
3
+
4
+ namespace largebm {
5
+
6
// One node of the mining search: a pattern plus the bookkeeping needed to
// extend it further.
class Pattern {
public:
    std::vector<int> seq;                     // signed item ids making up the pattern
    std::vector<unsigned long long> str_pnt;  // node indices from which extensions are searched
    std::vector<int> list;                    // candidate items for extending this pattern
    unsigned long long freq = 0;              // accumulated frequency of the pattern

    Pattern() = default;

    // Builds the pattern `_seq + item`. The content of `_seq` is taken over by
    // swapping, so `_seq` is left empty.
    Pattern(std::vector<int>& _seq, int item) : freq(0) {
        seq.swap(_seq);
        seq.push_back(item);
    }

    // Builds the single-item pattern {item}.
    Pattern(int item) : seq(1, item), freq(0) {}
};
24
+
25
// Runs the mining phase on the currently loaded instance; results go to `collected`.
+ void Freq_miner();
26
// List-mode miner: recursively stores in `out` every pattern of `db` whose support
// reaches `theta`; `prefix` is the pattern built so far.
+ void Freq_miner_list(const std::vector<std::vector<int>>& db,
27
+ std::vector<int>& prefix,
28
+ unsigned long long theta,
29
+ std::vector<std::vector<int>>& out);
30
+
31
// Miner state, defined in freq_miner.cpp.
+ extern unsigned long long int num_patt;  // number of patterns emitted by the last run
32
+ extern std::vector<bool> ilist;  // per-item flags for positive candidates
33
+ extern std::vector<bool> slist;  // per-item flags for negative candidates
34
+ extern std::vector<int> DFS_numfound;  // counter stack used while extending a pattern
35
+ extern Pattern _patt;  // pattern currently being extended
36
+
37
+ } // namespace largebm
@@ -0,0 +1,224 @@
1
+ #include <algorithm>
2
+ #include <cmath>
3
+ #include <fstream>
4
+ #include <iostream>
5
+ #include <sstream>
6
+ #include "load_inst.hpp"
7
+ #include "build_mdd.hpp"
8
+ #include "utility.hpp"
9
+ #include "freq_miner.hpp"
10
+
11
+ namespace largebm {
12
+
13
+ // ── single definitions of globals ─────────────────────────────────
14
+ bool use_list = false;  // selects the list-mode path in Load_instance and Freq_miner
14
+ bool b_disp = false;  // Out_patt prints each pattern to std::cout when set
15
+ bool b_write = false;  // Out_patt appends each pattern to out_file when set
16
+ bool use_dic = false;  // Freq_miner_list translates ids through inv_item_dic when set
17
+ bool just_build = false;  // NOTE(review): not read anywhere in the code visible here
18
+ bool pre_pro = false;  // Load_instance loads through Load_items_pre (compact ids) when set
19
+ bool itmset_exists = false;  // set by the loaders once a positive item has been read
20
+
21
// M: longest loaded sequence; L: largest |item id| seen; time_limit: compared with give_time() seconds in Freq_miner.
// NOTE(review): with time_limit left at 0 the mining loop in Freq_miner does not run — presumably set by the caller; confirm.
+ unsigned int M = 0, L = 0, time_limit = 0;
22
// N: line/sequence counter of the loaders; theta: minimum support; num_nodes and E are only reset in the code visible here.
+ unsigned long long N = 0, num_nodes = 0, theta = 0, E = 0;
23
+ std::clock_t start_time = 0;  // std::clock() value taken at the start of Load_instance
24
+
25
+ std::vector<int> item_dic;  // item_dic[id - 1] = compact id of a frequent item, or -1 (filled by Preprocess)
26
+ std::vector<Pattern> DFS;  // stack of patterns still to be extended
27
+ std::vector<std::vector<int>> items;  // list-mode database filled by Load_items_list
28
+ std::vector<std::vector<int>> collected;  // patterns found so far
29
+ std::vector<int> inv_item_dic;  // inv_item_dic[compact id] = original id (filled in list mode by Load_instance)
30
+
31
+ std::string out_file, folder;  // out_file: target of Out_patt; folder is not read in the code visible here
33
+
34
+ // ───────────── helper for list‐mode DB build ─────────────────────
35
+ static void Load_items_list(const std::string& fname) {
36
+ std::ifstream in(fname);
37
+ if (!in.good()) return;
38
+ std::string line;
39
+ while (std::getline(in, line)) {
40
+ std::istringstream iss(line);
41
+ std::vector<int> seq;
42
+ int x;
43
+ while (iss >> x) {
44
+ int a = std::abs(x);
45
+ if (a < 1 || a > static_cast<int>(item_dic.size())) continue;
46
+ if (item_dic[a - 1] == -1) continue;
47
+ seq.push_back(x);
48
+ }
49
+ if (!seq.empty()) items.push_back(seq);
50
+ }
51
+ }
52
+
53
+ bool Load_instance(const std::string& items_file, double minsup) {
54
+ // reset state
55
+ N = L = num_nodes = theta = M = E = 0;
56
+ start_time = std::clock();
57
+
58
+ DFS.clear();
59
+ Tree.clear();
60
+ items.clear();
61
+ collected.clear();
62
+ item_dic.clear();
63
+ inv_item_dic.clear();
64
+ itmset_exists = false;
65
+
66
+ std::clock_t kk = start_time;
67
+ Tree.emplace_back(0, 0, 0); // root
68
+
69
+ if (use_list) {
70
+ if (!Preprocess(items_file, minsup)) return false;
71
+ inv_item_dic.assign(L + 1, 0);
72
+ for (int old = 1; old <= static_cast<int>(item_dic.size()); ++old) {
73
+ int cid = item_dic[old - 1];
74
+ if (cid > 0) inv_item_dic[cid] = old;
75
+ }
76
+ Load_items_list(items_file);
77
+ N = items.size();
78
+ theta = (minsup < 1.0)
79
+ ? static_cast<unsigned long long>(std::ceil(minsup * N))
80
+ : static_cast<unsigned long long>(minsup);
81
+ return true;
82
+ }
83
+
84
+ // MDD build mode
85
+ if (pre_pro) {
86
+ if (!Preprocess(items_file, minsup)) return false;
87
+ DFS.clear();
88
+ DFS.reserve(L);
89
+ for (unsigned int i = 0; i < L; ++i)
90
+ DFS.emplace_back(-int(i) - 1);
91
+ kk = std::clock();
92
+ Load_items_pre(items_file);
93
+ } else {
94
+ if (!Preprocess(items_file, minsup)) return false;
95
+ kk = std::clock();
96
+ Load_items(items_file);
97
+ }
98
+
99
+ // ensure DFS size
100
+ if (DFS.size() < L) {
101
+ DFS.reserve(L);
102
+ while (DFS.size() < L) {
103
+ DFS.emplace_back(-int(DFS.size()) - 1);
104
+ }
105
+ }
106
+
107
+ // SAFETY — seed any zeroed singletons from their str_pnt list
108
+ for (unsigned int i = 0; i < L && i < DFS.size(); ++i) {
109
+ if (DFS[i].freq == 0 && !DFS[i].str_pnt.empty()) {
110
+ DFS[i].freq = static_cast<unsigned long long>(DFS[i].str_pnt.size());
111
+ }
112
+ }
113
+
114
+ return true;
115
+ }
116
+
117
+ bool Preprocess(const std::string& inst, double thresh) {
118
+ std::ifstream file(inst);
119
+ if (!file.good()) return false;
120
+
121
+ std::vector<unsigned long long> freq(1000000);
122
+ std::vector<unsigned long long> counted(1000000, 0);
123
+ std::string line;
124
+ while (std::getline(file, line)) {
125
+ ++N;
126
+ std::istringstream iss(line);
127
+ int x;
128
+ while (iss >> x) {
129
+ int a = std::abs(x);
130
+ L = std::max(L, static_cast<unsigned int>(a));
131
+ if (freq.size() < L) {
132
+ freq.resize(L);
133
+ counted.resize(L);
134
+ }
135
+ if (counted[a - 1] != N) {
136
+ freq[a - 1]++;
137
+ counted[a - 1] = N;
138
+ }
139
+ }
140
+ }
141
+
142
+ theta = (thresh < 1.0)
143
+ ? static_cast<unsigned long long>(std::ceil(thresh * N))
144
+ : static_cast<unsigned long long>(thresh);
145
+
146
+ item_dic.assign(L, -1);
147
+ unsigned int newid = 0;
148
+ for (unsigned int old = 1; old <= L; ++old) {
149
+ if (freq[old - 1] >= theta) {
150
+ ++newid;
151
+ item_dic[old - 1] = static_cast<int>(newid);
152
+ }
153
+ }
154
+
155
+ return true;
156
+ }
157
+
158
+ void Load_items_pre(const std::string& inst_name) {
159
+ std::ifstream file(inst_name);
160
+ if (!file.good()) return;
161
+
162
+ std::string line;
163
+ while (std::getline(file, line)) {
164
+ std::istringstream word(line);
165
+ std::string itm;
166
+ std::vector<int> temp_vec;
167
+ bool sgn = false;
168
+ while (word >> itm) {
169
+ int ditem;
170
+ try { ditem = std::stoi(itm); } catch (...) { continue; }
171
+ int absidx = std::abs(ditem) - 1;
172
+ if (absidx < 0 || absidx >= static_cast<int>(item_dic.size())) {
173
+ if (!sgn && ditem < 0) sgn = true;
174
+ continue;
175
+ }
176
+ if (item_dic[absidx] == -1) {
177
+ if (!sgn && ditem < 0) sgn = true;
178
+ continue;
179
+ }
180
+ if (ditem > 0) { ditem = item_dic[ditem - 1]; itmset_exists = true; }
181
+ else { ditem = -item_dic[-ditem - 1]; }
182
+ if (sgn) { if (ditem > 0) ditem = -ditem; sgn = false; }
183
+ temp_vec.push_back(ditem);
184
+ }
185
+ if (temp_vec.empty()) continue;
186
+ ++N;
187
+ M = std::max<unsigned>(M, temp_vec.size());
188
+ Build_MDD(temp_vec);
189
+ }
190
+ }
191
+
192
+ bool Load_items(const std::string& inst_name) {
193
+ std::ifstream file(inst_name);
194
+ if (!file.good()) return false;
195
+
196
+ std::string line;
197
+ while (std::getline(file, line)) {
198
+ ++N;
199
+ std::istringstream word(line);
200
+ std::string itm;
201
+ std::vector<int> temp_vec;
202
+ while (word >> itm) {
203
+ int ditem;
204
+ try { ditem = std::stoi(itm); } catch (...) { continue; }
205
+ if (ditem > 0) itmset_exists = true;
206
+ unsigned int ad = static_cast<unsigned int>(std::abs(ditem));
207
+ if (L < ad) {
208
+ L = ad;
209
+ DFS.reserve(L);
210
+ while (DFS.size() < L)
211
+ DFS.emplace_back(-int(DFS.size()) - 1);
212
+ }
213
+ temp_vec.push_back(ditem);
214
+ }
215
+ if (temp_vec.size() > M) M = temp_vec.size();
216
+ Build_MDD(temp_vec);
217
+ }
218
+ return true;
219
+ }
220
+
221
// Discards all patterns gathered so far.
+ void ClearCollected() { collected.clear(); }
222
// Read-only access to the gathered patterns; the reference is to the global `collected` vector.
+ const std::vector<std::vector<int>>& GetCollected() { return collected; }
223
+
224
+ } // namespace largebm
@@ -0,0 +1,35 @@
1
+ #pragma once
2
+ #include <vector>
3
+ #include <string>
4
+ #include <ctime>
5
+
6
+ namespace largebm {
7
+
8
+ class Pattern; // forward
9
+ // [2025-10-25 NEW]: match the real definition (class, not struct) to avoid ABI warnings
10
+ class Arc; // was: struct Arc;
11
+
12
+ // Config & state (single definitions in load_inst.cpp)
13
+ extern std::string out_file, folder;
14
+ extern bool use_list;
15
+ extern bool b_disp, b_write, use_dic, just_build, pre_pro, itmset_exists;
16
+ extern unsigned int M, L, time_limit;
17
+ extern unsigned long long N, num_nodes, theta, E;
18
+ extern std::clock_t start_time;
19
+
20
+ extern std::vector<std::vector<int>> items;
21
+ extern std::vector<int> item_dic;
22
+ extern std::vector<int> inv_item_dic;
23
+ extern std::vector<Pattern> DFS;
24
+ extern std::vector<std::vector<int>> collected;
25
+
26
+ // Loader API
27
// Resets the global state and loads `items_file`; `thresh` below 1.0 is a fraction of the
// sequences, otherwise an absolute count. Returns false when the file cannot be read.
+ bool Load_instance(const std::string& items_file, double thresh);
28
// First pass: fills L, theta and item_dic from per-line item counts. Returns false when the
// file cannot be opened.
+ bool Preprocess(const std::string& fname, double thresh);
29
// Second pass using item_dic: translates kept items and builds the tree line by line.
+ void Load_items_pre(const std::string& fname);
30
// Loads the file without id translation and builds the tree line by line. Returns false when
// the file cannot be opened.
+ bool Load_items(const std::string& fname);
31
+
32
// Empties the pattern collection.
+ void ClearCollected();
33
// Returns a reference to the pattern collection.
+ const std::vector<std::vector<int>>& GetCollected();
34
+
35
+ } // namespace largebm
@@ -0,0 +1,35 @@
1
+ #include "utility.hpp"
2
+ #include "build_mdd.hpp"
3
+ #include <vector>
4
+
5
+ namespace largebm {
6
+
7
// Converts a std::clock() tick count into seconds.
double give_time(std::clock_t ticks) {
    const double elapsed_ticks = static_cast<double>(ticks);
    return elapsed_ticks / CLOCKS_PER_SEC;
}
10
+
11
+ bool check_parent(unsigned long long cur_arc,
12
+ unsigned long long str_pnt,
13
+ unsigned long long start,
14
+ std::vector<unsigned long long>& strpnt_vec) {
15
+ std::vector<unsigned long long> ancestors;
16
+ unsigned long long cur_anct = Tree[cur_arc].anct;
17
+
18
+ while (Tree[cur_anct].itmset > Tree[str_pnt].itmset) {
19
+ if (Tree[cur_anct].item > 0) ancestors.push_back(cur_anct);
20
+ cur_anct = Tree[cur_anct].anct;
21
+ }
22
+
23
+ if (Tree[cur_anct].itmset == Tree[str_pnt].itmset)
24
+ return true;
25
+
26
+ for (std::vector<unsigned long long>::reverse_iterator it = ancestors.rbegin();
27
+ it != ancestors.rend(); ++it) {
28
+ for (unsigned long long i = start; i < strpnt_vec.size(); ++i) {
29
+ if (strpnt_vec[i] == *it) return true;
30
+ }
31
+ }
32
+ return false;
33
+ }
34
+
35
+ } // namespace largebm
@@ -0,0 +1,15 @@
1
+ #pragma once
2
+ #include <vector>
3
+ #include <ctime>
4
+
5
+ namespace largebm {
6
+
7
// Converts a std::clock() tick count into seconds (ticks / CLOCKS_PER_SEC).
+ double give_time(std::clock_t ticks);
8
+
9
+ // Check if `str_pnt` is an ancestor of `cur_arc` respecting itemset boundaries
// `start` is the first position of `strpnt_vec` that is searched for matching ancestors.
10
+ bool check_parent(unsigned long long cur_arc,
11
+ unsigned long long str_pnt,
12
+ unsigned long long start,
13
+ std::vector<unsigned long long>& strpnt_vec);
14
+
15
+ } // namespace largebm