effspm 0.2.8__cp312-cp312-macosx_11_0_arm64.whl → 0.3.0__cp312-cp312-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- effspm/_effspm.cpp +310 -240
- effspm/_effspm.cpython-312-darwin.so +0 -0
- effspm/btminer/src/build_mdd.cpp +42 -17
- effspm/btminer/src/build_mdd.hpp +13 -19
- effspm/btminer/src/freq_miner.cpp +134 -49
- effspm/btminer/src/freq_miner.hpp +16 -0
- effspm/btminer/src/load_inst.cpp +196 -121
- effspm/btminer/src/load_inst.hpp +22 -4
- effspm/btminer/src/utility.cpp +26 -41
- effspm/btminer/src/utility.hpp +6 -30
- effspm/freq_miner.hpp +2 -1
- effspm/htminer/src/build_mdd.cpp +33 -86
- effspm/largebm/src/build_mdd.cpp +69 -110
- effspm/largebm/src/build_mdd.hpp +22 -37
- effspm/largebm/src/freq_miner.cpp +241 -291
- effspm/largebm/src/freq_miner.hpp +25 -36
- effspm/largebm/src/load_inst.cpp +20 -26
- effspm/largebm/src/load_inst.hpp +24 -34
- effspm/largebm/src/utility.cpp +11 -21
- effspm/largebm/src/utility.hpp +7 -10
- effspm/largehm/src/freq_miner.cpp +62 -78
- effspm/largehm/src/load_inst.cpp +79 -61
- effspm/largepp/src/freq_miner.cpp +184 -156
- effspm/largepp/src/freq_miner.hpp +11 -36
- effspm/largepp/src/load_inst.cpp +27 -8
- effspm/largepp/src/load_inst.hpp +15 -9
- effspm/largepp/src/pattern.hpp +31 -0
- effspm/load_inst.hpp +1 -1
- {effspm-0.2.8.dist-info → effspm-0.3.0.dist-info}/METADATA +1 -1
- effspm-0.3.0.dist-info/RECORD +54 -0
- effspm-0.2.8.dist-info/RECORD +0 -53
- {effspm-0.2.8.dist-info → effspm-0.3.0.dist-info}/WHEEL +0 -0
- {effspm-0.2.8.dist-info → effspm-0.3.0.dist-info}/licenses/LICENSE +0 -0
- {effspm-0.2.8.dist-info → effspm-0.3.0.dist-info}/top_level.txt +0 -0
effspm/largehm/src/load_inst.cpp
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
// ─── effspm/largehm/src/load_inst.cpp ────────────────────────────────────────
|
|
2
|
+
|
|
1
3
|
#include <iostream>
|
|
2
4
|
#include <sstream>
|
|
3
5
|
#include <algorithm>
|
|
@@ -16,17 +18,17 @@ using namespace std;
|
|
|
16
18
|
string out_file;
|
|
17
19
|
string folder;
|
|
18
20
|
|
|
19
|
-
bool b_disp
|
|
20
|
-
bool b_write
|
|
21
|
-
bool use_dic
|
|
22
|
-
bool use_list
|
|
23
|
-
bool just_build
|
|
24
|
-
bool pre_pro
|
|
21
|
+
bool b_disp = false;
|
|
22
|
+
bool b_write = false;
|
|
23
|
+
bool use_dic = false;
|
|
24
|
+
bool use_list = false;
|
|
25
|
+
bool just_build = false;
|
|
26
|
+
bool pre_pro = false;
|
|
25
27
|
bool itmset_exists = false;
|
|
26
28
|
|
|
27
|
-
unsigned int M
|
|
28
|
-
unsigned int L
|
|
29
|
-
unsigned int mlim
|
|
29
|
+
unsigned int M = 0;
|
|
30
|
+
unsigned int L = 0;
|
|
31
|
+
unsigned int mlim = 0;
|
|
30
32
|
unsigned int time_limit = 0;
|
|
31
33
|
|
|
32
34
|
unsigned long long int N = 0;
|
|
@@ -37,13 +39,16 @@ clock_t start_time = 0;
|
|
|
37
39
|
|
|
38
40
|
vector<vector<int>> items;
|
|
39
41
|
|
|
40
|
-
vector<int>
|
|
42
|
+
vector<int> item_dic;
|
|
41
43
|
vector<Pattern> DFS;
|
|
42
44
|
vector<VPattern> VDFS;
|
|
43
45
|
|
|
44
46
|
|
|
47
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
48
|
+
// Load_instance
|
|
49
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
45
50
|
bool Load_instance(string& items_file, double thresh) {
|
|
46
|
-
//
|
|
51
|
+
// 1) CLEAR leftover state
|
|
47
52
|
Tree.clear();
|
|
48
53
|
VTree.clear();
|
|
49
54
|
CTree.clear();
|
|
@@ -58,62 +63,68 @@ bool Load_instance(string& items_file, double thresh) {
|
|
|
58
63
|
E = 0;
|
|
59
64
|
theta = 0;
|
|
60
65
|
itmset_exists = false;
|
|
61
|
-
// ────────────────────────────────────────────────────
|
|
62
66
|
|
|
63
67
|
clock_t kk = clock();
|
|
64
68
|
|
|
65
|
-
//
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
69
|
+
// root
|
|
70
70
|
Tree.emplace_back(0, 0, 0);
|
|
71
71
|
|
|
72
72
|
if (!pre_pro) {
|
|
73
73
|
if (!Load_items(items_file))
|
|
74
74
|
return false;
|
|
75
|
+
|
|
75
76
|
DFS.reserve(L);
|
|
76
|
-
while (DFS.size() < L)
|
|
77
|
+
while (DFS.size() < L)
|
|
77
78
|
DFS.emplace_back(-static_cast<int>(DFS.size()) - 1);
|
|
78
|
-
|
|
79
|
+
|
|
79
80
|
VDFS.reserve(L);
|
|
80
|
-
while (VDFS.size() < L)
|
|
81
|
+
while (VDFS.size() < L)
|
|
81
82
|
VDFS.emplace_back(static_cast<int>(VDFS.size()));
|
|
82
|
-
|
|
83
|
-
if (thresh < 1.0)
|
|
83
|
+
|
|
84
|
+
if (thresh < 1.0)
|
|
84
85
|
theta = static_cast<unsigned long long>(ceil(thresh * N));
|
|
85
|
-
|
|
86
|
+
else
|
|
86
87
|
theta = static_cast<unsigned long long>(thresh);
|
|
87
|
-
|
|
88
|
+
|
|
89
|
+
start_time = clock();
|
|
88
90
|
}
|
|
89
91
|
else {
|
|
90
92
|
if (!Load_items(items_file))
|
|
91
93
|
return false;
|
|
92
|
-
|
|
94
|
+
|
|
95
|
+
if (thresh < 1.0)
|
|
93
96
|
theta = static_cast<unsigned long long>(ceil(thresh * N));
|
|
94
|
-
|
|
97
|
+
else
|
|
95
98
|
theta = static_cast<unsigned long long>(thresh);
|
|
96
|
-
|
|
99
|
+
|
|
100
|
+
start_time = clock();
|
|
97
101
|
}
|
|
98
102
|
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
103
|
+
// 👇 only print when verbose/b_disp
|
|
104
|
+
if (b_disp) {
|
|
105
|
+
cout << "\nMDD Database built in " << give_time(clock() - kk) << " seconds\n\n";
|
|
106
|
+
cout << "Found " << N << " sequence, with max line len " << M
|
|
107
|
+
<< ", and " << L << " items, and " << E << " enteries\n";
|
|
108
|
+
// cout << "Total Trie nodes: " << Tree.size()
|
|
109
|
+
// << " Total CTree nodes: " << CTree.size()
|
|
110
|
+
// << " Total VTree nodes: " << VTree.size() << endl;
|
|
111
|
+
}
|
|
105
112
|
|
|
106
113
|
return true;
|
|
107
114
|
}
|
|
108
115
|
|
|
109
116
|
|
|
117
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
118
|
+
// Preprocess
|
|
119
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
110
120
|
bool Preprocess(string &inst, double thresh) {
|
|
111
121
|
vector<unsigned long long int> MN(100, 0);
|
|
112
122
|
vector<vector<bool>> ML(100, vector<bool>(1000000, false));
|
|
113
123
|
|
|
114
124
|
ifstream file(inst);
|
|
115
125
|
if (!file.good()) {
|
|
116
|
-
|
|
126
|
+
if (b_disp)
|
|
127
|
+
cout << "!!!!!! No such file exists: " << inst << " !!!!!!\n";
|
|
117
128
|
return false;
|
|
118
129
|
}
|
|
119
130
|
|
|
@@ -124,7 +135,7 @@ bool Preprocess(string &inst, double thresh) {
|
|
|
124
135
|
int ditem;
|
|
125
136
|
while (getline(file, line) && give_time(clock() - start_time) < time_limit) {
|
|
126
137
|
++N;
|
|
127
|
-
if (N % 10000000 == 0)
|
|
138
|
+
if (b_disp && N % 10000000 == 0)
|
|
128
139
|
cout << "N: " << N << endl;
|
|
129
140
|
|
|
130
141
|
istringstream word(line);
|
|
@@ -164,11 +175,10 @@ bool Preprocess(string &inst, double thresh) {
|
|
|
164
175
|
M = size_m;
|
|
165
176
|
}
|
|
166
177
|
|
|
167
|
-
if (thresh < 1.0)
|
|
178
|
+
if (thresh < 1.0)
|
|
168
179
|
theta = static_cast<unsigned long long>(ceil(thresh * N));
|
|
169
|
-
|
|
180
|
+
else
|
|
170
181
|
theta = static_cast<unsigned long long>(thresh);
|
|
171
|
-
}
|
|
172
182
|
|
|
173
183
|
int real_L = 0;
|
|
174
184
|
item_dic.assign(L, -1);
|
|
@@ -176,11 +186,12 @@ bool Preprocess(string &inst, double thresh) {
|
|
|
176
186
|
for (int i = 0; i < (int)L; ++i) {
|
|
177
187
|
if (freq[i] >= theta) {
|
|
178
188
|
item_dic[i] = ++real_L;
|
|
179
|
-
item_in[i]
|
|
189
|
+
item_in[i] = true;
|
|
180
190
|
}
|
|
181
191
|
}
|
|
182
192
|
|
|
183
|
-
|
|
193
|
+
if (b_disp)
|
|
194
|
+
cout << "Original number of items: " << L << " Reduced to: " << real_L << endl;
|
|
184
195
|
|
|
185
196
|
unsigned long long int LpM = 1;
|
|
186
197
|
mlim = M;
|
|
@@ -195,12 +206,17 @@ bool Preprocess(string &inst, double thresh) {
|
|
|
195
206
|
++ml;
|
|
196
207
|
}
|
|
197
208
|
LpM *= ml * (1 + itmset_exists);
|
|
198
|
-
|
|
209
|
+
|
|
210
|
+
if (b_disp)
|
|
211
|
+
cout << ml << " " << LpM << " " << MN[i] << endl;
|
|
212
|
+
|
|
199
213
|
if (LpM * ulim > MN[i]) {
|
|
200
214
|
orgmlim = i;
|
|
201
215
|
while (i + ulim - 1 < (int)MN.size() && i + ulim - 1 < (int)M) {
|
|
202
|
-
|
|
203
|
-
|
|
216
|
+
if (b_disp)
|
|
217
|
+
cout << (MN[i - 1] - MN[i + ulim - 1]) << " "
|
|
218
|
+
<< MN[i + ulim - 1] << endl;
|
|
219
|
+
|
|
204
220
|
if ((MN[i - 1] - MN[i + ulim - 1]) < MN[i + ulim - 1]
|
|
205
221
|
&& MN[i + ulim - 1] < 600000000) {
|
|
206
222
|
mlim = i - 1;
|
|
@@ -212,11 +228,12 @@ bool Preprocess(string &inst, double thresh) {
|
|
|
212
228
|
}
|
|
213
229
|
}
|
|
214
230
|
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
231
|
+
if (b_disp)
|
|
232
|
+
cout << "M is: " << M << " Mlim is: " << mlim
|
|
233
|
+
<< " ulim is: " << ulim
|
|
234
|
+
<< " original mlim is: " << orgmlim
|
|
235
|
+
<< " guess is: "
|
|
236
|
+
<< round((log(N) - log(6)) / log(real_L)) << endl;
|
|
220
237
|
|
|
221
238
|
if (mlim < (int)M) {
|
|
222
239
|
for (int i = 0; i < real_L; ++i)
|
|
@@ -230,10 +247,14 @@ bool Preprocess(string &inst, double thresh) {
|
|
|
230
247
|
}
|
|
231
248
|
|
|
232
249
|
|
|
250
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
251
|
+
// Load_items_pre
|
|
252
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
233
253
|
bool Load_items_pre(string &inst_name) {
|
|
234
254
|
ifstream file(inst_name);
|
|
235
255
|
if (!file.good()) {
|
|
236
|
-
|
|
256
|
+
if (b_disp)
|
|
257
|
+
cout << "!!!!!! No such file exists: " << inst_name << " !!!!!!\n";
|
|
237
258
|
return false;
|
|
238
259
|
}
|
|
239
260
|
|
|
@@ -246,7 +267,6 @@ bool Load_items_pre(string &inst_name) {
|
|
|
246
267
|
vector<int> temp_lim;
|
|
247
268
|
bool sgn = false;
|
|
248
269
|
|
|
249
|
-
// L is final from Preprocess
|
|
250
270
|
while (word >> itm) {
|
|
251
271
|
ditem = stoi(itm);
|
|
252
272
|
if (item_dic[std::abs(ditem) - 1] == -1) {
|
|
@@ -274,18 +294,16 @@ bool Load_items_pre(string &inst_name) {
|
|
|
274
294
|
continue;
|
|
275
295
|
|
|
276
296
|
++N;
|
|
277
|
-
if (N % 10000000 == 0)
|
|
297
|
+
if (b_disp && N % 10000000 == 0)
|
|
278
298
|
cout << N << endl;
|
|
279
299
|
|
|
280
300
|
if (temp_vec.size() + temp_lim.size() > (size_t)M)
|
|
281
301
|
M = static_cast<unsigned int>(temp_vec.size() + temp_lim.size());
|
|
282
302
|
|
|
283
|
-
// ─── Ensure DFS/VDFS size before Build_MDD ───
|
|
284
303
|
while (DFS.size() < L)
|
|
285
304
|
DFS.emplace_back(-static_cast<int>(DFS.size()) - 1);
|
|
286
305
|
while (VDFS.size() < L)
|
|
287
306
|
VDFS.emplace_back(static_cast<int>(VDFS.size()));
|
|
288
|
-
// ──────────────────────────────────────────────
|
|
289
307
|
|
|
290
308
|
Build_MDD(temp_vec, temp_lim);
|
|
291
309
|
}
|
|
@@ -294,12 +312,14 @@ bool Load_items_pre(string &inst_name) {
|
|
|
294
312
|
}
|
|
295
313
|
|
|
296
314
|
|
|
315
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
316
|
+
// Load_items (no preprocess)
|
|
317
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
297
318
|
bool Load_items(string &inst_name) {
|
|
298
|
-
// std::cerr << "[SANITY] In Load_items: inst_name='"
|
|
299
|
-
// << inst_name << "'" << std::endl;
|
|
300
319
|
ifstream file(inst_name);
|
|
301
320
|
if (!file.good()) {
|
|
302
|
-
|
|
321
|
+
if (b_disp)
|
|
322
|
+
cout << "!!!!!! No such file exists: " << inst_name << " !!!!!!\n";
|
|
303
323
|
return false;
|
|
304
324
|
}
|
|
305
325
|
|
|
@@ -307,7 +327,7 @@ bool Load_items(string &inst_name) {
|
|
|
307
327
|
int ditem;
|
|
308
328
|
while (getline(file, line) && give_time(clock() - start_time) < time_limit) {
|
|
309
329
|
++N;
|
|
310
|
-
if (N % 1000000 == 0)
|
|
330
|
+
if (b_disp && N % 1000000 == 0)
|
|
311
331
|
cout << "Found " << N << " sequence, with max line len "
|
|
312
332
|
<< M << ", and " << L << " items, and " << E
|
|
313
333
|
<< " enteries\n";
|
|
@@ -325,7 +345,7 @@ bool Load_items(string &inst_name) {
|
|
|
325
345
|
|
|
326
346
|
if (L < static_cast<unsigned int>(std::abs(ditem))) {
|
|
327
347
|
L = static_cast<unsigned int>(std::abs(ditem));
|
|
328
|
-
|
|
348
|
+
|
|
329
349
|
while (DFS.size() < L)
|
|
330
350
|
DFS.emplace_back(-static_cast<int>(DFS.size()) - 1);
|
|
331
351
|
while (VDFS.size() < L)
|
|
@@ -337,16 +357,14 @@ bool Load_items(string &inst_name) {
|
|
|
337
357
|
else
|
|
338
358
|
temp_lim.push_back(ditem);
|
|
339
359
|
}
|
|
340
|
-
|
|
360
|
+
E += static_cast<unsigned long long>(temp_vec.size() + temp_lim.size());
|
|
341
361
|
if (temp_vec.size() + temp_lim.size() > (size_t)M)
|
|
342
362
|
M = static_cast<unsigned int>(temp_vec.size() + temp_lim.size());
|
|
343
363
|
|
|
344
|
-
// ─── Ensure DFS/VDFS size before Build_MDD ───
|
|
345
364
|
while (DFS.size() < L)
|
|
346
365
|
DFS.emplace_back(-static_cast<int>(DFS.size()) - 1);
|
|
347
366
|
while (VDFS.size() < L)
|
|
348
367
|
VDFS.emplace_back(static_cast<int>(VDFS.size()));
|
|
349
|
-
// ──────────────────────────────────────────────
|
|
350
368
|
|
|
351
369
|
Build_MDD(temp_vec, temp_lim);
|
|
352
370
|
}
|