effspm 0.1.5__cp39-cp39-win_amd64.whl → 0.2.7__cp39-cp39-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- effspm/__init__.py +9 -2
- effspm/_core.cpp +91 -13
- effspm/_effspm.cp39-win_amd64.pyd +0 -0
- effspm/_effspm.cpp +609 -0
- effspm/btminer/src/build_mdd.cpp +63 -0
- effspm/btminer/src/build_mdd.hpp +40 -0
- effspm/btminer/src/freq_miner.cpp +179 -0
- effspm/btminer/src/freq_miner.hpp +39 -0
- effspm/btminer/src/load_inst.cpp +200 -0
- effspm/btminer/src/load_inst.hpp +25 -0
- effspm/btminer/src/utility.cpp +65 -0
- effspm/btminer/src/utility.hpp +40 -0
- effspm/freq_miner.hpp +7 -2
- effspm/htminer/src/build_mdd.cpp +192 -0
- effspm/htminer/src/build_mdd.hpp +64 -0
- effspm/htminer/src/freq_miner.cpp +350 -0
- effspm/htminer/src/freq_miner.hpp +60 -0
- effspm/htminer/src/load_inst.cpp +394 -0
- effspm/htminer/src/load_inst.hpp +23 -0
- effspm/htminer/src/utility.cpp +72 -0
- effspm/htminer/src/utility.hpp +77 -0
- effspm/largebm/src/build_mdd.cpp +137 -0
- effspm/largebm/src/build_mdd.hpp +47 -0
- effspm/largebm/src/freq_miner.cpp +349 -0
- effspm/largebm/src/freq_miner.hpp +48 -0
- effspm/largebm/src/load_inst.cpp +230 -0
- effspm/largebm/src/load_inst.hpp +45 -0
- effspm/largebm/src/utility.cpp +45 -0
- effspm/largebm/src/utility.hpp +18 -0
- effspm/largehm/src/build_mdd.cpp +174 -0
- effspm/largehm/src/build_mdd.hpp +93 -0
- effspm/largehm/src/freq_miner.cpp +445 -0
- effspm/largehm/src/freq_miner.hpp +77 -0
- effspm/largehm/src/load_inst.cpp +357 -0
- effspm/largehm/src/load_inst.hpp +64 -0
- effspm/largehm/src/utility.cpp +38 -0
- effspm/largehm/src/utility.hpp +29 -0
- effspm/largepp/src/freq_miner.cpp +170 -0
- effspm/largepp/src/freq_miner.hpp +43 -0
- effspm/largepp/src/load_inst.cpp +219 -0
- effspm/largepp/src/load_inst.hpp +28 -0
- effspm/largepp/src/utility.cpp +34 -0
- effspm/largepp/src/utility.hpp +21 -0
- effspm/load_inst.hpp +18 -12
- effspm-0.2.7.dist-info/METADATA +237 -0
- effspm-0.2.7.dist-info/RECORD +53 -0
- {effspm-0.1.5.dist-info → effspm-0.2.7.dist-info}/WHEEL +1 -1
- effspm/_core.cp39-win_amd64.pyd +0 -0
- effspm-0.1.5.dist-info/METADATA +0 -38
- effspm-0.1.5.dist-info/RECORD +0 -14
- {effspm-0.1.5.dist-info → effspm-0.2.7.dist-info}/licenses/LICENSE +0 -0
- {effspm-0.1.5.dist-info → effspm-0.2.7.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#include <vector>
|
|
4
|
+
#include <cmath>
|
|
5
|
+
#include "load_inst.hpp"
|
|
6
|
+
|
|
7
|
+
namespace btminer {
|
|
8
|
+
|
|
9
|
+
void Build_MDD(std::vector<int>& items);
|
|
10
|
+
|
|
11
|
+
/// One arc of the MDD, stored by value in the global `Tree` vector.
/// Arcs refer to each other by integer index into `Tree`; -1 means "none".
///
/// Every field has a default member initializer so that no constructor can
/// leave a field indeterminate (previously the two-argument and the default
/// constructors left `itmset` / `anct` / `item` uninitialized).
class Arc {
public:
    int chld = -1;   ///< index of the first child arc, -1 when there is none
    int sibl = -1;   ///< index of the next sibling arc, -1 when there is none
    int freq = 0;    ///< frequency accumulated on this arc
    int anct = 0;    ///< index of an ancestor arc (the miner follows it upward)
    int itmset = 0;  ///< itemset number of this arc — presumably its position within the sequence; confirm in Build_MDD
    int item = 0;    ///< signed item id; the miner branches on its sign

    /// Builds an arc for item `_itm` in itemset `_itmset` with ancestor `_anc`.
    Arc(int _itm, int _itmset, int _anc) : anct(_anc), itmset(_itmset), item(_itm) {}

    /// Builds an arc for item `_itm` with ancestor `_anc`; `itmset` defaults to 0.
    Arc(int _itm, int _anc) : anct(_anc), item(_itm) {}

    /// Builds an unlinked arc with every field at its default value.
    Arc() = default;
};
|
|
37
|
+
|
|
38
|
+
extern std::vector<Arc> Tree;
|
|
39
|
+
|
|
40
|
+
}
|
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
#include <iostream>
|
|
2
|
+
#include <time.h>
|
|
3
|
+
#include <vector>
|
|
4
|
+
#include <fstream>
|
|
5
|
+
#include <cmath>
|
|
6
|
+
#include "freq_miner.hpp"
|
|
7
|
+
#include "build_mdd.hpp"
|
|
8
|
+
#include "utility.hpp"
|
|
9
|
+
|
|
10
|
+
namespace btminer {
|
|
11
|
+
|
|
12
|
+
void Out_patt(std::vector<int>& seq, int freq);
|
|
13
|
+
void Extend_patt(Pattern _patt);
|
|
14
|
+
|
|
15
|
+
int num_patt = 0;
|
|
16
|
+
|
|
17
|
+
void Freq_miner() {
|
|
18
|
+
std::vector<int> islist;
|
|
19
|
+
for (int i = 0; i < L; ++i) {
|
|
20
|
+
if (DFS[i].freq >= theta)
|
|
21
|
+
islist.push_back(i);
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
for (int i = 0; i < DFS.size(); ++i) {
|
|
25
|
+
DFS[i].ilist = islist;
|
|
26
|
+
DFS[i].slist = islist;
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
while (!DFS.empty() && give_time(clock() - start_time) < time_limit) {
|
|
30
|
+
if (DFS.back().freq >= theta)
|
|
31
|
+
Extend_patt(DFS.back());
|
|
32
|
+
else
|
|
33
|
+
DFS.pop_back();
|
|
34
|
+
}
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
/// Extends `_patt` by one item in every possible way and pushes the frequent
/// extensions onto the global `DFS` stack.
///
/// `_patt` is taken by value because the function starts by popping the top
/// of `DFS`, which is where the caller's pattern lives.
///
/// Candidates are kept in `pot_patt`, a table of 2*L patterns:
///   - slot `item - 1`      : `_patt.seq` followed by `+item`
///   - slot `item - 1 + L`  : `_patt.seq` followed by `-item`
/// (these look like the usual i-extension / s-extension split — confirm
/// against Build_MDD's encoding of itemset boundaries).
///
/// For every start pointer of `_patt` the tree below it is walked in two
/// phases using explicit stacks:
///   - `DFS_patt_init` follows arcs with a positive item;
///   - `DFS_patt` / `DFS_numfound` continue below arcs with a negative item.
///     `DFS_numfound` carries, per stacked arc, a running match counter that
///     is compared against `itmset_size`.
///
/// Fix relative to the previous version: `DFS_numfound` used to be pushed only
/// when `_patt.ilist` was non-empty but was read and popped unconditionally,
/// so with an empty `ilist` the two stacks fell out of step and `back()` /
/// `pop_back()` ran on an empty vector. Both stacks are now always pushed
/// together. When `ilist` is empty the counter is never acted upon (every use
/// is guarded by `ilist[...]`), so results for non-empty `ilist` are unchanged.
void Extend_patt(Pattern _patt) {
    DFS.pop_back();

    // Membership masks for the items that may still be appended.
    std::vector<bool> slist(L, 0);
    std::vector<bool> ilist(L, 0);
    for (auto it : _patt.slist) slist[it] = 1;
    for (auto it : _patt.ilist) ilist[it] = 1;

    // Locate the last negative entry of the sequence; everything from there
    // to the end is counted in itmset_size.
    int itmset_size = 1;
    int last_neg = static_cast<int>(_patt.seq.size()) - 1;
    while (_patt.seq[last_neg] > 0) {
        --last_neg;
        ++itmset_size;
    }

    std::vector<Pattern> pot_patt(2 * L);
    std::vector<int> DFS_patt_init, DFS_patt, DFS_numfound, last_strpnt(L, 0);

    for (std::size_t pnt = 0; pnt < _patt.str_pnt.size(); ++pnt) {
        const int start_arc = _patt.str_pnt[pnt];

        // Phase 1: walk below the start arc along positive-item arcs.
        DFS_patt_init.push_back(start_arc);
        while (!DFS_patt_init.empty()) {
            int cur_sibl = Tree[DFS_patt_init.back()].chld;
            DFS_patt_init.pop_back();
            while (cur_sibl != -1) {
                int cur_itm = Tree[cur_sibl].item;
                if (cur_itm < 0) {
                    cur_itm = -cur_itm;
                    if (slist[cur_itm - 1]) {
                        pot_patt[cur_itm + L - 1].freq += Tree[cur_sibl].freq;
                        pot_patt[cur_itm + L - 1].str_pnt.push_back(cur_sibl);
                    }
                    if (Tree[cur_sibl].chld != -1) {
                        // Hand the subtree over to phase 2.
                        DFS_patt.push_back(cur_sibl);
                        DFS_numfound.push_back(cur_itm == -_patt.seq[last_neg] ? 1 : 0);
                    }
                } else {
                    if (ilist[cur_itm - 1]) {
                        pot_patt[cur_itm - 1].freq += Tree[cur_sibl].freq;
                        pot_patt[cur_itm - 1].str_pnt.push_back(cur_sibl);
                    }
                    if (Tree[cur_sibl].chld != -1)
                        DFS_patt_init.push_back(cur_sibl);
                }
                cur_sibl = Tree[cur_sibl].sibl;
            }
        }

        // Remember how many start pointers each "+item" candidate had after
        // phase 1; check_parent only searches the entries added after this mark.
        for (auto it : _patt.ilist)
            last_strpnt[it] = static_cast<int>(pot_patt[it].str_pnt.size());

        // Phase 2: walk the subtrees handed over by phase 1.
        while (!DFS_patt.empty()) {
            int cur_sibl = Tree[DFS_patt.back()].chld;
            const int num_found = DFS_numfound.back();
            DFS_patt.pop_back();
            DFS_numfound.pop_back();
            while (cur_sibl != -1) {
                int cur_itm = Tree[cur_sibl].item;
                if (cur_itm > 0) {
                    if (num_found == itmset_size && ilist[cur_itm - 1] &&
                        (Tree[Tree[cur_sibl].anct].itmset < Tree[start_arc].itmset ||
                         !check_parent(cur_sibl, start_arc, last_strpnt[cur_itm - 1],
                                       pot_patt[cur_itm - 1].str_pnt))) {
                        pot_patt[cur_itm - 1].freq += Tree[cur_sibl].freq;
                        pot_patt[cur_itm - 1].str_pnt.push_back(cur_sibl);
                    }
                    if (slist[cur_itm - 1] &&
                        Tree[Tree[cur_sibl].anct].itmset <= Tree[start_arc].itmset) {
                        pot_patt[cur_itm + L - 1].freq += Tree[cur_sibl].freq;
                        pot_patt[cur_itm + L - 1].str_pnt.push_back(cur_sibl);
                    }
                    if (Tree[cur_sibl].chld != -1) {
                        DFS_patt.push_back(cur_sibl);
                        // Always pushed together with DFS_patt (see header comment).
                        DFS_numfound.push_back(
                            (num_found < itmset_size &&
                             cur_itm == abs(_patt.seq[last_neg + num_found]))
                                ? num_found + 1
                                : num_found);
                    }
                } else {
                    cur_itm = -cur_itm;
                    if (slist[cur_itm - 1] &&
                        Tree[Tree[cur_sibl].anct].itmset <= Tree[start_arc].itmset) {
                        pot_patt[cur_itm + L - 1].freq += Tree[cur_sibl].freq;
                        pot_patt[cur_itm + L - 1].str_pnt.push_back(cur_sibl);
                    }
                    if (Tree[cur_sibl].chld != -1) {
                        DFS_patt.push_back(cur_sibl);
                        // Always pushed together with DFS_patt (see header comment).
                        DFS_numfound.push_back(cur_itm == -_patt.seq[last_neg] ? 1 : 0);
                    }
                }
                cur_sibl = Tree[cur_sibl].sibl;
            }
        }
    }

    // Keep only the candidates that reach the support threshold.
    std::vector<int> slistp, ilistp;
    for (auto it : _patt.ilist)
        if (pot_patt[it].freq >= theta) ilistp.push_back(it);
    for (auto it : _patt.slist)
        if (pot_patt[it + L].freq >= theta) slistp.push_back(it);

    // Emit the "+item" extensions.
    for (auto it : ilistp) {
        pot_patt[it].str_pnt.shrink_to_fit();
        DFS.push_back(pot_patt[it]);
        DFS.back().seq = _patt.seq;
        DFS.back().seq.push_back(it + 1);
        DFS.back().seq.shrink_to_fit();
        DFS.back().slist = slistp;
        DFS.back().ilist = ilistp;
        if (b_disp || b_write) Out_patt(DFS.back().seq, DFS.back().freq);
        ++num_patt;
    }

    // Emit the "-item" extensions; both candidate lists are seeded from slistp.
    for (auto it : slistp) {
        pot_patt[it + L].str_pnt.shrink_to_fit();
        DFS.push_back(pot_patt[it + L]);
        DFS.back().seq = _patt.seq;
        DFS.back().seq.push_back(-it - 1);
        DFS.back().seq.shrink_to_fit();
        DFS.back().slist = slistp;
        DFS.back().ilist = slistp;
        if (b_disp || b_write) Out_patt(DFS.back().seq, DFS.back().freq);
        ++num_patt;
    }
}
|
|
157
|
+
|
|
158
|
+
void Out_patt(std::vector<int>& seq, int freq) {
|
|
159
|
+
|
|
160
|
+
btminer::collected.push_back(seq); // make pattern visible to Python
|
|
161
|
+
|
|
162
|
+
std::ofstream file_o;
|
|
163
|
+
if (b_write) file_o.open(out_file, std::ios::app);
|
|
164
|
+
|
|
165
|
+
for (int ii = 0; ii < seq.size(); ii++) {
|
|
166
|
+
if (b_disp) std::cout << seq[ii] << " ";
|
|
167
|
+
if (b_write) file_o << seq[ii] << " ";
|
|
168
|
+
}
|
|
169
|
+
if (b_disp) std::cout << std::endl;
|
|
170
|
+
if (b_write) file_o << std::endl;
|
|
171
|
+
|
|
172
|
+
if (b_disp) std::cout << "************** Freq: " << freq << std::endl;
|
|
173
|
+
if (b_write) {
|
|
174
|
+
file_o << "************** Freq: " << freq << std::endl;
|
|
175
|
+
file_o.close();
|
|
176
|
+
}
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
} // namespace btminer
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#include <vector>
|
|
4
|
+
#include "load_inst.hpp"
|
|
5
|
+
#include "build_mdd.hpp"
|
|
6
|
+
|
|
7
|
+
namespace btminer {
|
|
8
|
+
|
|
9
|
+
void Freq_miner();
|
|
10
|
+
|
|
11
|
+
/// A pattern on the mining stack: the item sequence, the tree arcs it is
/// anchored at, the item indices still allowed as extensions, and its
/// frequency.
class Pattern {
public:
    std::vector<int> seq;      ///< signed item ids making up the pattern
    std::vector<int> str_pnt;  ///< indices into `Tree` used as start points when extending
    std::vector<int> slist;    ///< item indices allowed for "-item" extensions
    std::vector<int> ilist;    ///< item indices allowed for "+item" extensions
    int freq;                  ///< accumulated frequency

    /// Empty pattern with frequency 0.
    Pattern() : freq(0) {}

    /// Single-item pattern with frequency 0.
    Pattern(int item) : seq(1, item), freq(0) {}

    /// Takes over the contents of `_seq` (which is left empty, because this
    /// pattern's `seq` starts empty and the two are swapped) and appends `item`.
    Pattern(std::vector<int>& _seq, int item) : freq(0) {
        seq.swap(_seq);
        seq.push_back(item);
    }
};
|
|
34
|
+
|
|
35
|
+
extern int num_patt;
|
|
36
|
+
extern int num_max_patt;
|
|
37
|
+
extern std::vector<Pattern> DFS;
|
|
38
|
+
|
|
39
|
+
} // namespace btminer
|
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
|
|
2
|
+
#include <iostream>
|
|
3
|
+
#include <sstream>
|
|
4
|
+
#include <fstream>
|
|
5
|
+
#include <cmath>
|
|
6
|
+
#include <ctime>
|
|
7
|
+
#include <map>
|
|
8
|
+
#include <vector>
|
|
9
|
+
#include <algorithm>
|
|
10
|
+
#include "load_inst.hpp"
|
|
11
|
+
#include "utility.hpp"
|
|
12
|
+
#include "build_mdd.hpp"
|
|
13
|
+
#include "freq_miner.hpp"
|
|
14
|
+
|
|
15
|
+
namespace btminer {
|
|
16
|
+
|
|
17
|
+
using namespace std;
|
|
18
|
+
|
|
19
|
+
extern int num_nodes, cur_node;
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
map<string, int> item_map;
|
|
23
|
+
map<int, string> item_map_rev;
|
|
24
|
+
vector<int> freq;
|
|
25
|
+
vector<int> item_dic;
|
|
26
|
+
|
|
27
|
+
void Load_items_pre(string& inst_name);
|
|
28
|
+
bool Load_items(string& inst_name);
|
|
29
|
+
bool Preprocess(string& inst, double thresh);
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
/// Loads a sequence file into the MDD and prepares the mining state.
///
/// A root arc is appended to `Tree` first. With `pre_pro` set, the file is
/// read twice: `Preprocess` counts the items and sets `theta`, then one seed
/// pattern per remaining item is pushed onto `DFS` and `Load_items_pre` builds
/// the tree. Without `pre_pro`, `Load_items` builds the tree in one pass and
/// `theta` is derived afterwards.
///
/// @param items_file  path of the input file
/// @param thresh      support threshold; values below 1 are treated as a
///                    fraction of `N * N_mult`, other values are used as-is
/// @return false when the input file could not be opened, true otherwise
bool Load_instance(string& items_file, double thresh) {
    clock_t kk = clock();
    Tree.emplace_back(0, 0, 0);

    if (!pre_pro) {
        if (!Load_items(items_file))
            return false;
        theta = (thresh < 1) ? ceil(thresh * N * N_mult) : thresh;
    } else {
        if (!Preprocess(items_file, thresh))
            return false;

        cout << "\nPreprocess done in " << give_time(clock() - kk) << " seconds\n\n";

        // One seed pattern per item: items 1..L become -1..-L.
        DFS.reserve(L);
        int seed = 0;
        while (seed < L) {
            DFS.emplace_back(-seed - 1);
            ++seed;
        }

        kk = clock();   // restart the timer so the next message covers the build only
        Load_items_pre(items_file);
    }

    cout << "\nMDD Database built in " << give_time(clock() - kk) << " seconds\n\n";
    cout << "Found " << N * N_mult << " sequence, with max line len " << M << ", and " << L << " items, and " << E << " enteries\n";
    cout << "Total MDD nodes: " << Tree.size() << endl;

    return true;
}
|
|
62
|
+
|
|
63
|
+
/// First pass over the input: counts in how many lines each item occurs,
/// derives `theta`, and builds `item_dic`, which renumbers the items that
/// reach `theta` to 1..real_L (all other entries stay -1).
///
/// On return `L` holds the reduced item count and `N` is reset to 0 so the
/// second pass can count the sequences again.
///
/// Changes relative to the previous version:
///   - a token that parses to 0 is skipped instead of indexing element -1;
///   - the per-line `counted` mask is sized on its own rather than assuming
///     it grows in lock-step with `freq`;
///   - the final loop no longer reads `freq` past its end;
///   - the unused local `size_m` is gone.
///
/// NOTE(review): `freq` is never cleared here, so counts from an earlier call
/// in the same process would carry over — confirm whether the caller resets it.
///
/// @param inst    path of the input file
/// @param thresh  support threshold; values below 1 are treated as a fraction
///                of `N * N_mult`, other values are used as-is
/// @return false when the file cannot be opened, true otherwise
bool Preprocess(string& inst, double thresh) {
    ifstream file(inst);
    if (!file.good()) {
        cout << "!!!!!! No such file exists: " << inst << " !!!!!!\n";
        return false;
    }

    string line;
    while (getline(file, line) && give_time(clock() - start_time) < time_limit) {
        ++N;
        vector<bool> counted(L, 0);   // items already counted on this line
        istringstream word(line);
        string itm;
        while (word >> itm) {
            const int ditem = stoi(itm);
            if (ditem == 0) continue;          // 0 has no slot (ids are 1-based)

            const int id = abs(ditem);
            if (L < id) L = id;
            if (freq.size() < static_cast<size_t>(L)) freq.resize(L, 0);
            if (counted.size() < static_cast<size_t>(L)) counted.resize(L, false);

            if (!counted[id - 1]) {
                ++freq[id - 1];
                counted[id - 1] = 1;
            }
        }
    }

    theta = (thresh < 1) ? ceil(thresh * N * N_mult) : thresh;

    int real_L = 0;
    item_dic = vector<int>(L, -1);
    for (int i = 0; i < L && static_cast<size_t>(i) < freq.size(); ++i) {
        if (freq[i] >= theta)
            item_dic[i] = ++real_L;
    }

    cout << "Original number of items: " << L << " Reduced to: " << real_L << endl;
    L = real_L;
    N = 0;
    return true;
}
|
|
105
|
+
|
|
106
|
+
/// Second pass over the input after `Preprocess`: rewrites every line with the
/// reduced item numbering and feeds it to `Build_MDD`.
///
/// Items whose count in `freq` is below `theta` are dropped. If a dropped item
/// was negative, the next kept item on the line is made negative instead, so
/// the sign carried by the dropped item is not lost. Lines that end up empty
/// are skipped and not counted in `N`.
///
/// Change relative to the previous version: the range test mixed signed and
/// unsigned operands, so a token of 0, or an id outside `freq` / `item_dic`,
/// fell through to an out-of-bounds `item_dic[...]` read. A token of 0 is now
/// ignored and an out-of-range id is treated like an infrequent item.
///
/// Returns silently when the file cannot be opened.
void Load_items_pre(string& inst_name) {
    ifstream file(inst_name);
    if (!file.good()) return;

    string line;
    while (getline(file, line) && give_time(clock() - start_time) < time_limit) {
        istringstream word(line);
        string itm;
        vector<int> temp_vec;
        bool sgn = 0;   // true while a dropped negative item is waiting to pass on its sign
        while (word >> itm) {
            int ditem;
            if (use_dic) {
                auto it = item_map.find(itm);
                if (it == item_map.end()) {
                    item_map[itm] = ++L;
                    item_map_rev[L] = itm;
                    ditem = L;
                } else {
                    ditem = it->second;
                }
            } else {
                ditem = stoi(itm);
            }

            if (pre_pro) {
                if (ditem == 0) continue;   // ids are 1-based; 0 has no slot

                const size_t idx = static_cast<size_t>(abs(ditem)) - 1;
                const bool known = idx < freq.size() && idx < item_dic.size();
                if (!known || freq[idx] < theta) {
                    if (!sgn)
                        sgn = ditem < 0;
                    continue;
                }
                ditem = (ditem > 0) ? item_dic[idx] : -item_dic[idx];
            }

            if (sgn && ditem > 0)
                ditem = -ditem;
            sgn = 0;

            temp_vec.push_back(ditem);
        }

        if (temp_vec.empty()) continue;

        ++N;
        const int line_len = static_cast<int>(temp_vec.size());
        if (line_len > M) M = line_len;

        E += line_len;   // total number of entries loaded
        Build_MDD(temp_vec);
    }
}
|
|
155
|
+
|
|
156
|
+
/// Single-pass loader used when preprocessing is disabled: reads every line,
/// maps the tokens to item ids and feeds the line to `Build_MDD`.
///
/// With `use_dic` the tokens are mapped through `item_map` (new tokens get the
/// next id); otherwise they are parsed as integers and, unless `just_build` is
/// set, `DFS` is grown so that it holds one seed pattern per item id seen.
///
/// Changes relative to the previous version:
///   - the seed value was computed as `-DFS.size() - 1` in unsigned arithmetic
///     and then narrowed to int; it is now computed in int;
///   - `DFS.reserve(L)` is called once per growth step instead of once per
///     appended element;
///   - signed/unsigned comparisons are made explicit.
///
/// NOTE(review): in the `use_dic` branch `L` grows but `DFS` does not —
/// confirm whether seeds are expected to be created elsewhere in that mode.
///
/// @param inst_name  path of the input file
/// @return false when the file cannot be opened, true otherwise
bool Load_items(string& inst_name) {
    ifstream file(inst_name);
    if (!file.good()) {
        cout << "!!!!!! No such file exists: " << inst_name << " !!!!!!\n";
        return false;
    }

    string line;
    while (getline(file, line) && give_time(clock() - start_time) < time_limit) {
        ++N;
        istringstream word(line);
        string itm;
        vector<int> temp_vec;
        while (word >> itm) {
            int ditem;
            if (use_dic) {
                auto it = item_map.find(itm);
                if (it == item_map.end()) {
                    item_map[itm] = ++L;
                    item_map_rev[L] = itm;
                    ditem = L;
                } else {
                    ditem = it->second;
                }
            } else {
                ditem = stoi(itm);
                if (L < abs(ditem)) {
                    L = abs(ditem);
                    if (!just_build) {
                        DFS.reserve(L);
                        // Seed k (0-based) is the single-item pattern -(k + 1).
                        while (DFS.size() < static_cast<size_t>(L))
                            DFS.emplace_back(-static_cast<int>(DFS.size()) - 1);
                    }
                }
            }
            temp_vec.push_back(ditem);
        }

        const int line_len = static_cast<int>(temp_vec.size());
        if (line_len > M) M = line_len;
        E += line_len;   // total number of entries loaded
        Build_MDD(temp_vec);
    }
    return true;
}
|
|
199
|
+
|
|
200
|
+
} // namespace btminer
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#include <vector>
|
|
4
|
+
#include <string>
|
|
5
|
+
#include <fstream>
|
|
6
|
+
#include <map>
|
|
7
|
+
#include <unordered_set>
|
|
8
|
+
#include <unordered_map>
|
|
9
|
+
#include <ctime>
|
|
10
|
+
|
|
11
|
+
namespace btminer {
|
|
12
|
+
|
|
13
|
+
bool Load_instance(std::string& items_file, double thresh);
|
|
14
|
+
|
|
15
|
+
extern std::string out_file, folder;
|
|
16
|
+
|
|
17
|
+
extern bool b_disp, b_write, use_dic, just_build, pre_pro;
|
|
18
|
+
|
|
19
|
+
extern int N, M, L, theta, num_nodes, M_mult, N_mult, time_limit, cur_node;
|
|
20
|
+
|
|
21
|
+
extern clock_t start_time;
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
} // namespace btminer
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
#include "utility.hpp"
|
|
2
|
+
#include "build_mdd.hpp"
|
|
3
|
+
#include "load_inst.hpp"
|
|
4
|
+
#include <iostream>
|
|
5
|
+
|
|
6
|
+
namespace btminer {
|
|
7
|
+
|
|
8
|
+
// ─── Global definitions ──────────────────────────────────────────
|
|
9
|
+
bool use_dic = false;
|
|
10
|
+
std::vector<std::vector<int>> items;
|
|
11
|
+
bool use_list = false;
|
|
12
|
+
bool just_build = false;
|
|
13
|
+
int E = 0, M = 0, N = 0, L = 0, theta = 0;
|
|
14
|
+
std::vector<Pattern> DFS;
|
|
15
|
+
clock_t start_time = 0;
|
|
16
|
+
bool b_disp = false, b_write = false;
|
|
17
|
+
std::string out_file;
|
|
18
|
+
|
|
19
|
+
bool pre_pro = true;
|
|
20
|
+
int N_mult = 1, M_mult = 1;
|
|
21
|
+
int time_limit = 30 * 3600;
|
|
22
|
+
|
|
23
|
+
// buffer of mined patterns returned to Python
|
|
24
|
+
// Patterns gathered by Out_patt during a run.
std::vector<std::vector<int>> collected;

/// Empties the pattern buffer.
void ClearCollected()
{
    collected.clear();
}

/// Read-only access to the pattern buffer.
const std::vector<std::vector<int>>& GetCollected()
{
    return collected;
}
|
|
28
|
+
|
|
29
|
+
// ─── Utility functions ───────────────────────────────────────────
|
|
30
|
+
/// Linear search for `itm` in `vec`.
/// @return index of the first element equal to `itm`, or -1 when absent
int find_ID(std::vector<int>& vec, int itm)
{
    const int count = static_cast<int>(vec.size());
    for (int idx = 0; idx < count; ++idx) {
        if (vec[idx] == itm)
            return idx;
    }
    return -1;
}
|
|
36
|
+
|
|
37
|
+
bool check_parent(int cur_arc, int str_pnt, int start,
|
|
38
|
+
std::vector<int>& strpnt_vec)
|
|
39
|
+
{
|
|
40
|
+
std::vector<int> ancestors;
|
|
41
|
+
int cur_anct = Tree[cur_arc].anct;
|
|
42
|
+
|
|
43
|
+
while (Tree[cur_anct].itmset > Tree[str_pnt].itmset) {
|
|
44
|
+
if (Tree[cur_anct].item > 0) ancestors.push_back(cur_anct);
|
|
45
|
+
cur_anct = Tree[cur_anct].anct;
|
|
46
|
+
}
|
|
47
|
+
if (Tree[cur_anct].itmset == Tree[str_pnt].itmset) return true;
|
|
48
|
+
|
|
49
|
+
for (auto it = ancestors.rbegin(); it != ancestors.rend(); ++it)
|
|
50
|
+
for (int i = start; i < static_cast<int>(strpnt_vec.size()); ++i)
|
|
51
|
+
if (strpnt_vec[i] == *it) return true;
|
|
52
|
+
|
|
53
|
+
return false;
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
/// Reports whether `pnt` occurs in `vec` at position `pos` or later.
bool find_pnt(Arc* pnt, std::vector<Arc*>& vec, int pos)
{
    size_t idx = pos;
    while (idx < vec.size()) {
        if (vec[idx] == pnt)
            return true;
        ++idx;
    }
    return false;
}
|
|
62
|
+
|
|
63
|
+
/// Converts a `clock()` tick count to seconds.
double give_time(clock_t kk)
{
    const double ticks = double(kk);
    return ticks / CLOCKS_PER_SEC;
}
|
|
64
|
+
|
|
65
|
+
} // namespace btminer
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#include <vector>
|
|
4
|
+
#include <ctime>
|
|
5
|
+
#include <string>
|
|
6
|
+
#include "build_mdd.hpp"
|
|
7
|
+
#include "freq_miner.hpp"
|
|
8
|
+
#include "load_inst.hpp"
|
|
9
|
+
|
|
10
|
+
namespace btminer {
|
|
11
|
+
|
|
12
|
+
// === Utility function declarations ===
|
|
13
|
+
bool find_pnt(Arc* pnt, std::vector<Arc*>& vec, int pos);
|
|
14
|
+
int find_ID(std::vector<int>& vec, int itm);
|
|
15
|
+
double give_time(clock_t kk);
|
|
16
|
+
bool check_parent(int cur_arc, int str_pnt, int start, std::vector<int>& strpnt_vec);
|
|
17
|
+
|
|
18
|
+
// === Global variables (DECLARATIONS ONLY) ===
|
|
19
|
+
|
|
20
|
+
extern bool use_list;
|
|
21
|
+
extern bool just_build;
|
|
22
|
+
extern int E, M, N, L, theta;
|
|
23
|
+
extern std::vector<Pattern> DFS;
|
|
24
|
+
extern clock_t start_time;
|
|
25
|
+
extern bool b_disp, b_write;
|
|
26
|
+
extern std::string out_file;
|
|
27
|
+
extern bool pre_pro;
|
|
28
|
+
extern int N_mult, M_mult;
|
|
29
|
+
extern int time_limit;
|
|
30
|
+
extern std::vector<std::vector<int>> items;
|
|
31
|
+
|
|
32
|
+
extern std::vector<std::vector<int>> collected;
|
|
33
|
+
|
|
34
|
+
void ClearCollected();
|
|
35
|
+
const std::vector<std::vector<int>>& GetCollected();
|
|
36
|
+
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
// namespace btminer
|
effspm/freq_miner.hpp
CHANGED
|
@@ -1,7 +1,11 @@
|
|
|
1
1
|
#pragma once
|
|
2
|
-
|
|
2
|
+
#include <vector>
|
|
3
3
|
#include "load_inst.hpp"
|
|
4
|
+
#include <cstdlib>
|
|
5
|
+
#include <cmath>
|
|
6
|
+
#include <cstddef> // for std::size_t
|
|
4
7
|
|
|
8
|
+
using namespace std;
|
|
5
9
|
void Freq_miner();
|
|
6
10
|
void Out_patt(std::vector<int>& seq, unsigned int freq);
|
|
7
11
|
|
|
@@ -20,7 +24,8 @@ public:
|
|
|
20
24
|
|
|
21
25
|
Pattern(vector<int>& _seq, int item) {
|
|
22
26
|
seq.reserve(_seq.size());
|
|
23
|
-
for (
|
|
27
|
+
for (std::size_t i = 0; i < _seq.size(); ++i)
|
|
28
|
+
|
|
24
29
|
seq.push_back(_seq[i]);
|
|
25
30
|
seq.push_back(item);
|
|
26
31
|
freq = 0;
|