effspm 0.1.7__cp312-cp312-macosx_11_0_arm64.whl → 0.2.7__cp312-cp312-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- effspm/__init__.py +10 -2
- effspm/_effspm.cpp +609 -0
- effspm/_effspm.cpython-312-darwin.so +0 -0
- effspm/btminer/src/build_mdd.cpp +63 -0
- effspm/btminer/src/build_mdd.hpp +40 -0
- effspm/btminer/src/freq_miner.cpp +179 -0
- effspm/btminer/src/freq_miner.hpp +39 -0
- effspm/btminer/src/load_inst.cpp +200 -0
- effspm/btminer/src/load_inst.hpp +25 -0
- effspm/btminer/src/utility.cpp +65 -0
- effspm/btminer/src/utility.hpp +40 -0
- effspm/freq_miner.hpp +4 -1
- effspm/htminer/src/build_mdd.cpp +192 -0
- effspm/htminer/src/build_mdd.hpp +64 -0
- effspm/htminer/src/freq_miner.cpp +350 -0
- effspm/htminer/src/freq_miner.hpp +60 -0
- effspm/htminer/src/load_inst.cpp +394 -0
- effspm/htminer/src/load_inst.hpp +23 -0
- effspm/htminer/src/utility.cpp +72 -0
- effspm/htminer/src/utility.hpp +77 -0
- effspm/largebm/src/build_mdd.cpp +137 -0
- effspm/largebm/src/build_mdd.hpp +47 -0
- effspm/largebm/src/freq_miner.cpp +349 -0
- effspm/largebm/src/freq_miner.hpp +48 -0
- effspm/largebm/src/load_inst.cpp +230 -0
- effspm/largebm/src/load_inst.hpp +45 -0
- effspm/largebm/src/utility.cpp +45 -0
- effspm/largebm/src/utility.hpp +18 -0
- effspm/largehm/src/build_mdd.cpp +174 -0
- effspm/largehm/src/build_mdd.hpp +93 -0
- effspm/largehm/src/freq_miner.cpp +445 -0
- effspm/largehm/src/freq_miner.hpp +77 -0
- effspm/largehm/src/load_inst.cpp +357 -0
- effspm/largehm/src/load_inst.hpp +64 -0
- effspm/largehm/src/utility.cpp +38 -0
- effspm/largehm/src/utility.hpp +29 -0
- effspm/largepp/src/freq_miner.cpp +170 -0
- effspm/largepp/src/freq_miner.hpp +43 -0
- effspm/largepp/src/load_inst.cpp +219 -0
- effspm/largepp/src/load_inst.hpp +28 -0
- effspm/largepp/src/utility.cpp +34 -0
- effspm/largepp/src/utility.hpp +21 -0
- effspm/load_inst.hpp +2 -1
- effspm-0.2.7.dist-info/METADATA +237 -0
- effspm-0.2.7.dist-info/RECORD +53 -0
- {effspm-0.1.7.dist-info → effspm-0.2.7.dist-info}/WHEEL +1 -2
- effspm/_core.cpython-312-darwin.so +0 -0
- effspm-0.1.7.dist-info/METADATA +0 -38
- effspm-0.1.7.dist-info/RECORD +0 -14
- {effspm-0.1.7.dist-info → effspm-0.2.7.dist-info}/licenses/LICENSE +0 -0
- {effspm-0.1.7.dist-info → effspm-0.2.7.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
#include <vector>
|
|
2
|
+
#include <iostream>
|
|
3
|
+
#include "load_inst.hpp"
|
|
4
|
+
#include "build_mdd.hpp"
|
|
5
|
+
#include "freq_miner.hpp"
|
|
6
|
+
#include "utility.hpp"
|
|
7
|
+
|
|
8
|
+
namespace htminer {
|
|
9
|
+
|
|
10
|
+
// Forward declarations of the helpers defined later in this file.
|
|
11
|
+
int Add_arc(int item, unsigned int last_arc, int& itmset, std::vector<unsigned int>& ancest_map);
|
|
12
|
+
void Add_vec(std::vector<int>& items_lim, std::vector<unsigned int>& ancest_map, unsigned int last_arc, int itmset);
|
|
13
|
+
|
|
14
|
+
// Global tree storage (declared `extern` in build_mdd.hpp).
|
|
15
|
+
std::vector<Arc> Tree;
|
|
16
|
+
std::vector<VArc> VTree;
|
|
17
|
+
std::vector<CArc> CTree;
|
|
18
|
+
|
|
19
|
+
void Build_MDD(std::vector<int>& items, std::vector<int>& items_lim) {
|
|
20
|
+
// DEBUG: entry into Build_MDD
|
|
21
|
+
// std::cerr << "[HTMiner::Build_MDD] called with items.size()=" << items.size()
|
|
22
|
+
// << " items_lim.size()=" << items_lim.size() << std::endl;
|
|
23
|
+
|
|
24
|
+
// // Prepare ancestor map of size L
|
|
25
|
+
std::vector<unsigned int> ancest_map(L, 0);
|
|
26
|
+
|
|
27
|
+
unsigned int last_arc = 0;
|
|
28
|
+
int itmset = 0;
|
|
29
|
+
|
|
30
|
+
// Iterate over items
|
|
31
|
+
for (size_t idx = 0; idx < items.size(); ++idx) {
|
|
32
|
+
int curr_item = items[idx];
|
|
33
|
+
// std::cerr << "[HTMiner::Build_MDD] processing items[" << idx
|
|
34
|
+
// << "]=" << curr_item << " last_arc=" << last_arc
|
|
35
|
+
// << " itmset=" << itmset << std::endl;
|
|
36
|
+
|
|
37
|
+
last_arc = Add_arc(curr_item, last_arc, itmset, ancest_map);
|
|
38
|
+
|
|
39
|
+
// std::cerr << "[HTMiner::Build_MDD] returned from Add_arc, new last_arc="
|
|
40
|
+
// << last_arc << " itmset=" << itmset << std::endl;
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
// If there are limited items, handle them
|
|
44
|
+
if (!items_lim.empty()) {
|
|
45
|
+
// std::cerr << "[HTMiner::Build_MDD] items_lim is not empty; size="
|
|
46
|
+
// << items_lim.size() << std::endl;
|
|
47
|
+
Add_vec(items_lim, ancest_map, last_arc, itmset);
|
|
48
|
+
// std::cerr << "[HTMiner::Build_MDD] returned from Add_vec" << std::endl;
|
|
49
|
+
} else {
|
|
50
|
+
// std::cerr << "[HTMiner::Build_MDD] items_lim is empty; skipping Add_vec" << std::endl;
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
// DEBUG: exit Build_MDD
|
|
54
|
+
// std::cerr << "[HTMiner::Build_MDD] exiting; Tree.size()=" << Tree.size()
|
|
55
|
+
// << " CTree.size()=" << CTree.size()
|
|
56
|
+
// << " VTree.size()=" << VTree.size() << std::endl;
|
|
57
|
+
//
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
int Add_arc(int item, unsigned int last_arc, int& itmset, std::vector<unsigned int>& ancest_map) {
|
|
61
|
+
unsigned int anct = ancest_map[std::abs(item) - 1];
|
|
62
|
+
if (item < 0) {
|
|
63
|
+
++itmset;
|
|
64
|
+
// std::cerr << "[HTMiner::Add_arc] negative item detected; itmset incremented to "
|
|
65
|
+
// << itmset << std::endl;
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
unsigned int last_sibl = Tree[last_arc].chld;
|
|
69
|
+
// std::cerr << "[HTMiner::Add_arc] starting with last_sibl=" << last_sibl
|
|
70
|
+
// << " anct=" << anct << std::endl;
|
|
71
|
+
|
|
72
|
+
if (last_sibl == 0) {
|
|
73
|
+
Tree.emplace_back(item, itmset, anct);
|
|
74
|
+
last_sibl = static_cast<unsigned int>(Tree.size() - 1);
|
|
75
|
+
Tree[last_arc].chld = last_sibl;
|
|
76
|
+
// std::cerr << "[HTMiner::Add_arc] created new arc at index=" << last_sibl
|
|
77
|
+
// << " setting Tree[" << last_arc << "].chld=" << last_sibl << std::endl;
|
|
78
|
+
if (anct == 0) {
|
|
79
|
+
DFS[std::abs(item) - 1].str_pnt.push_back(last_sibl);
|
|
80
|
+
// std::cerr << "[HTMiner::Add_arc] appended to DFS[" << (std::abs(item) - 1)
|
|
81
|
+
// << "].str_pnt -> " << last_sibl << std::endl;
|
|
82
|
+
}
|
|
83
|
+
}
|
|
84
|
+
else {
|
|
85
|
+
// std::cerr << "[HTMiner::Add_arc] traversing siblings starting at " << last_sibl << std::endl;
|
|
86
|
+
while (Tree[last_sibl].item != item) {
|
|
87
|
+
if (Tree[last_sibl].sibl == 0) {
|
|
88
|
+
Tree.emplace_back(item, itmset, anct);
|
|
89
|
+
Tree[last_sibl].sibl = static_cast<unsigned int>(Tree.size() - 1);
|
|
90
|
+
last_sibl = static_cast<unsigned int>(Tree.size() - 1);
|
|
91
|
+
// std::cerr << "[HTMiner::Add_arc] created sibling arc at index=" << last_sibl
|
|
92
|
+
// << " setting Tree[" << (last_sibl - 1) << "].sibl=" << last_sibl << std::endl;
|
|
93
|
+
if (anct == 0) {
|
|
94
|
+
DFS[std::abs(item) - 1].str_pnt.push_back(last_sibl);
|
|
95
|
+
// std::cerr << "[HTMiner::Add_arc] appended to DFS[" << (std::abs(item) - 1)
|
|
96
|
+
// << "].str_pnt -> " << last_sibl << std::endl;
|
|
97
|
+
}
|
|
98
|
+
break;
|
|
99
|
+
}
|
|
100
|
+
last_sibl = Tree[last_sibl].sibl;
|
|
101
|
+
// std::cerr << "[HTMiner::Add_arc] moving to next sibling: " << last_sibl << std::endl;
|
|
102
|
+
}
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
if (anct == 0) {
|
|
106
|
+
++DFS[std::abs(item) - 1].freq;
|
|
107
|
+
// std::cerr << "[HTMiner::Add_arc] incremented DFS[" << (std::abs(item) - 1)
|
|
108
|
+
// << "].freq -> " << DFS[std::abs(item) - 1].freq << std::endl;
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
++Tree[last_sibl].freq;
|
|
112
|
+
// std::cerr << "[HTMiner::Add_arc] incremented Tree[" << last_sibl << "].freq -> "
|
|
113
|
+
// << Tree[last_sibl].freq << std::endl;
|
|
114
|
+
|
|
115
|
+
ancest_map[std::abs(item) - 1] = last_sibl;
|
|
116
|
+
// std::cerr << "[HTMiner::Add_arc] updated ancest_map[" << (std::abs(item) - 1)
|
|
117
|
+
// << "] -> " << last_sibl << std::endl;
|
|
118
|
+
|
|
119
|
+
return static_cast<int>(last_sibl);
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
void Add_vec(std::vector<int>& items_lim, std::vector<unsigned int>& ancest, unsigned int last_arc, int itmset) {
|
|
123
|
+
items_lim.shrink_to_fit();
|
|
124
|
+
// std::cerr << "[HTMiner::Add_vec] called with items_lim.size()=" << items_lim.size()
|
|
125
|
+
// << " last_arc=" << last_arc << " itmset=" << itmset << std::endl;
|
|
126
|
+
|
|
127
|
+
std::vector<bool> counted(L, false);
|
|
128
|
+
|
|
129
|
+
if (Tree[last_arc].itmset > 0) {
|
|
130
|
+
ancest.push_back(0);
|
|
131
|
+
ancest.shrink_to_fit();
|
|
132
|
+
// std::cerr << "[HTMiner::Add_vec] Tree[" << last_arc << "].itmset > 0; pushing 0 to ancest" << std::endl;
|
|
133
|
+
|
|
134
|
+
for (size_t i = 0; i < items_lim.size(); ++i) {
|
|
135
|
+
int cur_itm = std::abs(items_lim[i]);
|
|
136
|
+
if (ancest[cur_itm - 1] == 0 && !counted[cur_itm - 1]) {
|
|
137
|
+
if (i + 1 < static_cast<int>(items_lim.size())) {
|
|
138
|
+
VDFS[cur_itm - 1].str_pnt.push_back(-static_cast<int>(i) - 1);
|
|
139
|
+
VDFS[cur_itm - 1].seq_ID.push_back(static_cast<unsigned int>(CTree.size()));
|
|
140
|
+
// std::cerr << "[HTMiner::Add_vec] appended negative str_pnt to VDFS["
|
|
141
|
+
// << (cur_itm - 1) << "] -> " << (-static_cast<int>(i) - 1) << std::endl;
|
|
142
|
+
}
|
|
143
|
+
++DFS[cur_itm - 1].freq;
|
|
144
|
+
counted[cur_itm - 1] = true;
|
|
145
|
+
// std::cerr << "[HTMiner::Add_vec] incremented DFS[" << (cur_itm - 1)
|
|
146
|
+
// << "].freq -> " << DFS[cur_itm - 1].freq << std::endl;
|
|
147
|
+
}
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
CTree.emplace_back(ancest, items_lim);
|
|
151
|
+
//std::cerr << "[HTMiner::Add_vec] added new CTree node; CTree.size()=" << CTree.size() << std::endl;
|
|
152
|
+
|
|
153
|
+
Tree[last_arc].chld = static_cast<unsigned int>(CTree.size() - 1);
|
|
154
|
+
Tree[last_arc].itmset = -itmset;
|
|
155
|
+
// std::cerr << "[HTMiner::Add_vec] updated Tree[" << last_arc
|
|
156
|
+
// << "].chld=" << Tree[last_arc].chld
|
|
157
|
+
// << " Tree[" << last_arc << "].itmset=" << Tree[last_arc].itmset << std::endl;
|
|
158
|
+
//
|
|
159
|
+
}
|
|
160
|
+
else {
|
|
161
|
+
std::vector<unsigned int>& ancest_ct = CTree[Tree[last_arc].chld].ancest;
|
|
162
|
+
// std::cerr << "[HTMiner::Add_vec] Tree[" << last_arc << "].itmset <= 0; using existing CTree node "
|
|
163
|
+
// << Tree[last_arc].chld << std::endl;
|
|
164
|
+
|
|
165
|
+
for (size_t i = 0; i < items_lim.size(); ++i) {
|
|
166
|
+
int cur_itm = std::abs(items_lim[i]);
|
|
167
|
+
if (!counted[cur_itm - 1] && ancest_ct[cur_itm - 1] == 0) {
|
|
168
|
+
if (i + 1 < static_cast<int>(items_lim.size())) {
|
|
169
|
+
VDFS[cur_itm - 1].str_pnt.push_back(static_cast<unsigned int>(i) + 1);
|
|
170
|
+
VDFS[cur_itm - 1].seq_ID.push_back(static_cast<unsigned int>(VTree.size()));
|
|
171
|
+
// std::cerr << "[HTMiner::Add_vec] appended positive str_pnt to VDFS["
|
|
172
|
+
// << (cur_itm - 1) << "] -> " << (static_cast<unsigned int>(i) + 1) << std::endl;
|
|
173
|
+
}
|
|
174
|
+
++DFS[cur_itm - 1].freq;
|
|
175
|
+
counted[cur_itm - 1] = true;
|
|
176
|
+
// std::cerr << "[HTMiner::Add_vec] incremented DFS[" << (cur_itm - 1)
|
|
177
|
+
// << "].freq -> " << DFS[cur_itm - 1].freq << std::endl;
|
|
178
|
+
}
|
|
179
|
+
}
|
|
180
|
+
|
|
181
|
+
VTree.emplace_back(items_lim, ancest_ct.back());
|
|
182
|
+
// std::cerr << "[HTMiner::Add_vec] added new VTree node; VTree.size()=" << VTree.size() << std::endl;
|
|
183
|
+
|
|
184
|
+
CTree[Tree[last_arc].chld].ancest.back() = static_cast<unsigned int>(VTree.size());
|
|
185
|
+
// std::cerr << "[HTMiner::Add_vec] updated CTree[" << Tree[last_arc].chld
|
|
186
|
+
// << "].ancest.back()=" << CTree[Tree[last_arc].chld].ancest.back() << std::endl;
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
//std::cerr << "[HTMiner::Add_vec] exiting" << std::endl;
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
} // namespace htminer
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#include <vector>
|
|
4
|
+
#include <cmath>
|
|
5
|
+
#include "load_inst.hpp"
|
|
6
|
+
|
|
7
|
+
namespace htminer {
|
|
8
|
+
void Build_MDD(std::vector<int>& items, std::vector<int>& items_lim);
|
|
9
|
+
|
|
10
|
+
/// One arc of the tree stored in `Tree`.
///
/// `chld` and `sibl` are indices of the first child and the next sibling
/// (0 meaning "none"), `freq` is a counter incremented by the builder,
/// `anct` is the index of an ancestor arc, and `item` / `itmset` are the
/// signed item code and the itemset counter recorded at creation time.
class Arc {
public:
    unsigned int chld = 0;
    unsigned int sibl = 0;
    unsigned int freq = 0;
    unsigned int anct = 0;
    int itmset = 0;
    int item = 0;

    /// Creates an arc for `_itm`. The item is taken as a signed `int` because
    /// callers pass negative codes; the previous `unsigned int` parameter
    /// round-tripped them through an unsigned conversion.
    Arc(int _itm, int _itmset, unsigned int _anc)
        : anct(_anc), itmset(_itmset), item(_itm) {}

    /// Creates an arc with every field zeroed, so no member is ever read
    /// uninitialised.
    Arc() = default;
};
|
|
34
|
+
|
|
35
|
+
/// Node of `VTree`: a stored item vector plus the link to the next node.
class VArc {
public:
    unsigned int sibl;      // link to the next VArc in the chain (0 = none)
    std::vector<int> seq;   // stored items

    /// Takes over the contents of `items` (which is left empty) and records
    /// `_sib` as the sibling link.
    VArc(std::vector<int>& items, unsigned int _sib) : sibl(_sib) {
        items.swap(seq);
    }

    /// Creates an empty node without a sibling.
    VArc() : sibl(0) {}
};
|
|
49
|
+
|
|
50
|
+
/// Node of `CTree`: a stored item vector together with an ancestor map.
class CArc {
public:
    std::vector<int> seq;              // stored items
    std::vector<unsigned int> ancest;  // ancestor map captured at creation

    /// Takes over the contents of both arguments; `_anc` and `items` are
    /// left empty afterwards.
    CArc(std::vector<unsigned int>& _anc, std::vector<int>& items) {
        _anc.swap(ancest);
        items.swap(seq);
    }
};
|
|
60
|
+
|
|
61
|
+
extern std::vector<Arc> Tree;
|
|
62
|
+
extern std::vector<VArc> VTree;
|
|
63
|
+
extern std::vector<CArc> CTree;
|
|
64
|
+
}
|
|
@@ -0,0 +1,350 @@
|
|
|
1
|
+
#include <iostream>
|
|
2
|
+
#include <time.h>
|
|
3
|
+
#include "freq_miner.hpp"
|
|
4
|
+
#include "build_mdd.hpp"
|
|
5
|
+
#include "utility.hpp"
|
|
6
|
+
|
|
7
|
+
namespace htminer {
|
|
8
|
+
void Out_patt(vector<int>& seq, unsigned int freq);
|
|
9
|
+
void Extend_patt(Pattern& _patt);
|
|
10
|
+
void Mine_vec(unsigned int seq_ID, int pos, int num_found, vector<unsigned int>& ancest, vector<int>& items, unsigned int inod, int sgn);
|
|
11
|
+
|
|
12
|
+
unsigned long long int num_patt = 0;
|
|
13
|
+
|
|
14
|
+
vector<bool> ilist;
|
|
15
|
+
vector<bool> slist;
|
|
16
|
+
|
|
17
|
+
vector<Pattern> pot_patt;
|
|
18
|
+
vector<VPattern> pot_vpatt;
|
|
19
|
+
vector<unsigned int> last_strpnt;
|
|
20
|
+
vector<unsigned int> ancest_base;
|
|
21
|
+
vector<int> DFS_numfound;
|
|
22
|
+
|
|
23
|
+
Pattern _patt;
|
|
24
|
+
VPattern _vpatt;
|
|
25
|
+
|
|
26
|
+
int itmset_size;
|
|
27
|
+
int last_neg;
|
|
28
|
+
|
|
29
|
+
bool ilist_nempty;
|
|
30
|
+
|
|
31
|
+
void Freq_miner() {
|
|
32
|
+
collectedPatterns.clear();
|
|
33
|
+
vector<int> list;
|
|
34
|
+
|
|
35
|
+
for (int i = 0; i < L; ++i) {
|
|
36
|
+
if (DFS[i].freq >= theta) {
|
|
37
|
+
list.push_back(-i-1);
|
|
38
|
+
if (itmset_exists)
|
|
39
|
+
list.push_back(i+1);
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
for (int i = 0; i < DFS.size(); ++i)
|
|
44
|
+
DFS[i].list = list;
|
|
45
|
+
|
|
46
|
+
while (!DFS.empty() && give_time(clock() - start_time) < time_limit) {
|
|
47
|
+
if (DFS.back().freq >= theta)
|
|
48
|
+
Extend_patt(DFS.back());
|
|
49
|
+
else {
|
|
50
|
+
DFS.pop_back();
|
|
51
|
+
if (!VDFS.empty() && VDFS.back().ass_patt == DFS.size())
|
|
52
|
+
VDFS.pop_back();
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
// Extends the pattern at the back of DFS by one item.
//
// The pattern is moved into the file-level `_patt` and popped from DFS. The
// function then
//   1. turns `_patt.list` into the bitmaps `slist` (negative candidates) and
//      `ilist` (positive candidates),
//   2. locates the last non-positive element of `_patt.seq` (`last_neg`) and
//      the number of elements from there to the end (`itmset_size`),
//   3. walks the tree below every start pointer of the pattern, accumulating
//      candidate frequencies and start pointers in `pot_patt` (and, through
//      Mine_vec, in `pot_vpatt`),
//   4. pushes every candidate whose frequency reaches `theta` onto DFS (and
//      its vector pointers onto VDFS), reports it and counts it.
//
// Candidate slots: a negative candidate -x uses slot x-1, a positive
// candidate x uses slot x+L-1.
//
// _pattern: must be DFS.back(); it is swapped out and then popped.
void Extend_patt(Pattern& _pattern) {

    swap(_patt, _pattern);
    DFS.pop_back();

    slist = vector<bool>(L, 0);
    ilist_nempty = 0;

    if (itmset_exists) {
        ilist = vector<bool>(L, 0);
        for (const int cand : _patt.list) {
            if (cand < 0)
                slist[-cand - 1] = 1;
            else {
                ilist[cand - 1] = 1;
                ilist_nempty = 1;
            }
        }
    }
    else {
        for (const int cand : _patt.list)
            slist[-cand - 1] = 1;
    }

    // Index of the last non-positive element of the pattern. The lower bound
    // keeps the scan inside the vector even if no such element exists (the
    // original loop would then have read seq[-1]).
    last_neg = static_cast<int>(_patt.seq.size()) - 1;
    while (last_neg > 0 && _patt.seq[last_neg] > 0)
        --last_neg;
    itmset_size = static_cast<int>(_patt.seq.size()) - last_neg;

    pot_patt = vector<Pattern>(L + L * ilist_nempty);
    if (!CTree.empty())
        pot_vpatt = vector<VPattern>(L + L * ilist_nempty);

    last_strpnt = vector<unsigned int>(L, 0);

    // True when the arc has something below it: tree children, or a vector
    // node (signalled by a negative itmset).
    const auto expandable = [](unsigned int arc) {
        return Tree[arc].chld != 0 || Tree[arc].itmset < 0;
    };

    // Vector pointers recorded for this pattern: continue inside the stored
    // vectors. A negative pointer refers to a CTree node, a positive one to a
    // VTree node.
    if (!VDFS.empty() && VDFS.back().ass_patt == DFS.size()) {
        swap(_vpatt, VDFS.back());
        VDFS.pop_back();
        for (unsigned int pnt = 0; pnt < _vpatt.str_pnt.size(); ++pnt) {
            if (_vpatt.str_pnt[pnt] < 0)
                Mine_vec(_vpatt.seq_ID[pnt], -_vpatt.str_pnt[pnt], -1, ancest_base, CTree[_vpatt.seq_ID[pnt]].seq, 0, -1); // starting search from vpatt should start from 1 position ahead of pointer
            else // -1: no need to check ancest for remaining itemset items
                Mine_vec(_vpatt.seq_ID[pnt], _vpatt.str_pnt[pnt], -1, ancest_base, VTree[_vpatt.seq_ID[pnt]].seq, 0, 1);
        }
    }

    vector<unsigned int> DFS_itm; // for initial itemset extension
    vector<unsigned int> DFS_seq; // for initial itemset extension
    if (ilist_nempty)
        DFS_numfound.clear(); // tracks whether the current itemset is found anywhere along the search path, by counting how many of the current items in the itemset are found

    for (unsigned int pnt = 0; pnt < _patt.str_pnt.size(); ++pnt) {
        const unsigned int start_arc = _patt.str_pnt[pnt];

        // ---- Phase 1: arcs reached through positive items only ----------
        DFS_itm.push_back(start_arc);
        while (!DFS_itm.empty()) {
            unsigned int cur_sibl = DFS_itm.back();
            DFS_itm.pop_back();

            if (Tree[cur_sibl].itmset < 0) {
                // The arc ends in a vector node: scan the CTree node and then
                // every VTree node chained from its last ancest slot.
                unsigned int carc = Tree[cur_sibl].chld;
                Mine_vec(carc, 0, -1, CTree[carc].ancest, CTree[carc].seq, start_arc, -1);
                cur_sibl = CTree[carc].ancest.back();
                while (cur_sibl != 0) {
                    Mine_vec(cur_sibl - 1, 0, -1, CTree[carc].ancest, VTree[cur_sibl - 1].seq, start_arc, 1);
                    cur_sibl = VTree[cur_sibl - 1].sibl;
                }
                continue;
            }

            cur_sibl = Tree[cur_sibl].chld;
            while (cur_sibl != 0) {
                int cur_itm = Tree[cur_sibl].item;
                if (cur_itm < 0) {
                    cur_itm = -cur_itm;
                    if (slist[cur_itm - 1]) {
                        pot_patt[cur_itm - 1].freq += Tree[cur_sibl].freq;
                        if (expandable(cur_sibl))
                            pot_patt[cur_itm - 1].str_pnt.push_back(cur_sibl);
                    }
                    if (expandable(cur_sibl)) {
                        // Negative item: continue below it in phase 2.
                        DFS_seq.push_back(cur_sibl);
                        if (ilist_nempty) {
                            if (cur_itm == -_patt.seq[last_neg])
                                DFS_numfound.push_back(1);
                            else
                                DFS_numfound.push_back(0);
                        }
                    }
                }
                else {
                    if (ilist[cur_itm - 1]) {
                        pot_patt[cur_itm + L - 1].freq += Tree[cur_sibl].freq;
                        if (expandable(cur_sibl))
                            pot_patt[cur_itm + L - 1].str_pnt.push_back(cur_sibl);
                    }
                    if (expandable(cur_sibl))
                        DFS_itm.push_back(cur_sibl);
                }
                cur_sibl = Tree[cur_sibl].sibl;
            }
        }

        // Remember how many start pointers each positive candidate had after
        // phase 1; the value is handed to check_parent below.
        if (ilist_nempty) {
            for (int i = 0; i < L; ++i) {
                if (ilist[i])
                    last_strpnt[i] = pot_patt[i + L].str_pnt.size();
            }
        }

        // ---- Phase 2: arcs below a negative item -------------------------
        while (!DFS_seq.empty()) {
            unsigned int cur_sibl = DFS_seq.back();
            DFS_seq.pop_back();

            int num_found = 0;
            if (ilist_nempty) {
                num_found = DFS_numfound.back();
                DFS_numfound.pop_back();
            }

            if (Tree[cur_sibl].itmset < 0) {
                unsigned int carc = Tree[cur_sibl].chld;
                Mine_vec(carc, 0, num_found, CTree[carc].ancest, CTree[carc].seq, start_arc, -1);
                cur_sibl = CTree[carc].ancest.back();
                while (cur_sibl != 0) {
                    Mine_vec(cur_sibl - 1, 0, num_found, CTree[carc].ancest, VTree[cur_sibl - 1].seq, start_arc, 1);
                    cur_sibl = VTree[cur_sibl - 1].sibl;
                }
                continue;
            }

            cur_sibl = Tree[cur_sibl].chld;
            while (cur_sibl != 0) {
                int cur_itm = Tree[cur_sibl].item;
                if (cur_itm > 0) {
                    if (num_found == itmset_size && ilist[cur_itm - 1] && (abs(Tree[Tree[cur_sibl].anct].itmset) < abs(Tree[start_arc].itmset) || !check_parent(Tree[cur_sibl].anct, start_arc, last_strpnt[cur_itm - 1], pot_patt[cur_itm + L - 1].str_pnt))) {
                        pot_patt[cur_itm + L - 1].freq += Tree[cur_sibl].freq;
                        if (expandable(cur_sibl))
                            pot_patt[cur_itm + L - 1].str_pnt.push_back(cur_sibl);
                    }
                    if (slist[cur_itm - 1] && abs(Tree[Tree[cur_sibl].anct].itmset) <= abs(Tree[start_arc].itmset)) {
                        pot_patt[cur_itm - 1].freq += Tree[cur_sibl].freq;
                        if (expandable(cur_sibl))
                            pot_patt[cur_itm - 1].str_pnt.push_back(cur_sibl);
                    }
                    if (expandable(cur_sibl)) {
                        DFS_seq.push_back(cur_sibl);
                        if (ilist_nempty) {
                            if (num_found < itmset_size && cur_itm == abs(_patt.seq[last_neg + num_found]))
                                DFS_numfound.push_back(num_found + 1);
                            else
                                DFS_numfound.push_back(num_found);
                        }
                    }
                }
                else {
                    cur_itm = -cur_itm;
                    if (slist[cur_itm - 1] && abs(Tree[Tree[cur_sibl].anct].itmset) <= abs(Tree[start_arc].itmset)) {
                        pot_patt[cur_itm - 1].freq += Tree[cur_sibl].freq;
                        if (expandable(cur_sibl))
                            pot_patt[cur_itm - 1].str_pnt.push_back(cur_sibl);
                    }
                    if (expandable(cur_sibl)) {
                        DFS_seq.push_back(cur_sibl);
                        if (ilist_nempty) {
                            // A negative item restarts the match of the
                            // pattern's trailing elements.
                            if (cur_itm == -_patt.seq[last_neg])
                                DFS_numfound.push_back(1);
                            else
                                DFS_numfound.push_back(0);
                        }
                    }
                }
                cur_sibl = Tree[cur_sibl].sibl;
            }
        }
    }

    // Candidate lists handed to the extensions: `ilistp` goes to patterns
    // extended by a positive item, `slistp` to those extended by a negative one.
    vector<int> ilistp;
    vector<int> slistp;
    for (const int cand : _patt.list) {
        if (cand > 0 && pot_patt[cand + L - 1].freq >= theta)
            ilistp.push_back(cand);
        else if (cand < 0 && pot_patt[-cand - 1].freq >= theta) {
            if (itmset_exists)
                slistp.push_back(-cand);
            ilistp.push_back(cand);
            slistp.push_back(cand);
        }
    }

    for (const int cand : ilistp) {
        int p;
        if (cand < 0)
            p = -cand - 1;
        else
            p = cand - 1 + L;

        // Move the accumulated candidate onto DFS and complete it.
        DFS.emplace_back();
        swap(DFS.back(), pot_patt[p]);
        DFS.back().seq = _patt.seq;
        DFS.back().seq.push_back(cand);
        if (cand < 0)
            DFS.back().list = slistp;
        else
            DFS.back().list = ilistp;

        // Attach the vector pointers, tagged with the DFS position they
        // belong to.
        if (!CTree.empty() && !pot_vpatt[p].str_pnt.empty()) {
            pot_vpatt[p].ass_patt = DFS.size() - 1;
            VDFS.emplace_back();
            swap(VDFS.back(), pot_vpatt[p]);
        }

        if (b_disp || b_write)
            Out_patt(DFS.back().seq, DFS.back().freq);
        htminer::collectedPatterns.emplace_back(DFS.back().seq);
        ++num_patt;
    }
}
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
void Mine_vec(unsigned int seq_ID, int pos, int num_found, vector<unsigned int>& ancest, vector<int>& items, unsigned int pnt, int sgn) {
|
|
266
|
+
|
|
267
|
+
vector<bool> found(L + L * ilist_nempty, 0);
|
|
268
|
+
int num_ext = 0;
|
|
269
|
+
|
|
270
|
+
if (num_found == -1) {
|
|
271
|
+
while (pos < items.size() && items[pos] > 0 && num_ext < _patt.list.size()) {
|
|
272
|
+
int cur_itm = items[pos];
|
|
273
|
+
if (ilist[cur_itm - 1] && !found[cur_itm + L - 1]) {
|
|
274
|
+
if (pos + 1 < items.size()) {
|
|
275
|
+
pot_vpatt[cur_itm + L - 1].seq_ID.push_back(seq_ID);
|
|
276
|
+
pot_vpatt[cur_itm + L - 1].str_pnt.push_back(sgn * (pos + 1));
|
|
277
|
+
}
|
|
278
|
+
++pot_patt[cur_itm + L - 1].freq;
|
|
279
|
+
found[cur_itm + L - 1] = 1;
|
|
280
|
+
++num_ext;
|
|
281
|
+
}
|
|
282
|
+
++pos;
|
|
283
|
+
}
|
|
284
|
+
}
|
|
285
|
+
|
|
286
|
+
for (unsigned int k = pos; k < items.size() && num_ext < _patt.list.size(); ++k) {
|
|
287
|
+
int cur_itm = abs(items[k]);
|
|
288
|
+
if (items[k] < 0)
|
|
289
|
+
num_found = 0;
|
|
290
|
+
if (slist[cur_itm - 1] && !found[cur_itm - 1]) {
|
|
291
|
+
if (ancest.empty() || abs(Tree[ancest[cur_itm - 1]].itmset) <= abs(Tree[pnt].itmset)) {
|
|
292
|
+
if (k + 1 < items.size()) {
|
|
293
|
+
pot_vpatt[cur_itm - 1].seq_ID.push_back(seq_ID);
|
|
294
|
+
pot_vpatt[cur_itm - 1].str_pnt.push_back(sgn * (k + 1));
|
|
295
|
+
}
|
|
296
|
+
++pot_patt[cur_itm - 1].freq;
|
|
297
|
+
}
|
|
298
|
+
found[cur_itm - 1] = 1;
|
|
299
|
+
++num_ext;
|
|
300
|
+
}
|
|
301
|
+
if (num_found == itmset_size) {
|
|
302
|
+
if (ilist[cur_itm - 1] && !found[cur_itm + L - 1]) {
|
|
303
|
+
if (ancest.empty() || abs(Tree[ancest[cur_itm - 1]].itmset) < abs(Tree[pnt].itmset) || !check_parent(ancest[cur_itm - 1], pnt, last_strpnt[cur_itm - 1], pot_patt[cur_itm + L - 1].str_pnt)) {
|
|
304
|
+
if (k + 1 < items.size()) {
|
|
305
|
+
pot_vpatt[cur_itm + L - 1].seq_ID.push_back(seq_ID);
|
|
306
|
+
pot_vpatt[cur_itm + L - 1].str_pnt.push_back(sgn * (k + 1));
|
|
307
|
+
}
|
|
308
|
+
++pot_patt[cur_itm + L - 1].freq;
|
|
309
|
+
}
|
|
310
|
+
found[cur_itm + L - 1] = 1;
|
|
311
|
+
++num_ext;
|
|
312
|
+
}
|
|
313
|
+
}
|
|
314
|
+
else if (cur_itm == abs(_patt.seq[last_neg + num_found]))
|
|
315
|
+
++num_found;
|
|
316
|
+
}
|
|
317
|
+
}
|
|
318
|
+
|
|
319
|
+
|
|
320
|
+
// Reports one pattern: the items of `seq` separated by single spaces, a line
// break, and then a line "************** Freq: <freq>".
//
// The text goes to standard output when `b_disp` is set and is appended to
// `out_file` when `b_write` is set.
void Out_patt(vector<int>& seq, unsigned int freq) {

    ofstream file_o;
    if (b_write)
        file_o.open(out_file, std::ios::app);

    // Range-for avoids comparing a signed index against seq.size().
    for (const int item : seq) {
        if (b_disp)
            cout << item << " ";
        if (b_write)
            file_o << item << " ";
    }

    // Same bytes as before, but the console is flushed once per pattern
    // instead of once per line.
    if (b_disp)
        cout << '\n' << "************** Freq: " << freq << endl;

    if (b_write) {
        file_o << '\n' << "************** Freq: " << freq << '\n';
        file_o.close(); // close() flushes the buffered text
    }
}
|
|
345
|
+
|
|
346
|
+
|
|
347
|
+
}
|
|
348
|
+
|
|
349
|
+
|
|
350
|
+
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#include "load_inst.hpp"
|
|
4
|
+
#include "build_mdd.hpp"
|
|
5
|
+
|
|
6
|
+
namespace htminer {
|
|
7
|
+
void Freq_miner();
|
|
8
|
+
|
|
9
|
+
/// A candidate or frequent pattern kept on the DFS stack.
///
/// `seq` holds the signed item codes of the pattern, `str_pnt` the indices
/// of the arcs from which the pattern can be extended, `list` the candidate
/// items for its extensions and `freq` its accumulated frequency.
///
/// Standard-library names are fully qualified so the header does not depend
/// on a using-directive provided by another header.
class Pattern {
public:

    std::vector<int> seq;
    std::vector<unsigned int> str_pnt;
    std::vector<int> list;

    unsigned long long int freq = 0;

    /// Creates a one-item pattern with frequency 0.
    Pattern(int item) : seq(1, item) {}

    /// Creates an empty pattern with room reserved for `_pnt` start pointers.
    /// The second argument is unused; it only selects this overload.
    Pattern(size_t _pnt, bool /*_res*/) {
        str_pnt.reserve(_pnt);
    }

    /// Creates an empty pattern with frequency 0.
    Pattern() = default;
};
|
|
34
|
+
|
|
35
|
+
/// Vector-node pointers that belong to one entry of the DFS stack.
///
/// `ass_patt` is the DFS position the record is associated with, and
/// `str_pnt[i]` / `seq_ID[i]` form one pointer: a signed 1-based position
/// and the id of the stored vector it refers to.
///
/// `ass_patt` is zero-initialised in every constructor; previously it was
/// left indeterminate unless the one-argument constructor was used.
class VPattern {
public:

    unsigned long long int ass_patt = 0;

    std::vector<int> str_pnt;
    std::vector<unsigned int> seq_ID;

    /// Creates an empty record associated with DFS position `_patt`.
    VPattern(unsigned long long int _patt) : ass_patt(_patt) {}

    /// Creates an empty record with room reserved for `_pnt` pointers.
    /// The second argument is unused; it only selects this overload.
    VPattern(size_t _pnt, bool /*a*/) {
        str_pnt.reserve(_pnt);
    }

    /// Creates an empty record associated with position 0.
    VPattern() = default;
};
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
extern unsigned long long int num_patt;
|
|
56
|
+
extern vector<Pattern> DFS;
|
|
57
|
+
extern vector<VPattern> VDFS;
|
|
58
|
+
|
|
59
|
+
}
|
|
60
|
+
|