effspm 0.1.5__cp311-cp311-win_amd64.whl → 0.2.6__cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- effspm/__init__.py +9 -2
- effspm/_core.cpp +91 -13
- effspm/_effspm.cp311-win_amd64.pyd +0 -0
- effspm/_effspm.cpp +609 -0
- effspm/btminer/src/build_mdd.cpp +63 -0
- effspm/btminer/src/build_mdd.hpp +40 -0
- effspm/btminer/src/freq_miner.cpp +179 -0
- effspm/btminer/src/freq_miner.hpp +39 -0
- effspm/btminer/src/load_inst.cpp +200 -0
- effspm/btminer/src/load_inst.hpp +25 -0
- effspm/btminer/src/utility.cpp +65 -0
- effspm/btminer/src/utility.hpp +40 -0
- effspm/freq_miner.hpp +7 -2
- effspm/htminer/src/build_mdd.cpp +192 -0
- effspm/htminer/src/build_mdd.hpp +64 -0
- effspm/htminer/src/freq_miner.cpp +350 -0
- effspm/htminer/src/freq_miner.hpp +60 -0
- effspm/htminer/src/load_inst.cpp +394 -0
- effspm/htminer/src/load_inst.hpp +23 -0
- effspm/htminer/src/utility.cpp +72 -0
- effspm/htminer/src/utility.hpp +77 -0
- effspm/largebm/src/build_mdd.cpp +137 -0
- effspm/largebm/src/build_mdd.hpp +47 -0
- effspm/largebm/src/freq_miner.cpp +349 -0
- effspm/largebm/src/freq_miner.hpp +48 -0
- effspm/largebm/src/load_inst.cpp +230 -0
- effspm/largebm/src/load_inst.hpp +45 -0
- effspm/largebm/src/utility.cpp +45 -0
- effspm/largebm/src/utility.hpp +18 -0
- effspm/largehm/src/build_mdd.cpp +174 -0
- effspm/largehm/src/build_mdd.hpp +93 -0
- effspm/largehm/src/freq_miner.cpp +445 -0
- effspm/largehm/src/freq_miner.hpp +77 -0
- effspm/largehm/src/load_inst.cpp +357 -0
- effspm/largehm/src/load_inst.hpp +64 -0
- effspm/largehm/src/utility.cpp +38 -0
- effspm/largehm/src/utility.hpp +29 -0
- effspm/largepp/src/freq_miner.cpp +170 -0
- effspm/largepp/src/freq_miner.hpp +43 -0
- effspm/largepp/src/load_inst.cpp +219 -0
- effspm/largepp/src/load_inst.hpp +28 -0
- effspm/largepp/src/utility.cpp +34 -0
- effspm/largepp/src/utility.hpp +21 -0
- effspm/load_inst.hpp +18 -12
- effspm-0.2.6.dist-info/METADATA +237 -0
- effspm-0.2.6.dist-info/RECORD +53 -0
- {effspm-0.1.5.dist-info → effspm-0.2.6.dist-info}/WHEEL +1 -1
- effspm/_core.cp311-win_amd64.pyd +0 -0
- effspm-0.1.5.dist-info/METADATA +0 -38
- effspm-0.1.5.dist-info/RECORD +0 -14
- {effspm-0.1.5.dist-info → effspm-0.2.6.dist-info}/licenses/LICENSE +0 -0
- {effspm-0.1.5.dist-info → effspm-0.2.6.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
// File: effspm/largebm/src/build_mdd.cpp
|
|
2
|
+
|
|
3
|
+
#include <vector>
|
|
4
|
+
#include <iostream>
|
|
5
|
+
#include <unordered_map>
|
|
6
|
+
#include "load_inst.hpp"
|
|
7
|
+
#include "build_mdd.hpp"
|
|
8
|
+
#include "freq_miner.hpp"
|
|
9
|
+
#include "utility.hpp"
|
|
10
|
+
|
|
11
|
+
namespace largebm {
|
|
12
|
+
|
|
13
|
+
// Forward declaration for Add_arc
|
|
14
|
+
int Add_arc(int item, unsigned long long int last_arc, int& itmset,
|
|
15
|
+
std::unordered_map<int, unsigned long long int>& ancest_map);
|
|
16
|
+
|
|
17
|
+
// Global MDD tree storage (declared `extern` in build_mdd.hpp)
|
|
18
|
+
std::vector<Arc> Tree;
|
|
19
|
+
|
|
20
|
+
void Build_MDD(std::vector<int>& items) {
|
|
21
|
+
std::unordered_map<int, unsigned long long int> ancest_map;
|
|
22
|
+
unsigned long long int last_arc = 0;
|
|
23
|
+
int itmset = 0;
|
|
24
|
+
|
|
25
|
+
for (auto it = items.begin(); it != items.end(); ++it) {
|
|
26
|
+
last_arc = Add_arc(*it, last_arc, itmset, ancest_map);
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
int Add_arc(int item, unsigned long long int last_arc, int& itmset,
|
|
32
|
+
std::unordered_map<int, unsigned long long int>& ancest_map) {
|
|
33
|
+
|
|
34
|
+
unsigned idx = std::abs(item) - 1;
|
|
35
|
+
|
|
36
|
+
// ─── DEBUG ────────────────────────────────────────────────
|
|
37
|
+
// std::cout << "[Add_arc] item=" << item
|
|
38
|
+
// << " idx=" << idx
|
|
39
|
+
// << " last_arc=" << last_arc
|
|
40
|
+
// << " Tree.size=" << Tree.size()
|
|
41
|
+
// << " DFS.size=" << DFS.size()
|
|
42
|
+
// << std::endl;
|
|
43
|
+
|
|
44
|
+
// Ensure DFS can hold this index
|
|
45
|
+
if (idx >= DFS.size()) {
|
|
46
|
+
// std::cout << "[Add_arc] • resizing DFS to " << (idx + 1) << std::endl;
|
|
47
|
+
DFS.reserve(idx + 1);
|
|
48
|
+
while (DFS.size() <= idx) {
|
|
49
|
+
DFS.emplace_back(-static_cast<int>(DFS.size()) - 1); // Pattern(-id)
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
unsigned long long int anct;
|
|
54
|
+
auto p = ancest_map.find(std::abs(item));
|
|
55
|
+
if (p == ancest_map.end()) {
|
|
56
|
+
anct = 0;
|
|
57
|
+
} else {
|
|
58
|
+
anct = p->second;
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
if (item < 0) {
|
|
62
|
+
++itmset;
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
// Before accessing Tree[last_arc].chld, check bounds
|
|
66
|
+
if (last_arc >= Tree.size()) {
|
|
67
|
+
// std::cout << "[Add_arc] !!! last_arc OOB last_arc="
|
|
68
|
+
// << last_arc << " Tree.size=" << Tree.size()
|
|
69
|
+
// << std::endl;
|
|
70
|
+
// We still proceed so we can see crash context:
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
unsigned long long int last_sibl = 0;
|
|
74
|
+
if (last_arc < Tree.size()) {
|
|
75
|
+
last_sibl = Tree[last_arc].chld;
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
if (last_sibl == 0) {
|
|
79
|
+
// Insert new node as first child
|
|
80
|
+
Tree.emplace_back(item, itmset, anct);
|
|
81
|
+
last_sibl = Tree.size() - 1;
|
|
82
|
+
|
|
83
|
+
if (last_arc < Tree.size()) {
|
|
84
|
+
Tree[last_arc].chld = last_sibl;
|
|
85
|
+
}
|
|
86
|
+
if (anct == 0) {
|
|
87
|
+
// Debug before DFS access
|
|
88
|
+
// std::cout << "[Add_arc] • DFS access at index=" << (std::abs(item) - 1)
|
|
89
|
+
// << " DFS.size=" << DFS.size() << std::endl;
|
|
90
|
+
DFS[std::abs(item) - 1].str_pnt.push_back(last_sibl);
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
} else {
|
|
94
|
+
|
|
95
|
+
// Walk siblings until find matching item or end
|
|
96
|
+
while (true) {
|
|
97
|
+
if (last_sibl >= Tree.size()) {
|
|
98
|
+
// std::cout << "[Add_arc] !!! last_sibl OOB last_sibl="
|
|
99
|
+
// << last_sibl << " Tree.size=" << Tree.size()
|
|
100
|
+
// << std::endl;
|
|
101
|
+
break;
|
|
102
|
+
}
|
|
103
|
+
if (Tree[last_sibl].item == item) {
|
|
104
|
+
break;
|
|
105
|
+
}
|
|
106
|
+
if (Tree[last_sibl].sibl == 0) {
|
|
107
|
+
Tree.emplace_back(item, itmset, anct);
|
|
108
|
+
Tree[last_sibl].sibl = Tree.size() - 1;
|
|
109
|
+
last_sibl = Tree.size() - 1;
|
|
110
|
+
if (anct == 0) {
|
|
111
|
+
// std::cout << "[Add_arc] • DFS access at index=" << (std::abs(item) - 1)
|
|
112
|
+
// << " DFS.size=" << DFS.size() << std::endl;
|
|
113
|
+
DFS[std::abs(item) - 1].str_pnt.push_back(last_sibl);
|
|
114
|
+
}
|
|
115
|
+
break;
|
|
116
|
+
}
|
|
117
|
+
last_sibl = Tree[last_sibl].sibl;
|
|
118
|
+
}
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
if (anct == 0) {
|
|
122
|
+
// std::cout << "[Add_arc] • increment DFS.freq at index=" << (std::abs(item) - 1)
|
|
123
|
+
// << " DFS.size=" << DFS.size() << std::endl;
|
|
124
|
+
DFS[std::abs(item) - 1].freq++;
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
if (last_sibl < Tree.size()) {
|
|
128
|
+
// std::cout << "[Add_arc] • increment Tree.freq at node=" << last_sibl
|
|
129
|
+
// << " Tree.size=" << Tree.size() << std::endl;
|
|
130
|
+
Tree[last_sibl].freq++;
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
ancest_map[std::abs(item)] = last_sibl;
|
|
134
|
+
return last_sibl;
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
} // namespace largebm
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#include<vector>
|
|
4
|
+
#include <cmath>
|
|
5
|
+
#include "load_inst.hpp"
|
|
6
|
+
|
|
7
|
+
namespace largebm {
|
|
8
|
+
void Build_MDD(std::vector<int>& items);
|
|
9
|
+
|
|
10
|
+
// One arc (node) of the MDD stored in the global `Tree` vector.
//
// Links are indices into `Tree`, not pointers; Add_arc treats a `chld` or
// `sibl` of 0 as "no such arc". Every member has a default initializer, so
// no constructor leaves a field indeterminate.
class Arc {
public:
    unsigned long long int chld = 0;   // index of the first child arc, 0 if none
    unsigned long long int sibl = 0;   // index of the next sibling arc, 0 if none
    unsigned long long int freq = 0;   // times this arc was traversed during insertion
    unsigned long long int anct = 0;   // ancestor arc index recorded at creation
    int itmset = 0;                    // itemset counter value at creation
    int item = 0;                      // signed item code carried by the arc

    // Builds an arc for `_itm` with an explicit itemset counter and ancestor.
    Arc(int _itm, int _itmset, unsigned long long int _anc)
        : anct(_anc), itmset(_itmset), item(_itm) {}

    // Builds an arc for `_itm` with an ancestor only; `itmset` stays 0.
    Arc(int _itm, int _anc)
        : anct(_anc), item(_itm) {}

    // Builds an all-zero arc.
    Arc() = default;
};
|
|
45
|
+
|
|
46
|
+
extern std::vector<Arc> Tree;
|
|
47
|
+
}
|
|
@@ -0,0 +1,349 @@
|
|
|
1
|
+
#include <vector>
|
|
2
|
+
#include <algorithm>
|
|
3
|
+
#include <iostream>
|
|
4
|
+
#include <fstream>
|
|
5
|
+
#include <ctime>
|
|
6
|
+
#include <unordered_map>
|
|
7
|
+
#include <unordered_set>
|
|
8
|
+
|
|
9
|
+
#include "freq_miner.hpp" // must come before load_inst.hpp
|
|
10
|
+
#include "load_inst.hpp"
|
|
11
|
+
#include "utility.hpp"
|
|
12
|
+
#include "build_mdd.hpp"
|
|
13
|
+
|
|
14
|
+
namespace largebm {
|
|
15
|
+
|
|
16
|
+
// Helper declarations (must match headers exactly)
|
|
17
|
+
static void Out_patt(const std::vector<int>& seq, unsigned long long freq);
|
|
18
|
+
static void Extend_patt(Pattern& patt);
|
|
19
|
+
|
|
20
|
+
// Globals (declared once; types must match freq_miner.hpp)
|
|
21
|
+
// Count of patterns pushed by Extend_patt; Freq_miner resets it to 0 on entry.
unsigned long long int num_patt = 0;
|
|
22
|
+
// Per-item flags rebuilt by Extend_patt from the positive entries of the current pattern's candidate list.
std::vector<bool> ilist;
|
|
23
|
+
// Per-item flags rebuilt by Extend_patt from the negative entries of the current pattern's candidate list.
std::vector<bool> slist;
|
|
24
|
+
// Stack kept in step with Extend_patt's local DFS_patt stack; holds one match counter per pending node.
std::vector<int> DFS_numfound;
|
|
25
|
+
// Pattern currently being extended; Extend_patt swaps the top DFS entry into it.
Pattern _patt;
|
|
26
|
+
|
|
27
|
+
void Freq_miner() {
|
|
28
|
+
// ─── RESET per‐run state ──────────────────────────────────────
|
|
29
|
+
collected.clear();
|
|
30
|
+
num_patt = 0;
|
|
31
|
+
// Ensure DFS has at least L entries (so DFS[i] is valid for 0..L-1)
|
|
32
|
+
if (static_cast<int>(DFS.size()) < static_cast<int>(L)) {
|
|
33
|
+
DFS.resize(L);
|
|
34
|
+
}
|
|
35
|
+
// ─────────────────────────────────────────────────────────────
|
|
36
|
+
|
|
37
|
+
std::vector<int> list;
|
|
38
|
+
|
|
39
|
+
if (use_list) {
|
|
40
|
+
// List‐based routine
|
|
41
|
+
std::vector<int> empty_pref;
|
|
42
|
+
Freq_miner_list(items, empty_pref, theta, collected);
|
|
43
|
+
return;
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
// MDD‐based initialization
|
|
47
|
+
for (int i = 0; i < static_cast<int>(L); ++i) {
|
|
48
|
+
if (DFS[i].freq >= theta) {
|
|
49
|
+
list.push_back(-i - 1);
|
|
50
|
+
if (itmset_exists) {
|
|
51
|
+
list.push_back(i + 1);
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
for (size_t i = 0; i < DFS.size(); ++i) {
|
|
56
|
+
DFS[i].list = list;
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
while (!DFS.empty() && give_time(clock() - start_time) < time_limit) {
|
|
60
|
+
if (DFS.back().freq >= theta) {
|
|
61
|
+
Extend_patt(DFS.back());
|
|
62
|
+
} else {
|
|
63
|
+
DFS.pop_back();
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
// Expands the pattern on top of the DFS stack by one item.
//
// The pattern is moved into the global `_patt` and popped from DFS. For each
// of its start points the MDD below that point is walked, accumulating a
// frequency and new start points for every candidate extension in
// `_patt.list`. Candidates that reach `theta` are pushed onto DFS as new
// patterns (and reported through Out_patt when display or file output is
// enabled).
//
// @param _pattern  Reference to DFS.back(); it must not be used after the
//                  pop_back() below.
void Extend_patt(Pattern& _pattern) {
    swap(_patt, _pattern);
    DFS.pop_back();

    // Flag the candidate item ids: negative list entries set slist, positive
    // entries set ilist (positive entries are only distinguished when
    // itemsets exist).
    slist = std::vector<bool>(L, false);
    if (itmset_exists) {
        ilist = std::vector<bool>(L, false);
    }
    bool ilist_nempty = false;
    for (const int entry : _patt.list) {
        if (itmset_exists && entry >= 0) {
            ilist[entry - 1] = true;
            ilist_nempty = true;
        } else {
            slist[-entry - 1] = true;
        }
    }

    // Find the last non-positive entry of the pattern; the entries from
    // there to the end are counted in itmset_size.
    int itmset_size = 1;
    int last_neg = static_cast<int>(_patt.seq.size()) - 1;
    for (; _patt.seq[last_neg] > 0; --last_neg) {
        ++itmset_size;
    }

    // Slots [0, L) collect candidates keyed by negative code; slots [L, 2L)
    // collect those keyed by positive code (only allocated when needed).
    std::vector<Pattern> pot_patt(L + (ilist_nempty ? L : 0));
    std::vector<unsigned long long int> DFS_patt_init;
    std::vector<unsigned long long int> DFS_patt;
    if (ilist_nempty) {
        DFS_numfound.clear();
    }
    std::vector<unsigned long long int> last_strpnt(L, 0);

    // Adds the node's frequency to a candidate slot and keeps the node as a
    // start point when it has children.
    const auto credit = [&pot_patt](std::size_t slot, unsigned long long int node) {
        pot_patt[slot].freq += Tree[node].freq;
        if (Tree[node].chld != 0) {
            pot_patt[slot].str_pnt.push_back(node);
        }
    };
    // True unless the node's child link holds the all-ones sentinel.
    const auto may_descend = [](unsigned long long int node) {
        return Tree[node].chld != static_cast<unsigned long long>(-1);
    };

    for (unsigned long long int pnt = 0; pnt < _patt.str_pnt.size(); ++pnt) {
        // True when the node's recorded ancestor is not in a later itemset
        // than the current start point.
        const auto within_anchor = [&](unsigned long long int node) {
            return Tree[Tree[node].anct].itmset <= Tree[_patt.str_pnt[pnt]].itmset;
        };

        // Walk 1: follow chains of positive-coded nodes below the start
        // point. Negative-coded nodes end a chain and are queued for walk 2.
        DFS_patt_init.push_back(_patt.str_pnt[pnt]);
        while (!DFS_patt_init.empty()) {
            unsigned long long int cur_sibl = Tree[DFS_patt_init.back()].chld;
            DFS_patt_init.pop_back();
            for (; cur_sibl != 0; cur_sibl = Tree[cur_sibl].sibl) {
                const int raw = Tree[cur_sibl].item;
                if (raw < 0) {
                    const int id = -raw;
                    if (slist[id - 1]) {
                        credit(id - 1, cur_sibl);
                    }
                    if (may_descend(cur_sibl)) {
                        DFS_patt.push_back(cur_sibl);
                        if (ilist_nempty) {
                            DFS_numfound.push_back(id == -_patt.seq[last_neg] ? 1 : 0);
                        }
                    }
                } else {
                    if (ilist[raw - 1]) {
                        credit(raw + L - 1, cur_sibl);
                    }
                    if (may_descend(cur_sibl)) {
                        DFS_patt_init.push_back(cur_sibl);
                    }
                }
            }
        }

        // Remember how many start points each positive-coded candidate had
        // after walk 1; check_parent receives this boundary.
        if (ilist_nempty) {
            for (int i = 0; i < static_cast<int>(L); ++i) {
                if (ilist[i]) {
                    last_strpnt[i] = pot_patt[i + L].str_pnt.size();
                }
            }
        }

        // Walk 2: visit everything below the queued nodes, carrying along
        // how many entries of the pattern's last itemset have been matched.
        while (!DFS_patt.empty()) {
            unsigned long long int cur_sibl = Tree[DFS_patt.back()].chld;
            DFS_patt.pop_back();
            int num_found = 0;
            if (ilist_nempty) {
                num_found = DFS_numfound.back();
                DFS_numfound.pop_back();
            }
            for (; cur_sibl != 0; cur_sibl = Tree[cur_sibl].sibl) {
                const int raw = Tree[cur_sibl].item;
                const bool positive = raw > 0;
                const int id = positive ? raw : -raw;

                // Positive-coded candidate: only once the whole last itemset
                // has been matched on this path.
                if (positive) {
                    if (num_found == itmset_size &&
                        ilist[id - 1] &&
                        (Tree[Tree[cur_sibl].anct].itmset < Tree[_patt.str_pnt[pnt]].itmset ||
                         !check_parent(cur_sibl, _patt.str_pnt[pnt],
                                       last_strpnt[id - 1],
                                       pot_patt[id + L - 1].str_pnt))) {
                        credit(id + L - 1, cur_sibl);
                    }
                }

                // Negative-coded candidate: same test for either sign of the node.
                if (slist[id - 1] && within_anchor(cur_sibl)) {
                    credit(id - 1, cur_sibl);
                }

                if (may_descend(cur_sibl)) {
                    DFS_patt.push_back(cur_sibl);
                    if (ilist_nempty) {
                        if (positive) {
                            // Advance the match counter when this node is the
                            // next expected entry of the last itemset.
                            if (num_found < itmset_size &&
                                id == std::abs(_patt.seq[last_neg + num_found])) {
                                DFS_numfound.push_back(num_found + 1);
                            } else {
                                DFS_numfound.push_back(num_found);
                            }
                        } else {
                            // A negative-coded node restarts the match.
                            DFS_numfound.push_back(id == -_patt.seq[last_neg] ? 1 : 0);
                        }
                    }
                }
            }
        }
    }

    // Keep the candidates that reached theta. ilistp receives every surviving
    // entry; slistp receives the negative survivors, preceded by their
    // positive twin when itemsets exist.
    std::vector<int> ilistp;
    std::vector<int> slistp;
    for (const int cand : _patt.list) {
        if (cand > 0) {
            if (pot_patt[cand - 1 + static_cast<int>(L)].freq >= theta) {
                ilistp.push_back(cand);
            }
        } else if (cand < 0 && pot_patt[-cand - 1].freq >= theta) {
            if (itmset_exists) {
                slistp.push_back(-cand);
            }
            ilistp.push_back(cand);
            slistp.push_back(cand);
        }
    }

    // Push one new pattern per surviving candidate.
    for (const int ext : ilistp) {
        const int p = (ext < 0) ? (-ext - 1) : (ext - 1 + static_cast<int>(L));

        DFS.emplace_back();
        Pattern& child = DFS.back();
        swap(child, pot_patt[p]);
        child.seq = _patt.seq;
        child.seq.push_back(ext);
        child.list = (ext < 0) ? slistp : ilistp;

        if (b_disp || b_write) {
            Out_patt(child.seq, child.freq);
        }
        ++num_patt;
    }
}
|
|
255
|
+
|
|
256
|
+
void Out_patt(const std::vector<int>& seq, unsigned long long freq) {
|
|
257
|
+
if (b_disp || b_write) {
|
|
258
|
+
std::ofstream file_o;
|
|
259
|
+
if (b_write) {
|
|
260
|
+
file_o.open(out_file, std::ios::app);
|
|
261
|
+
}
|
|
262
|
+
for (int v : seq) {
|
|
263
|
+
if (b_disp) std::cout << v << ' ';
|
|
264
|
+
if (b_write) file_o << v << ' ';
|
|
265
|
+
}
|
|
266
|
+
if (b_disp) std::cout << '\n';
|
|
267
|
+
if (b_write) file_o << '\n';
|
|
268
|
+
|
|
269
|
+
if (b_disp) {
|
|
270
|
+
std::cout << "************** Freq: " << freq << '\n';
|
|
271
|
+
}
|
|
272
|
+
if (b_write) {
|
|
273
|
+
file_o << "************** Freq: " << freq << '\n';
|
|
274
|
+
file_o.close();
|
|
275
|
+
}
|
|
276
|
+
}
|
|
277
|
+
collected.push_back(seq);
|
|
278
|
+
}
|
|
279
|
+
|
|
280
|
+
void Freq_miner_list(const std::vector<std::vector<int>>& db,
|
|
281
|
+
std::vector<int>& prefix,
|
|
282
|
+
unsigned long long minsup,
|
|
283
|
+
std::vector<std::vector<int>>& out) {
|
|
284
|
+
// 1) count single‐item support (one count per sequence)
|
|
285
|
+
std::unordered_map<int, unsigned long long> freq;
|
|
286
|
+
for (auto const& seq : db) {
|
|
287
|
+
std::unordered_set<int> seen;
|
|
288
|
+
for (int x : seq) {
|
|
289
|
+
if (seen.insert(x).second) {
|
|
290
|
+
++freq[x];
|
|
291
|
+
}
|
|
292
|
+
}
|
|
293
|
+
}
|
|
294
|
+
|
|
295
|
+
// 2) collect the frequent candidates
|
|
296
|
+
std::vector<std::pair<int, unsigned long long>> cand;
|
|
297
|
+
cand.reserve(freq.size());
|
|
298
|
+
for (auto& p : freq) {
|
|
299
|
+
if (p.second >= minsup) {
|
|
300
|
+
cand.emplace_back(p.first, p.second);
|
|
301
|
+
}
|
|
302
|
+
}
|
|
303
|
+
|
|
304
|
+
// 3) sort by absolute item ID
|
|
305
|
+
std::sort(cand.begin(), cand.end(),
|
|
306
|
+
[](const std::pair<int, unsigned long long>& a,
|
|
307
|
+
const std::pair<int, unsigned long long>& b) {
|
|
308
|
+
return std::abs(a.first) < std::abs(b.first);
|
|
309
|
+
});
|
|
310
|
+
|
|
311
|
+
// 4) depth‐first enumerate them
|
|
312
|
+
for (auto const& pr : cand) {
|
|
313
|
+
int item = pr.first;
|
|
314
|
+
prefix.push_back(item);
|
|
315
|
+
|
|
316
|
+
if (use_dic) {
|
|
317
|
+
// “un‐compress” each pattern back to original IDs
|
|
318
|
+
std::vector<int> unmapped;
|
|
319
|
+
unmapped.reserve(prefix.size());
|
|
320
|
+
for (int cid : prefix) {
|
|
321
|
+
int abs_id = std::abs(cid);
|
|
322
|
+
int o = inv_item_dic[abs_id];
|
|
323
|
+
unmapped.push_back(cid < 0 ? -o : o);
|
|
324
|
+
}
|
|
325
|
+
out.push_back(std::move(unmapped));
|
|
326
|
+
} else {
|
|
327
|
+
// just store the raw prefix
|
|
328
|
+
out.push_back(prefix);
|
|
329
|
+
}
|
|
330
|
+
|
|
331
|
+
// 5) project on the *first* occurrence of `item`
|
|
332
|
+
std::vector<std::vector<int>> proj;
|
|
333
|
+
proj.reserve(db.size());
|
|
334
|
+
for (auto const& seq : db) {
|
|
335
|
+
auto it = std::find(seq.begin(), seq.end(), item);
|
|
336
|
+
if (it != seq.end() && ++it != seq.end()) {
|
|
337
|
+
proj.emplace_back(it, seq.end());
|
|
338
|
+
}
|
|
339
|
+
}
|
|
340
|
+
|
|
341
|
+
if (!proj.empty()) {
|
|
342
|
+
Freq_miner_list(proj, prefix, minsup, out);
|
|
343
|
+
}
|
|
344
|
+
|
|
345
|
+
prefix.pop_back();
|
|
346
|
+
}
|
|
347
|
+
}
|
|
348
|
+
|
|
349
|
+
} // namespace largebm
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#include "load_inst.hpp"
|
|
4
|
+
#include "build_mdd.hpp"
|
|
5
|
+
|
|
6
|
+
namespace largebm {
|
|
7
|
+
|
|
8
|
+
void Freq_miner();
|
|
9
|
+
// recursive helper for the list‐based mode
|
|
10
|
+
void Freq_miner_list(const std::vector<std::vector<int>>& db,
|
|
11
|
+
std::vector<int>& prefix,
|
|
12
|
+
unsigned long long theta,
|
|
13
|
+
std::vector<std::vector<int>>& out);
|
|
14
|
+
// A candidate or frequent pattern on the miner's DFS stack.
//
// Container types are spelled `std::vector` so this header does not depend
// on a using-directive leaking in from another header.
class Pattern {
public:
    std::vector<int> seq;                         // item codes of the pattern
    std::vector<unsigned long long int> str_pnt;  // Tree indices to continue from
    std::vector<int> list;                        // item codes that may extend the pattern

    unsigned long long int freq = 0;              // accumulated frequency

    // Builds the pattern `_seq + item`. The contents of `_seq` are taken by
    // swap, so the caller's vector is empty afterwards.
    Pattern(std::vector<int>& _seq, int item) {
        seq.swap(_seq);
        seq.push_back(item);
    }

    // Builds a single-item pattern.
    Pattern(int item) : seq(1, item) {}

    // Builds an empty pattern with frequency 0.
    Pattern() = default;
};
|
|
40
|
+
|
|
41
|
+
extern unsigned long long int num_patt;
|
|
42
|
+
extern std::vector<bool> ilist;
|
|
43
|
+
extern std::vector<bool> slist;
|
|
44
|
+
extern std::vector<int> DFS_numfound;
|
|
45
|
+
extern Pattern _patt;
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
}
|