effspm 0.2.7__cp39-cp39-win_amd64.whl → 0.3.3__cp39-cp39-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- effspm/_effspm.cp39-win_amd64.pyd +0 -0
- effspm/_effspm.cpp +961 -210
- effspm/btminer/src/build_mdd.cpp +42 -17
- effspm/btminer/src/build_mdd.hpp +13 -19
- effspm/btminer/src/freq_miner.cpp +134 -49
- effspm/btminer/src/freq_miner.hpp +16 -0
- effspm/btminer/src/load_inst.cpp +211 -126
- effspm/btminer/src/load_inst.hpp +22 -4
- effspm/btminer/src/main.cpp +83 -0
- effspm/btminer/src/utility.cpp +26 -41
- effspm/btminer/src/utility.hpp +6 -30
- effspm/freq_miner.hpp +2 -1
- effspm/htminer/src/build_mdd.cpp +46 -124
- effspm/htminer/src/build_mdd.hpp +56 -49
- effspm/htminer/src/freq_miner.cpp +341 -307
- effspm/htminer/src/freq_miner.hpp +39 -40
- effspm/htminer/src/load_inst.cpp +287 -336
- effspm/htminer/src/load_inst.hpp +23 -6
- effspm/htminer/src/main.cpp +97 -0
- effspm/htminer/src/utility.cpp +38 -57
- effspm/htminer/src/utility.hpp +9 -64
- effspm/largebm/src/build_mdd.cpp +69 -110
- effspm/largebm/src/build_mdd.hpp +22 -37
- effspm/largebm/src/freq_miner.cpp +241 -291
- effspm/largebm/src/freq_miner.hpp +25 -36
- effspm/largebm/src/load_inst.cpp +20 -26
- effspm/largebm/src/load_inst.hpp +24 -34
- effspm/largebm/src/main.cpp +95 -0
- effspm/largebm/src/utility.cpp +11 -21
- effspm/largebm/src/utility.hpp +7 -10
- effspm/largehm/src/build_mdd.cpp +75 -110
- effspm/largehm/src/build_mdd.hpp +53 -73
- effspm/largehm/src/freq_miner.cpp +134 -191
- effspm/largehm/src/freq_miner.hpp +37 -60
- effspm/largehm/src/load_inst.cpp +137 -174
- effspm/largehm/src/load_inst.hpp +13 -50
- effspm/largehm/src/main.cpp +95 -0
- effspm/largehm/src/utility.cpp +46 -28
- effspm/largehm/src/utility.hpp +18 -16
- effspm/largepp/src/freq_miner.cpp +184 -156
- effspm/largepp/src/freq_miner.hpp +11 -36
- effspm/largepp/src/load_inst.cpp +32 -12
- effspm/largepp/src/load_inst.hpp +15 -9
- effspm/largepp/src/main.cpp +108 -0
- effspm/largepp/src/pattern.hpp +31 -0
- effspm/load_inst.cpp +8 -8
- effspm/load_inst.hpp +1 -1
- effspm/main.cpp +103 -0
- {effspm-0.2.7.dist-info → effspm-0.3.3.dist-info}/METADATA +1 -1
- effspm-0.3.3.dist-info/RECORD +60 -0
- effspm-0.2.7.dist-info/RECORD +0 -53
- {effspm-0.2.7.dist-info → effspm-0.3.3.dist-info}/WHEEL +0 -0
- {effspm-0.2.7.dist-info → effspm-0.3.3.dist-info}/licenses/LICENSE +0 -0
- {effspm-0.2.7.dist-info → effspm-0.3.3.dist-info}/top_level.txt +0 -0
effspm/largehm/src/utility.cpp
CHANGED
|
@@ -2,37 +2,55 @@
|
|
|
2
2
|
#include "build_mdd.hpp"
|
|
3
3
|
#include "load_inst.hpp"
|
|
4
4
|
#include <iostream>
|
|
5
|
+
|
|
5
6
|
namespace largehm {
|
|
6
|
-
std::vector<std::vector<int>> collected;
|
|
7
|
-
bool check_parent(unsigned long long int cur_anct, unsigned long long int str_pnt, unsigned long long int start, vector<unsigned long long int>& strpnt_vec) {
|
|
8
|
-
|
|
9
|
-
vector<unsigned long long int> ancestors;
|
|
10
|
-
|
|
11
|
-
while (abs(Tree[cur_anct].itmset) > abs(Tree[str_pnt].itmset)) {
|
|
12
|
-
if (Tree[cur_anct].item > 0)
|
|
13
|
-
ancestors.push_back(cur_anct);
|
|
14
|
-
cur_anct = Tree[cur_anct].anct;
|
|
15
|
-
}
|
|
16
|
-
|
|
17
|
-
if (abs(Tree[cur_anct].itmset) == abs(Tree[str_pnt].itmset))
|
|
18
|
-
return 1;
|
|
19
|
-
else {
|
|
20
|
-
for (vector<unsigned long long int>::reverse_iterator it = ancestors.rbegin(); it != ancestors.rend(); ++it) {
|
|
21
|
-
for (unsigned int i = start; i < strpnt_vec.size(); ++i) {
|
|
22
|
-
if (strpnt_vec[i] == *it)
|
|
23
|
-
return 1;
|
|
24
|
-
}
|
|
25
|
-
}
|
|
26
|
-
}
|
|
27
|
-
|
|
28
|
-
return 0;
|
|
29
7
|
|
|
8
|
+
using namespace std;
|
|
9
|
+
|
|
10
|
+
// Storage for mined patterns: each entry is one pattern, stored as a vector<int>.
// The buffer is emptied by ClearCollected() and exposed read-only through
// GetCollected().
std::vector<std::vector<int>> collectedPatterns;
|
|
12
|
+
|
|
13
|
+
bool check_parent(unsigned long long int cur_anct,
|
|
14
|
+
unsigned long long int str_pnt,
|
|
15
|
+
unsigned long long int start,
|
|
16
|
+
vector<unsigned long long int>& strpnt_vec) {
|
|
17
|
+
|
|
18
|
+
vector<unsigned long long int> ancestors;
|
|
19
|
+
|
|
20
|
+
while (abs(Tree[cur_anct].itmset) > abs(Tree[str_pnt].itmset)) {
|
|
21
|
+
if (Tree[cur_anct].item > 0)
|
|
22
|
+
ancestors.push_back(cur_anct);
|
|
23
|
+
cur_anct = Tree[cur_anct].anct;
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
if (abs(Tree[cur_anct].itmset) == abs(Tree[str_pnt].itmset))
|
|
27
|
+
return 1;
|
|
28
|
+
else {
|
|
29
|
+
for (vector<unsigned long long int>::reverse_iterator it = ancestors.rbegin();
|
|
30
|
+
it != ancestors.rend(); ++it) {
|
|
31
|
+
for (unsigned int i = start; i < strpnt_vec.size(); ++i) {
|
|
32
|
+
if (strpnt_vec[i] == *it)
|
|
33
|
+
return 1;
|
|
34
|
+
}
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
return 0;
|
|
30
39
|
}
|
|
31
40
|
|
|
41
|
+
/// Converts a processor-tick count into seconds.
///
/// @param kk  tick count, e.g. the difference of two clock() readings
/// @return `kk` divided by CLOCKS_PER_SEC, as a float
float give_time(clock_t kk) {
    return static_cast<float>(kk) / CLOCKS_PER_SEC;
}
|
|
32
45
|
|
|
46
|
+
// clear vector used to return patterns to Python
|
|
47
|
+
void ClearCollected() {
|
|
48
|
+
collectedPatterns.clear();
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
// return reference so Python wrapper can build list[list[int]]
|
|
52
|
+
const std::vector<std::vector<int>>& GetCollected() {
|
|
53
|
+
return collectedPatterns;
|
|
54
|
+
}
|
|
33
55
|
|
|
34
|
-
//
|
|
35
|
-
// float ll = ((float)kk) / CLOCKS_PER_SEC;
|
|
36
|
-
// return ll;
|
|
37
|
-
// }
|
|
38
|
-
}
|
|
56
|
+
} // namespace largehm
|
effspm/largehm/src/utility.hpp
CHANGED
|
@@ -6,24 +6,26 @@
|
|
|
6
6
|
#include "build_mdd.hpp"
|
|
7
7
|
|
|
8
8
|
namespace largehm {

using std::vector;
using std::string;

// Time helper: turns a clock_t tick count into seconds.
float give_time(clock_t kk);

// Ancestor-check helper operating on tree node indices.
bool check_parent(unsigned long long cur_anct,
                  unsigned long long str_pnt,
                  unsigned long long start,
                  vector<unsigned long long>& strpnt_vec);

// Buffer of mined patterns, shared with the Python wrapper.
extern std::vector<std::vector<int>> collectedPatterns;

// Empties the pattern buffer; meant to be called between runs.
void ClearCollected();

// Gives read-only access to the patterns collected by a mining run.
const std::vector<std::vector<int>>& GetCollected();

}  // namespace largehm
|
|
@@ -1,170 +1,198 @@
|
|
|
1
|
+
#include <algorithm>
|
|
2
|
+
#include <cstdlib>
|
|
3
|
+
#include <fstream>
|
|
1
4
|
#include <iostream>
|
|
2
|
-
|
|
5
|
+
|
|
3
6
|
#include "freq_miner.hpp"
|
|
7
|
+
#include "pattern.hpp"
|
|
8
|
+
#include "load_inst.hpp"
|
|
4
9
|
#include "utility.hpp"
|
|
5
10
|
|
|
6
|
-
namespace largepp
|
|
7
|
-
{
|
|
8
|
-
void Out_patt(vector<int>& seq, unsigned int freq);
|
|
9
|
-
void Extend_patt(Pattern& _patt);
|
|
10
|
-
|
|
11
|
-
unsigned long long int num_patt = 0;
|
|
12
|
-
|
|
13
|
-
Pattern _patt;
|
|
11
|
+
namespace largepp {
|
|
14
12
|
|
|
15
|
-
|
|
13
|
+
using std::abs;
|
|
14
|
+
using std::cout;
|
|
15
|
+
using std::endl;
|
|
16
|
+
using std::ofstream;
|
|
17
|
+
using std::swap;
|
|
18
|
+
using std::vector;
|
|
16
19
|
|
|
17
|
-
|
|
20
|
+
// Forward declarations of the file-local helpers defined further down.
// Out_patt records one pattern together with its frequency.
static void Out_patt(vector<int>& seq, unsigned int freq);
// Extend_patt expands the pattern passed in into its i- and s-extensions.
static void Extend_patt(Pattern& _pattern);

// Incremented once for every pattern handed to Out_patt.
unsigned long long int num_patt = 0; // counter for emitted patterns
// Extend_patt swaps the pattern it is extending into this object.
static Pattern _patt; // scratch pattern (for in-place extend)
|
|
35
25
|
|
|
26
|
+
/* ------------------------------------------------------------------ */
|
|
27
|
+
/* Driver */
|
|
28
|
+
/* ------------------------------------------------------------------ */
|
|
29
|
+
void Freq_miner()
|
|
30
|
+
{
|
|
31
|
+
// Build the candidate item list once (items that pass minsup at length-1)
|
|
32
|
+
vector<int> islist;
|
|
33
|
+
islist.reserve(L);
|
|
34
|
+
for (unsigned int i = 0; i < L; ++i) {
|
|
35
|
+
if (DFS[i].freq >= theta) islist.push_back(static_cast<int>(i));
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
// Seed each 1-length pattern’s extension lists
|
|
39
|
+
for (unsigned int i = 0; i < DFS.size(); ++i) {
|
|
40
|
+
DFS[i].ilist = islist;
|
|
41
|
+
DFS[i].slist = islist;
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
// DFS over the stack, extending only nodes whose current support ≥ theta
|
|
45
|
+
while (!DFS.empty() && give_time(std::clock() - start_time) < time_limit) {
|
|
46
|
+
if (DFS.back().freq >= theta) {
|
|
47
|
+
Extend_patt(DFS.back());
|
|
48
|
+
} else {
|
|
49
|
+
DFS.pop_back();
|
|
50
|
+
}
|
|
51
|
+
}
|
|
36
52
|
}
|
|
37
53
|
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
54
|
+
/* ------------------------------------------------------------------ */
/* Extend_patt: given a frequent pattern, enumerate its i- and s-ext */
/* ------------------------------------------------------------------ */
/*
 * Takes the pattern passed in (the driver passes DFS.back()), moves it into
 * the file-level scratch object `_patt`, pops it from the DFS stack and then:
 *   1. scans every recorded occurrence (seq_ID[i], str_pnt[i]) of the pattern
 *      and accumulates, per candidate item, the occurrences and frequency of
 *      the i-extension (slot item-1) and of the s-extension (slot L+item-1);
 *   2. keeps only the candidates whose frequency reaches theta;
 *   3. pushes each surviving child onto DFS, emits it through Out_patt and
 *      increments num_patt.
 * Item values are 1-based, while ilist/slist hold 0-based item indices.
 */
static void Extend_patt(Pattern& _pattern)
{
    swap(_patt, _pattern); // work on local scratch
    DFS.pop_back(); // remove from stack (after the swap it holds the previous scratch content)

    // Quick presence tables for allowed i-/s-extensions
    vector<bool> slist(L, false);
    vector<bool> ilist(L, false);
    for (int idx : _patt.slist) slist[static_cast<size_t>(idx)] = true;
    for (int idx : _patt.ilist) ilist[static_cast<size_t>(idx)] = true;

    // Potential children buffers:
    vector<Pattern> pot_patt(L * 2); // [0..L-1] = i-ext, [L..2L-1] = s-ext

    // Find last negative from the end (boundary between itemsets)
    // last_neg becomes -1 when the pattern contains no negative entry.
    int last_neg = static_cast<int>(_patt.seq.size()) - 1;
    while (last_neg >= 0 && _patt.seq[static_cast<size_t>(last_neg)] > 0) --last_neg;

    // Scan occurrences to build supports for all valid next-steps
    for (size_t i = 0; i < _patt.str_pnt.size(); ++i) {
        // Per-occurrence flags: an extension already recorded for this
        // occurrence is not recorded again by the scan in step 2.
        vector<bool> found(L * 2, false);

        unsigned long long seq_id = _patt.seq_ID[i];
        unsigned int j = _patt.str_pnt[i] + 1; // first position after the current match

        // 1) Same itemset (i-extension) forward until end-of-itemset (>0)
        while (j < items[seq_id].size() && items[seq_id][j] > 0) {
            int cur_itm = items[seq_id][j];
            if (ilist[static_cast<size_t>(cur_itm - 1)]) {
                pot_patt[static_cast<size_t>(cur_itm - 1)].seq_ID.push_back(seq_id);
                pot_patt[static_cast<size_t>(cur_itm - 1)].str_pnt.push_back(j);
                ++pot_patt[static_cast<size_t>(cur_itm - 1)].freq;
                found[static_cast<size_t>(cur_itm - 1)] = true;
            }
            ++j;
        }

        // 2) Later itemsets (s-extension), plus special re-open i-ext rule
        // num_itmfnd counts how many entries of the pattern, starting at
        // last_neg, have been matched since the last negative item was seen.
        int num_itmfnd = 0;
        for (size_t k = j; k < items[seq_id].size(); ++k) {
            int cur = items[seq_id][k];
            int cur_itm = abs(cur);

            if (cur < 0) num_itmfnd = 0; // new itemset boundary seen

            // s-extension: add cur_itm as new itemset element
            // (only the first hit per occurrence is recorded)
            if (slist[static_cast<size_t>(cur_itm - 1)] &&
                !found[static_cast<size_t>(L + cur_itm - 1)]) {
                pot_patt[static_cast<size_t>(L + cur_itm - 1)].seq_ID.push_back(seq_id);
                pot_patt[static_cast<size_t>(L + cur_itm - 1)].str_pnt.push_back(k);
                ++pot_patt[static_cast<size_t>(L + cur_itm - 1)].freq;
                found[static_cast<size_t>(L + cur_itm - 1)] = true;
            }

            // once we've seen the suffix of the last itemset fully,
            // allow i-extension again (across future itemsets)
            if (num_itmfnd == static_cast<int>(_patt.seq.size()) - last_neg) {
                if (ilist[static_cast<size_t>(cur_itm - 1)] &&
                    !found[static_cast<size_t>(cur_itm - 1)]) {
                    pot_patt[static_cast<size_t>(cur_itm - 1)].seq_ID.push_back(seq_id);
                    pot_patt[static_cast<size_t>(cur_itm - 1)].str_pnt.push_back(k);
                    ++pot_patt[static_cast<size_t>(cur_itm - 1)].freq;
                    found[static_cast<size_t>(cur_itm - 1)] = true;
                }
            } else if (last_neg + num_itmfnd >= 0 &&
                       cur_itm == abs(_patt.seq[static_cast<size_t>(last_neg + num_itmfnd)])) {
                // current item matches the next expected entry of the pattern
                ++num_itmfnd;
            }
        }
    }

    // Filter children by support threshold
    vector<int> ilistp;
    vector<int> slistp;
    ilistp.reserve(_patt.ilist.size());
    slistp.reserve(_patt.slist.size());

    for (int idx : _patt.ilist) {
        if (pot_patt[static_cast<size_t>(idx)].freq >= theta)
            ilistp.push_back(idx);
    }
    for (int idx : _patt.slist) {
        if (pot_patt[static_cast<size_t>(idx + static_cast<int>(L))].freq >= theta)
            slistp.push_back(idx);
    }

    // Push all i-extensions
    for (int idx : ilistp) {
        DFS.emplace_back();
        // take over the occurrence lists and frequency gathered above
        swap(DFS.back(), pot_patt[static_cast<size_t>(idx)]);

        DFS.back().seq = _patt.seq;
        DFS.back().seq.push_back(idx + 1); // positive value: item joins the last itemset

        DFS.back().slist = slistp;
        DFS.back().ilist = ilistp;

        // ALWAYS emit (so collected fills even if !b_disp && !b_write)
        Out_patt(DFS.back().seq, DFS.back().freq);
        ++num_patt;
    }

    // Push all s-extensions
    for (int idx : slistp) {
        DFS.emplace_back();
        // take over the occurrence lists and frequency gathered above
        swap(DFS.back(), pot_patt[static_cast<size_t>(idx + static_cast<int>(L))]);

        DFS.back().seq = _patt.seq;
        DFS.back().seq.push_back(-(idx + 1)); // negative encodes new itemset

        DFS.back().slist = slistp;
        DFS.back().ilist = slistp; // as in original code

        // ALWAYS emit
        Out_patt(DFS.back().seq, DFS.back().freq);
        ++num_patt;
    }
}
|
|
138
175
|
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
if (b_disp)
|
|
160
|
-
cout << "************** Freq: " << freq << endl;
|
|
161
|
-
if (b_write) {
|
|
162
|
-
file_o << "************** Freq: " << freq << endl;
|
|
163
|
-
file_o.close();
|
|
164
|
-
}
|
|
165
|
-
}
|
|
176
|
+
/* ------------------------------------------------------------------ */
|
|
177
|
+
/* Out_patt: append to buffer; optionally print/write */
|
|
178
|
+
/* ------------------------------------------------------------------ */
|
|
179
|
+
static void Out_patt(vector<int>& seq, unsigned int freq)
|
|
180
|
+
{
|
|
181
|
+
// Always append to in-memory results returned to Python
|
|
182
|
+
largepp::collected.push_back(seq);
|
|
183
|
+
|
|
184
|
+
ofstream file_o;
|
|
185
|
+
if (b_write) file_o.open(out_file, std::ios::app);
|
|
186
|
+
|
|
187
|
+
if (b_disp) {
|
|
188
|
+
for (int v : seq) cout << v << " ";
|
|
189
|
+
cout << "\n************** Freq: " << freq << endl;
|
|
190
|
+
}
|
|
191
|
+
if (b_write) {
|
|
192
|
+
for (int v : seq) file_o << v << " ";
|
|
193
|
+
file_o << "\n************** Freq: " << freq << "\n";
|
|
194
|
+
file_o.close();
|
|
195
|
+
}
|
|
166
196
|
}
|
|
167
197
|
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
198
|
+
} // namespace largepp
|
|
@@ -1,43 +1,18 @@
|
|
|
1
1
|
#pragma once
|
|
2
2
|
|
|
3
|
-
#include
|
|
4
|
-
|
|
5
|
-
{
|
|
6
|
-
void Freq_miner();
|
|
7
|
-
|
|
8
|
-
class Pattern {
|
|
9
|
-
public:
|
|
10
|
-
|
|
11
|
-
vector<int> seq;
|
|
12
|
-
vector<unsigned int> str_pnt;
|
|
13
|
-
vector<unsigned long long int> seq_ID;
|
|
14
|
-
|
|
15
|
-
vector<int> slist;
|
|
16
|
-
vector<int> ilist;
|
|
17
|
-
|
|
18
|
-
unsigned long long int freq;
|
|
3
|
+
#include <vector>
|
|
4
|
+
#include <string>
|
|
19
5
|
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
seq.push_back(_seq[i]);
|
|
24
|
-
seq.push_back(item);
|
|
25
|
-
freq = 0;
|
|
26
|
-
}
|
|
6
|
+
#include "pattern.hpp" // defines largepp::Pattern
|
|
7
|
+
#include "load_inst.hpp" // declares externs: items, L, theta, DFS, etc.
|
|
8
|
+
#include "utility.hpp" // flags, collected buffer, timers, helpers
|
|
27
9
|
|
|
10
|
+
namespace largepp {

// Public entry point of the miner.
void Freq_miner();

// Counter of emitted patterns; the definition lives in the .cpp file.
extern unsigned long long num_patt;

}  // namespace largepp
|
effspm/largepp/src/load_inst.cpp
CHANGED
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
#include <sstream>
|
|
3
3
|
#include <algorithm>
|
|
4
4
|
#include <cmath>
|
|
5
|
+
#include <fstream>
|
|
5
6
|
#include "load_inst.hpp"
|
|
6
7
|
#include "freq_miner.hpp"
|
|
7
8
|
#include "utility.hpp"
|
|
@@ -36,6 +37,7 @@ bool Load_instance(string& items_file, double thresh)
|
|
|
36
37
|
|
|
37
38
|
cout << "\nPreprocess done in " << give_time(clock() - kk) << " seconds\n\n";
|
|
38
39
|
|
|
40
|
+
DFS.clear();
|
|
39
41
|
DFS.reserve(L);
|
|
40
42
|
for (unsigned int i = 0; i < L; ++i)
|
|
41
43
|
DFS.emplace_back(-int(i) - 1);
|
|
@@ -48,10 +50,28 @@ bool Load_instance(string& items_file, double thresh)
|
|
|
48
50
|
return false;
|
|
49
51
|
else
|
|
50
52
|
theta = (thresh < 1.0) ? ceil(thresh * N) : thresh;
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
53
|
+
if (b_disp)
|
|
54
|
+
cout << "\nMDD Database built in " << give_time(clock() - kk) << " seconds\n\n";
|
|
55
|
+
if (b_disp)
|
|
56
|
+
cout << "Found " << N << " sequence, with max line len " << M
|
|
57
|
+
<< ", and " << L << " items, and " << E << " enteries\n";
|
|
58
|
+
|
|
59
|
+
// ───────────────────────────────────────────────────────────
|
|
60
|
+
// DEBUG snapshot of seeds right after loading
|
|
61
|
+
// ───────────────────────────────────────────────────────────
|
|
62
|
+
{
|
|
63
|
+
unsigned long long seeds_ge_theta = 0, seeds_nonzero = 0, max_freq = 0;
|
|
64
|
+
for (size_t i = 0; i < DFS.size(); ++i) {
|
|
65
|
+
if (DFS[i].freq > 0) ++seeds_nonzero;
|
|
66
|
+
if (DFS[i].freq >= theta) ++seeds_ge_theta;
|
|
67
|
+
if (DFS[i].freq > max_freq) max_freq = DFS[i].freq;
|
|
68
|
+
}
|
|
69
|
+
// std::cout << " theta=" << theta
|
|
70
|
+
// << " | DFS.size=" << DFS.size()
|
|
71
|
+
// << " | seeds>=theta=" << seeds_ge_theta
|
|
72
|
+
// << " | seeds>0=" << seeds_nonzero
|
|
73
|
+
// << " | max_seed_freq=" << max_freq << "\n";
|
|
74
|
+
}
|
|
55
75
|
|
|
56
76
|
return true;
|
|
57
77
|
}
|
|
@@ -67,12 +87,12 @@ void Load_py(const pybind11::object& data, double thresh)
|
|
|
67
87
|
int max_id = 0;
|
|
68
88
|
M = 0; E = 0;
|
|
69
89
|
for (auto& seq : items) {
|
|
70
|
-
M = max<unsigned int>(M, seq.size());
|
|
90
|
+
M = max<unsigned int>(M, static_cast<unsigned int>(seq.size()));
|
|
71
91
|
E += seq.size();
|
|
72
92
|
for (int x : seq)
|
|
73
93
|
max_id = max(max_id, abs(x));
|
|
74
94
|
}
|
|
75
|
-
L = max_id;
|
|
95
|
+
L = static_cast<unsigned int>(max_id);
|
|
76
96
|
theta = (thresh < 1.0) ? ceil(thresh * N) : thresh;
|
|
77
97
|
|
|
78
98
|
DFS.clear();
|
|
@@ -82,7 +102,7 @@ void Load_py(const pybind11::object& data, double thresh)
|
|
|
82
102
|
}
|
|
83
103
|
|
|
84
104
|
/* =================================================================
|
|
85
|
-
* The professor’s original helpers — untouched
|
|
105
|
+
* The professor’s original helpers — untouched except for minor safety casts
|
|
86
106
|
* ================================================================= */
|
|
87
107
|
static bool Preprocess(string& inst, double thresh)
|
|
88
108
|
{
|
|
@@ -97,7 +117,7 @@ static bool Preprocess(string& inst, double thresh)
|
|
|
97
117
|
string itm;
|
|
98
118
|
while (word >> itm) {
|
|
99
119
|
ditem = stoi(itm);
|
|
100
|
-
L = max<unsigned int>(L, abs(ditem));
|
|
120
|
+
L = max<unsigned int>(L, static_cast<unsigned int>(abs(ditem)));
|
|
101
121
|
|
|
102
122
|
if (freq.size() < L) {
|
|
103
123
|
freq.resize(L, 0);
|
|
@@ -170,7 +190,7 @@ static void Load_items_pre(string& inst)
|
|
|
170
190
|
}
|
|
171
191
|
if (empty_seq) continue;
|
|
172
192
|
|
|
173
|
-
++N; E += size_m; M = max<unsigned int>(M, size_m);
|
|
193
|
+
++N; E += size_m; M = max<unsigned int>(M, static_cast<unsigned int>(size_m));
|
|
174
194
|
}
|
|
175
195
|
}
|
|
176
196
|
|
|
@@ -193,8 +213,8 @@ static bool Load_items(string& inst)
|
|
|
193
213
|
|
|
194
214
|
while (word >> itm) {
|
|
195
215
|
ditem = stoi(itm);
|
|
196
|
-
if (L < abs(ditem)) {
|
|
197
|
-
L = abs(ditem);
|
|
216
|
+
if (L < static_cast<unsigned int>(abs(ditem))) {
|
|
217
|
+
L = static_cast<unsigned int>(abs(ditem));
|
|
198
218
|
while (DFS.size() < L) {
|
|
199
219
|
DFS.emplace_back(-int(DFS.size()) - 1);
|
|
200
220
|
counted.push_back(0);
|
|
@@ -211,7 +231,7 @@ static bool Load_items(string& inst)
|
|
|
211
231
|
++size_m;
|
|
212
232
|
}
|
|
213
233
|
E += size_m;
|
|
214
|
-
M = max<unsigned int>(M, size_m);
|
|
234
|
+
M = max<unsigned int>(M, static_cast<unsigned int>(size_m));
|
|
215
235
|
}
|
|
216
236
|
return true;
|
|
217
237
|
}
|