effspm 0.2.6__cp310-cp310-win_amd64.whl → 0.3.0__cp310-cp310-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- effspm/_effspm.cp310-win_amd64.pyd +0 -0
- effspm/_effspm.cpp +310 -240
- effspm/btminer/src/build_mdd.cpp +42 -17
- effspm/btminer/src/build_mdd.hpp +13 -19
- effspm/btminer/src/freq_miner.cpp +134 -49
- effspm/btminer/src/freq_miner.hpp +16 -0
- effspm/btminer/src/load_inst.cpp +196 -121
- effspm/btminer/src/load_inst.hpp +22 -4
- effspm/btminer/src/utility.cpp +26 -41
- effspm/btminer/src/utility.hpp +6 -30
- effspm/freq_miner.hpp +2 -1
- effspm/htminer/src/build_mdd.cpp +33 -86
- effspm/largebm/src/build_mdd.cpp +69 -110
- effspm/largebm/src/build_mdd.hpp +22 -37
- effspm/largebm/src/freq_miner.cpp +241 -291
- effspm/largebm/src/freq_miner.hpp +25 -36
- effspm/largebm/src/load_inst.cpp +20 -26
- effspm/largebm/src/load_inst.hpp +24 -34
- effspm/largebm/src/utility.cpp +11 -21
- effspm/largebm/src/utility.hpp +7 -10
- effspm/largehm/src/freq_miner.cpp +62 -78
- effspm/largehm/src/load_inst.cpp +79 -61
- effspm/largepp/src/freq_miner.cpp +184 -156
- effspm/largepp/src/freq_miner.hpp +11 -36
- effspm/largepp/src/load_inst.cpp +27 -8
- effspm/largepp/src/load_inst.hpp +15 -9
- effspm/largepp/src/pattern.hpp +31 -0
- effspm/load_inst.hpp +1 -1
- {effspm-0.2.6.dist-info → effspm-0.3.0.dist-info}/METADATA +1 -1
- effspm-0.3.0.dist-info/RECORD +54 -0
- effspm-0.2.6.dist-info/RECORD +0 -53
- {effspm-0.2.6.dist-info → effspm-0.3.0.dist-info}/WHEEL +0 -0
- {effspm-0.2.6.dist-info → effspm-0.3.0.dist-info}/licenses/LICENSE +0 -0
- {effspm-0.2.6.dist-info → effspm-0.3.0.dist-info}/top_level.txt +0 -0
|
@@ -1,170 +1,198 @@
|
|
|
1
|
+
#include <algorithm>
|
|
2
|
+
#include <cstdlib>
|
|
3
|
+
#include <fstream>
|
|
1
4
|
#include <iostream>
|
|
2
|
-
|
|
5
|
+
|
|
3
6
|
#include "freq_miner.hpp"
|
|
7
|
+
#include "pattern.hpp"
|
|
8
|
+
#include "load_inst.hpp"
|
|
4
9
|
#include "utility.hpp"
|
|
5
10
|
|
|
6
|
-
namespace largepp
|
|
7
|
-
{
|
|
8
|
-
void Out_patt(vector<int>& seq, unsigned int freq);
|
|
9
|
-
void Extend_patt(Pattern& _patt);
|
|
10
|
-
|
|
11
|
-
unsigned long long int num_patt = 0;
|
|
12
|
-
|
|
13
|
-
Pattern _patt;
|
|
11
|
+
namespace largepp {
|
|
14
12
|
|
|
15
|
-
|
|
13
|
+
using std::abs;
|
|
14
|
+
using std::cout;
|
|
15
|
+
using std::endl;
|
|
16
|
+
using std::ofstream;
|
|
17
|
+
using std::swap;
|
|
18
|
+
using std::vector;
|
|
16
19
|
|
|
17
|
-
|
|
20
|
+
static void Out_patt(vector<int>& seq, unsigned int freq);
|
|
21
|
+
static void Extend_patt(Pattern& _pattern);
|
|
18
22
|
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
islist.push_back(i);
|
|
22
|
-
}
|
|
23
|
-
|
|
24
|
-
for (int i = 0; i < DFS.size(); ++i) {
|
|
25
|
-
DFS[i].ilist = islist;
|
|
26
|
-
DFS[i].slist = islist;
|
|
27
|
-
}
|
|
28
|
-
|
|
29
|
-
while (!DFS.empty() && give_time(clock() - start_time) < time_limit) {
|
|
30
|
-
if (DFS.back().freq >= theta)
|
|
31
|
-
Extend_patt(DFS.back());
|
|
32
|
-
else
|
|
33
|
-
DFS.pop_back();
|
|
34
|
-
}
|
|
23
|
+
unsigned long long int num_patt = 0; // counter for emitted patterns
|
|
24
|
+
static Pattern _patt; // scratch pattern (for in-place extend)
|
|
35
25
|
|
|
26
|
+
/* ------------------------------------------------------------------ */
|
|
27
|
+
/* Driver */
|
|
28
|
+
/* ------------------------------------------------------------------ */
|
|
29
|
+
void Freq_miner()
|
|
30
|
+
{
|
|
31
|
+
// Build the candidate item list once (items that pass minsup at length-1)
|
|
32
|
+
vector<int> islist;
|
|
33
|
+
islist.reserve(L);
|
|
34
|
+
for (unsigned int i = 0; i < L; ++i) {
|
|
35
|
+
if (DFS[i].freq >= theta) islist.push_back(static_cast<int>(i));
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
// Seed each 1-length pattern’s extension lists
|
|
39
|
+
for (unsigned int i = 0; i < DFS.size(); ++i) {
|
|
40
|
+
DFS[i].ilist = islist;
|
|
41
|
+
DFS[i].slist = islist;
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
// DFS over the stack, extending only nodes whose current support ≥ theta
|
|
45
|
+
while (!DFS.empty() && give_time(std::clock() - start_time) < time_limit) {
|
|
46
|
+
if (DFS.back().freq >= theta) {
|
|
47
|
+
Extend_patt(DFS.back());
|
|
48
|
+
} else {
|
|
49
|
+
DFS.pop_back();
|
|
50
|
+
}
|
|
51
|
+
}
|
|
36
52
|
}
|
|
37
53
|
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
54
|
+
/* ------------------------------------------------------------------ */
|
|
55
|
+
/* Extend_patt: given a frequent pattern, enumerate its i- and s-ext */
|
|
56
|
+
/* ------------------------------------------------------------------ */
|
|
57
|
+
static void Extend_patt(Pattern& _pattern)
|
|
58
|
+
{
|
|
59
|
+
swap(_patt, _pattern); // work on local scratch
|
|
60
|
+
DFS.pop_back(); // remove from stack
|
|
61
|
+
|
|
62
|
+
// Quick presence tables for allowed i-/s-extensions
|
|
63
|
+
vector<bool> slist(L, false);
|
|
64
|
+
vector<bool> ilist(L, false);
|
|
65
|
+
for (int idx : _patt.slist) slist[static_cast<size_t>(idx)] = true;
|
|
66
|
+
for (int idx : _patt.ilist) ilist[static_cast<size_t>(idx)] = true;
|
|
67
|
+
|
|
68
|
+
// Potential children buffers:
|
|
69
|
+
vector<Pattern> pot_patt(L * 2); // [0..L-1] = i-ext, [L..2L-1] = s-ext
|
|
70
|
+
|
|
71
|
+
// Find last negative from the end (boundary between itemsets)
|
|
72
|
+
int last_neg = static_cast<int>(_patt.seq.size()) - 1;
|
|
73
|
+
while (last_neg >= 0 && _patt.seq[static_cast<size_t>(last_neg)] > 0) --last_neg;
|
|
74
|
+
|
|
75
|
+
// Scan occurrences to build supports for all valid next-steps
|
|
76
|
+
for (size_t i = 0; i < _patt.str_pnt.size(); ++i) {
|
|
77
|
+
vector<bool> found(L * 2, false);
|
|
78
|
+
|
|
79
|
+
unsigned long long seq_id = _patt.seq_ID[i];
|
|
80
|
+
unsigned int j = _patt.str_pnt[i] + 1;
|
|
81
|
+
|
|
82
|
+
// 1) Same itemset (i-extension) forward until end-of-itemset (>0)
|
|
83
|
+
while (j < items[seq_id].size() && items[seq_id][j] > 0) {
|
|
84
|
+
int cur_itm = items[seq_id][j];
|
|
85
|
+
if (ilist[static_cast<size_t>(cur_itm - 1)]) {
|
|
86
|
+
pot_patt[static_cast<size_t>(cur_itm - 1)].seq_ID.push_back(seq_id);
|
|
87
|
+
pot_patt[static_cast<size_t>(cur_itm - 1)].str_pnt.push_back(j);
|
|
88
|
+
++pot_patt[static_cast<size_t>(cur_itm - 1)].freq;
|
|
89
|
+
found[static_cast<size_t>(cur_itm - 1)] = true;
|
|
90
|
+
}
|
|
91
|
+
++j;
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
// 2) Later itemsets (s-extension), plus special re-open i-ext rule
|
|
95
|
+
int num_itmfnd = 0;
|
|
96
|
+
for (size_t k = j; k < items[seq_id].size(); ++k) {
|
|
97
|
+
int cur = items[seq_id][k];
|
|
98
|
+
int cur_itm = abs(cur);
|
|
99
|
+
|
|
100
|
+
if (cur < 0) num_itmfnd = 0; // new itemset boundary seen
|
|
101
|
+
|
|
102
|
+
// s-extension: add cur_itm as new itemset element
|
|
103
|
+
if (slist[static_cast<size_t>(cur_itm - 1)] &&
|
|
104
|
+
!found[static_cast<size_t>(L + cur_itm - 1)]) {
|
|
105
|
+
pot_patt[static_cast<size_t>(L + cur_itm - 1)].seq_ID.push_back(seq_id);
|
|
106
|
+
pot_patt[static_cast<size_t>(L + cur_itm - 1)].str_pnt.push_back(k);
|
|
107
|
+
++pot_patt[static_cast<size_t>(L + cur_itm - 1)].freq;
|
|
108
|
+
found[static_cast<size_t>(L + cur_itm - 1)] = true;
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
// once we've seen the suffix of the last itemset fully,
|
|
112
|
+
// allow i-extension again (across future itemsets)
|
|
113
|
+
if (num_itmfnd == static_cast<int>(_patt.seq.size()) - last_neg) {
|
|
114
|
+
if (ilist[static_cast<size_t>(cur_itm - 1)] &&
|
|
115
|
+
!found[static_cast<size_t>(cur_itm - 1)]) {
|
|
116
|
+
pot_patt[static_cast<size_t>(cur_itm - 1)].seq_ID.push_back(seq_id);
|
|
117
|
+
pot_patt[static_cast<size_t>(cur_itm - 1)].str_pnt.push_back(k);
|
|
118
|
+
++pot_patt[static_cast<size_t>(cur_itm - 1)].freq;
|
|
119
|
+
found[static_cast<size_t>(cur_itm - 1)] = true;
|
|
120
|
+
}
|
|
121
|
+
} else if (last_neg + num_itmfnd >= 0 &&
|
|
122
|
+
cur_itm == abs(_patt.seq[static_cast<size_t>(last_neg + num_itmfnd)])) {
|
|
123
|
+
++num_itmfnd;
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
// Filter children by support threshold
|
|
129
|
+
vector<int> ilistp;
|
|
130
|
+
vector<int> slistp;
|
|
131
|
+
ilistp.reserve(_patt.ilist.size());
|
|
132
|
+
slistp.reserve(_patt.slist.size());
|
|
133
|
+
|
|
134
|
+
for (int idx : _patt.ilist) {
|
|
135
|
+
if (pot_patt[static_cast<size_t>(idx)].freq >= theta)
|
|
136
|
+
ilistp.push_back(idx);
|
|
137
|
+
}
|
|
138
|
+
for (int idx : _patt.slist) {
|
|
139
|
+
if (pot_patt[static_cast<size_t>(idx + static_cast<int>(L))].freq >= theta)
|
|
140
|
+
slistp.push_back(idx);
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
// Push all i-extensions
|
|
144
|
+
for (int idx : ilistp) {
|
|
145
|
+
DFS.emplace_back();
|
|
146
|
+
swap(DFS.back(), pot_patt[static_cast<size_t>(idx)]);
|
|
147
|
+
|
|
148
|
+
DFS.back().seq = _patt.seq;
|
|
149
|
+
DFS.back().seq.push_back(idx + 1);
|
|
150
|
+
|
|
151
|
+
DFS.back().slist = slistp;
|
|
152
|
+
DFS.back().ilist = ilistp;
|
|
153
|
+
|
|
154
|
+
// ALWAYS emit (so collected fills even if !b_disp && !b_write)
|
|
155
|
+
Out_patt(DFS.back().seq, DFS.back().freq);
|
|
156
|
+
++num_patt;
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
// Push all s-extensions
|
|
160
|
+
for (int idx : slistp) {
|
|
161
|
+
DFS.emplace_back();
|
|
162
|
+
swap(DFS.back(), pot_patt[static_cast<size_t>(idx + static_cast<int>(L))]);
|
|
163
|
+
|
|
164
|
+
DFS.back().seq = _patt.seq;
|
|
165
|
+
DFS.back().seq.push_back(-(idx + 1)); // negative encodes new itemset
|
|
166
|
+
|
|
167
|
+
DFS.back().slist = slistp;
|
|
168
|
+
DFS.back().ilist = slistp; // as in original code
|
|
169
|
+
|
|
170
|
+
// ALWAYS emit
|
|
171
|
+
Out_patt(DFS.back().seq, DFS.back().freq);
|
|
172
|
+
++num_patt;
|
|
173
|
+
}
|
|
137
174
|
}
|
|
138
175
|
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
if (b_disp)
|
|
160
|
-
cout << "************** Freq: " << freq << endl;
|
|
161
|
-
if (b_write) {
|
|
162
|
-
file_o << "************** Freq: " << freq << endl;
|
|
163
|
-
file_o.close();
|
|
164
|
-
}
|
|
165
|
-
}
|
|
176
|
+
/* ------------------------------------------------------------------ */
|
|
177
|
+
/* Out_patt: append to buffer; optionally print/write */
|
|
178
|
+
/* ------------------------------------------------------------------ */
|
|
179
|
+
static void Out_patt(vector<int>& seq, unsigned int freq)
|
|
180
|
+
{
|
|
181
|
+
// Always append to in-memory results returned to Python
|
|
182
|
+
largepp::collected.push_back(seq);
|
|
183
|
+
|
|
184
|
+
ofstream file_o;
|
|
185
|
+
if (b_write) file_o.open(out_file, std::ios::app);
|
|
186
|
+
|
|
187
|
+
if (b_disp) {
|
|
188
|
+
for (int v : seq) cout << v << " ";
|
|
189
|
+
cout << "\n************** Freq: " << freq << endl;
|
|
190
|
+
}
|
|
191
|
+
if (b_write) {
|
|
192
|
+
for (int v : seq) file_o << v << " ";
|
|
193
|
+
file_o << "\n************** Freq: " << freq << "\n";
|
|
194
|
+
file_o.close();
|
|
195
|
+
}
|
|
166
196
|
}
|
|
167
197
|
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
198
|
+
} // namespace largepp
|
|
@@ -1,43 +1,18 @@
|
|
|
1
1
|
#pragma once
|
|
2
2
|
|
|
3
|
-
#include
|
|
4
|
-
|
|
5
|
-
{
|
|
6
|
-
void Freq_miner();
|
|
7
|
-
|
|
8
|
-
class Pattern {
|
|
9
|
-
public:
|
|
10
|
-
|
|
11
|
-
vector<int> seq;
|
|
12
|
-
vector<unsigned int> str_pnt;
|
|
13
|
-
vector<unsigned long long int> seq_ID;
|
|
14
|
-
|
|
15
|
-
vector<int> slist;
|
|
16
|
-
vector<int> ilist;
|
|
17
|
-
|
|
18
|
-
unsigned long long int freq;
|
|
3
|
+
#include <vector>
|
|
4
|
+
#include <string>
|
|
19
5
|
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
seq.push_back(_seq[i]);
|
|
24
|
-
seq.push_back(item);
|
|
25
|
-
freq = 0;
|
|
26
|
-
}
|
|
6
|
+
#include "pattern.hpp" // defines largepp::Pattern
|
|
7
|
+
#include "load_inst.hpp" // declares externs: items, L, theta, DFS, etc.
|
|
8
|
+
#include "utility.hpp" // flags, collected buffer, timers, helpers
|
|
27
9
|
|
|
10
|
+
namespace largepp {
|
|
28
11
|
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
freq = 0;
|
|
32
|
-
}
|
|
33
|
-
|
|
34
|
-
Pattern() {
|
|
35
|
-
freq = 0;
|
|
36
|
-
}
|
|
37
|
-
|
|
38
|
-
};
|
|
39
|
-
|
|
40
|
-
extern vector<Pattern> DFS; //DFS queue of potential patterns to extend
|
|
12
|
+
// Public entry point
|
|
13
|
+
void Freq_miner();
|
|
41
14
|
|
|
15
|
+
// (defined in the .cpp)
|
|
42
16
|
extern unsigned long long int num_patt;
|
|
43
|
-
|
|
17
|
+
|
|
18
|
+
} // namespace largepp
|
effspm/largepp/src/load_inst.cpp
CHANGED
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
#include <sstream>
|
|
3
3
|
#include <algorithm>
|
|
4
4
|
#include <cmath>
|
|
5
|
+
#include <fstream>
|
|
5
6
|
#include "load_inst.hpp"
|
|
6
7
|
#include "freq_miner.hpp"
|
|
7
8
|
#include "utility.hpp"
|
|
@@ -36,6 +37,7 @@ bool Load_instance(string& items_file, double thresh)
|
|
|
36
37
|
|
|
37
38
|
cout << "\nPreprocess done in " << give_time(clock() - kk) << " seconds\n\n";
|
|
38
39
|
|
|
40
|
+
DFS.clear();
|
|
39
41
|
DFS.reserve(L);
|
|
40
42
|
for (unsigned int i = 0; i < L; ++i)
|
|
41
43
|
DFS.emplace_back(-int(i) - 1);
|
|
@@ -53,6 +55,23 @@ bool Load_instance(string& items_file, double thresh)
|
|
|
53
55
|
cout << "Found " << N << " sequence, with max line len " << M
|
|
54
56
|
<< ", and " << L << " items, and " << E << " enteries\n";
|
|
55
57
|
|
|
58
|
+
// ───────────────────────────────────────────────────────────
|
|
59
|
+
// DEBUG snapshot of seeds right after loading
|
|
60
|
+
// ───────────────────────────────────────────────────────────
|
|
61
|
+
{
|
|
62
|
+
unsigned long long seeds_ge_theta = 0, seeds_nonzero = 0, max_freq = 0;
|
|
63
|
+
for (size_t i = 0; i < DFS.size(); ++i) {
|
|
64
|
+
if (DFS[i].freq > 0) ++seeds_nonzero;
|
|
65
|
+
if (DFS[i].freq >= theta) ++seeds_ge_theta;
|
|
66
|
+
if (DFS[i].freq > max_freq) max_freq = DFS[i].freq;
|
|
67
|
+
}
|
|
68
|
+
// std::cout << " theta=" << theta
|
|
69
|
+
// << " | DFS.size=" << DFS.size()
|
|
70
|
+
// << " | seeds>=theta=" << seeds_ge_theta
|
|
71
|
+
// << " | seeds>0=" << seeds_nonzero
|
|
72
|
+
// << " | max_seed_freq=" << max_freq << "\n";
|
|
73
|
+
}
|
|
74
|
+
|
|
56
75
|
return true;
|
|
57
76
|
}
|
|
58
77
|
|
|
@@ -67,12 +86,12 @@ void Load_py(const pybind11::object& data, double thresh)
|
|
|
67
86
|
int max_id = 0;
|
|
68
87
|
M = 0; E = 0;
|
|
69
88
|
for (auto& seq : items) {
|
|
70
|
-
M = max<unsigned int>(M, seq.size());
|
|
89
|
+
M = max<unsigned int>(M, static_cast<unsigned int>(seq.size()));
|
|
71
90
|
E += seq.size();
|
|
72
91
|
for (int x : seq)
|
|
73
92
|
max_id = max(max_id, abs(x));
|
|
74
93
|
}
|
|
75
|
-
L = max_id;
|
|
94
|
+
L = static_cast<unsigned int>(max_id);
|
|
76
95
|
theta = (thresh < 1.0) ? ceil(thresh * N) : thresh;
|
|
77
96
|
|
|
78
97
|
DFS.clear();
|
|
@@ -82,7 +101,7 @@ void Load_py(const pybind11::object& data, double thresh)
|
|
|
82
101
|
}
|
|
83
102
|
|
|
84
103
|
/* =================================================================
|
|
85
|
-
* The professor’s original helpers — untouched
|
|
104
|
+
* The professor’s original helpers — untouched except minor safety
|
|
86
105
|
* ================================================================= */
|
|
87
106
|
static bool Preprocess(string& inst, double thresh)
|
|
88
107
|
{
|
|
@@ -97,7 +116,7 @@ static bool Preprocess(string& inst, double thresh)
|
|
|
97
116
|
string itm;
|
|
98
117
|
while (word >> itm) {
|
|
99
118
|
ditem = stoi(itm);
|
|
100
|
-
L = max<unsigned int>(L, abs(ditem));
|
|
119
|
+
L = max<unsigned int>(L, static_cast<unsigned int>(abs(ditem)));
|
|
101
120
|
|
|
102
121
|
if (freq.size() < L) {
|
|
103
122
|
freq.resize(L, 0);
|
|
@@ -170,7 +189,7 @@ static void Load_items_pre(string& inst)
|
|
|
170
189
|
}
|
|
171
190
|
if (empty_seq) continue;
|
|
172
191
|
|
|
173
|
-
++N; E += size_m; M = max<unsigned int>(M, size_m);
|
|
192
|
+
++N; E += size_m; M = max<unsigned int>(M, static_cast<unsigned int>(size_m));
|
|
174
193
|
}
|
|
175
194
|
}
|
|
176
195
|
|
|
@@ -193,8 +212,8 @@ static bool Load_items(string& inst)
|
|
|
193
212
|
|
|
194
213
|
while (word >> itm) {
|
|
195
214
|
ditem = stoi(itm);
|
|
196
|
-
if (L < abs(ditem)) {
|
|
197
|
-
L = abs(ditem);
|
|
215
|
+
if (L < static_cast<unsigned int>(abs(ditem))) {
|
|
216
|
+
L = static_cast<unsigned int>(abs(ditem));
|
|
198
217
|
while (DFS.size() < L) {
|
|
199
218
|
DFS.emplace_back(-int(DFS.size()) - 1);
|
|
200
219
|
counted.push_back(0);
|
|
@@ -211,7 +230,7 @@ static bool Load_items(string& inst)
|
|
|
211
230
|
++size_m;
|
|
212
231
|
}
|
|
213
232
|
E += size_m;
|
|
214
|
-
M = max<unsigned int>(M, size_m);
|
|
233
|
+
M = max<unsigned int>(M, static_cast<unsigned int>(size_m));
|
|
215
234
|
}
|
|
216
235
|
return true;
|
|
217
236
|
}
|
effspm/largepp/src/load_inst.hpp
CHANGED
|
@@ -6,23 +6,29 @@
|
|
|
6
6
|
#include <map>
|
|
7
7
|
#include <pybind11/pybind11.h>
|
|
8
8
|
|
|
9
|
+
#include "largepp/src/pattern.hpp" // ← ensure Pattern is a complete type here
|
|
10
|
+
|
|
9
11
|
namespace largepp {
|
|
10
12
|
using namespace std;
|
|
11
13
|
|
|
12
|
-
//
|
|
13
|
-
bool Load_instance(string& items_file, double thresh);
|
|
14
|
+
// public entry points
|
|
15
|
+
bool Load_instance(std::string& items_file, double thresh);
|
|
14
16
|
void Load_py(const pybind11::object& py_data, double thresh);
|
|
15
17
|
|
|
16
|
-
//
|
|
17
|
-
extern vector<vector<int>> items;
|
|
18
|
-
extern string
|
|
18
|
+
// shared state (defined in load_inst.cpp)
|
|
19
|
+
extern std::vector<std::vector<int>> items;
|
|
20
|
+
extern std::string out_file;
|
|
19
21
|
|
|
20
22
|
extern bool b_disp, b_write, use_dic, just_build, ovr_count, pre_pro;
|
|
23
|
+
extern bool use_list;
|
|
21
24
|
|
|
22
25
|
extern unsigned int M, L, time_limit;
|
|
23
|
-
extern unsigned long long N;
|
|
24
|
-
extern double theta;
|
|
25
|
-
extern unsigned long long E;
|
|
26
|
-
extern clock_t
|
|
26
|
+
extern unsigned long long N;
|
|
27
|
+
extern double theta;
|
|
28
|
+
extern unsigned long long E;
|
|
29
|
+
extern std::clock_t start_time;
|
|
30
|
+
|
|
31
|
+
// DFS queue of potential patterns to extend
|
|
32
|
+
extern std::vector<largepp::Pattern> DFS;
|
|
27
33
|
|
|
28
34
|
} // namespace largepp
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
#include <vector>
|
|
3
|
+
|
|
4
|
+
namespace largepp {
|
|
5
|
+
|
|
6
|
+
class Pattern {
|
|
7
|
+
public:
|
|
8
|
+
std::vector<int> seq;
|
|
9
|
+
std::vector<unsigned int> str_pnt;
|
|
10
|
+
std::vector<unsigned long long> seq_ID;
|
|
11
|
+
|
|
12
|
+
std::vector<int> slist;
|
|
13
|
+
std::vector<int> ilist;
|
|
14
|
+
|
|
15
|
+
unsigned long long freq;
|
|
16
|
+
|
|
17
|
+
Pattern() : freq(0) {}
|
|
18
|
+
|
|
19
|
+
explicit Pattern(int item) : freq(0) {
|
|
20
|
+
seq.push_back(item);
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
Pattern(std::vector<int>& _seq, int item) : freq(0) {
|
|
24
|
+
seq.reserve(_seq.size() + 1);
|
|
25
|
+
for (int i = 0; i < static_cast<int>(_seq.size()); ++i)
|
|
26
|
+
seq.push_back(_seq[i]);
|
|
27
|
+
seq.push_back(item);
|
|
28
|
+
}
|
|
29
|
+
};
|
|
30
|
+
|
|
31
|
+
} // namespace largepp
|
effspm/load_inst.hpp
CHANGED
|
@@ -11,7 +11,7 @@ using namespace std;
|
|
|
11
11
|
|
|
12
12
|
// ------------------------------------------------------------
|
|
13
13
|
// forward declare Pattern (defined in freq_miner.hpp)
|
|
14
|
-
|
|
14
|
+
struct Pattern;
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
// Main entrypoint: load your file on disk into 'items', build DFS, theta, etc.
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
effspm/__init__.py,sha256=SRhZaFwKFWVneO_TCS9_ld5akH3rq3XtHcinJlAvjPw,257
|
|
2
|
+
effspm/_core.cpp,sha256=JzUCIVmmDMVfRIjIsmXtVkSMkRMprE3wpOrx-jYhAQU,3781
|
|
3
|
+
effspm/_effspm.cp310-win_amd64.pyd,sha256=95nMHGmYmACSINFzbnD2OOBoFnjL6t1M62XdAaY_zig,388096
|
|
4
|
+
effspm/_effspm.cpp,sha256=iXMaevu3nN2p7jMJuf-IPDiGf7SZ7vlNXC8cga2pDU8,26021
|
|
5
|
+
effspm/freq_miner.cpp,sha256=qQDFPoPKY3ICaH2brm1CUKwNBhCy0-0dUWEoV_3FwME,4785
|
|
6
|
+
effspm/freq_miner.hpp,sha256=Ni4ayFci6zPnpwbLBQ6FU5MSvLjgn6jk2_GQ45HHtgo,835
|
|
7
|
+
effspm/load_inst.cpp,sha256=kTEucQ5YU7xPdRjcM9ixAPk49cLJ8H8YN9gJsTrm7mM,4769
|
|
8
|
+
effspm/load_inst.hpp,sha256=S8NvO3KCU6dGcDX8a24SE6uhD1cSAL0jCo0XYFnrwKE,938
|
|
9
|
+
effspm/utility.cpp,sha256=OD5K0K0jQKgqVGJm91pSofImXOEVkDnqQvFh1qytvpA,1458
|
|
10
|
+
effspm/utility.hpp,sha256=hECSm-ORd20QJMundbOLkZvo9nj-I2pXd5AokagyGqQ,773
|
|
11
|
+
effspm/btminer/src/build_mdd.cpp,sha256=t4Exn5vJTekl5nyRg77p898pwjC9NxECAgL4DKgwpR4,2297
|
|
12
|
+
effspm/btminer/src/build_mdd.hpp,sha256=3HDZdQ8k_sJR9UNPvO-48mdnjJGfFaE0vorp86DsQIU,661
|
|
13
|
+
effspm/btminer/src/freq_miner.cpp,sha256=GjttxpUpJpvKugXcKJ2utKG6IeZKVEyiv2RI-nE9jM0,9456
|
|
14
|
+
effspm/btminer/src/freq_miner.hpp,sha256=REbDhYzn7PG1XKjLqaMnk_PzUwwCQKiBGD0DPx3DBI4,1110
|
|
15
|
+
effspm/btminer/src/load_inst.cpp,sha256=g_kdYl7tIavuVBEQ30BKh19tfY9cNCBc2BGnnqrK5ZE,8269
|
|
16
|
+
effspm/btminer/src/load_inst.hpp,sha256=n47RjtvkBuBSb5OLxSuqvUQpPFd7ukMQTD0AIHiYIfw,1030
|
|
17
|
+
effspm/btminer/src/utility.cpp,sha256=5at1rTDLS3Jw90tENm0V3W1fEc7q_HhtIG6TyrKYKgg,1244
|
|
18
|
+
effspm/btminer/src/utility.hpp,sha256=ite_7AY33HJKZiFRENhQ3xjNmak5yKy_hE8Ku6-9q2Y,328
|
|
19
|
+
effspm/htminer/src/build_mdd.cpp,sha256=pTKq8Egyr185McfiMiV8nyeOHmYO3gNexSVF5jz5rY8,4511
|
|
20
|
+
effspm/htminer/src/build_mdd.hpp,sha256=h-Tp4Qeg1drRQkYqJ0cm33XUPT4yV4e-IYdifgPK1jk,1392
|
|
21
|
+
effspm/htminer/src/freq_miner.cpp,sha256=GzjM4V61nxp9Qtlx9mi8qSAfjNuvOXs0PNAkZBw3XYc,10940
|
|
22
|
+
effspm/htminer/src/freq_miner.hpp,sha256=dOR9wXwMBhnK3yHat6r_8WuehcgPTewb_XCUVM-JTNU,825
|
|
23
|
+
effspm/htminer/src/load_inst.cpp,sha256=jJhijLbsC3mGFH6fSBVXo3fkvKbZouL069jlv-dWbrA,11319
|
|
24
|
+
effspm/htminer/src/load_inst.hpp,sha256=mKC4b0Ji6-WDNP82jcvIi6X4PEVgjmyCp5BSj9VngiE,477
|
|
25
|
+
effspm/htminer/src/utility.cpp,sha256=uVvXQqyeVjJXcdLj8wHL1SLZh0fFda1P428_IgAGWrk,2446
|
|
26
|
+
effspm/htminer/src/utility.hpp,sha256=-tNvJdj6Poz0Vd_em0SYllBR32tqMH8Hmy_SakHeUfk,3650
|
|
27
|
+
effspm/largebm/src/build_mdd.cpp,sha256=7rFfsKMsmVPWpeawgAKhIT1g7IFJDAGND8Ru7E4nnFg,2680
|
|
28
|
+
effspm/largebm/src/build_mdd.hpp,sha256=ie7EYt7Cu8-04-f4QRrboVyxjstor1OB3V6eGAtBQiA,891
|
|
29
|
+
effspm/largebm/src/freq_miner.cpp,sha256=A6DcQRHJoJECbQPLeGpXmQhZtMQdZ8fOcvxlmJxJdRQ,11212
|
|
30
|
+
effspm/largebm/src/freq_miner.hpp,sha256=EaU1SL3K4ZB_7CFhCIv8pS5UkefqGWzr9iTjrDd23XY,911
|
|
31
|
+
effspm/largebm/src/load_inst.cpp,sha256=_Ym6viWbn6XyE8Mo1kMWujj7ud40egkw2M_eMY9lViE,7044
|
|
32
|
+
effspm/largebm/src/load_inst.hpp,sha256=ujfHWuK2W2ptQ8Pal2Z9ZEkwfImbSlZCspdHAg0BsJA,1193
|
|
33
|
+
effspm/largebm/src/utility.cpp,sha256=QfgLD5mss5FIWgWwPzIEPNzTIzEnTTYBzkznYrcGz-M,1084
|
|
34
|
+
effspm/largebm/src/utility.hpp,sha256=CZ70tPKMA5qZnqLGTRdgaqfdWzBb1Gfrc7rKniLOHAo,428
|
|
35
|
+
effspm/largehm/src/build_mdd.cpp,sha256=lev0YbLDfOzVAIJai0MOwtPYBVPJV9DGwU-QaLH_qDY,6090
|
|
36
|
+
effspm/largehm/src/build_mdd.hpp,sha256=HnBzd82X4C-DMoPa7A8vP0_9TxX23_RQ_qm8GkJ-h38,2880
|
|
37
|
+
effspm/largehm/src/freq_miner.cpp,sha256=jfPrFjERi8hnEL4f-_4bML_a082JTj_L477m1-R5PY8,16955
|
|
38
|
+
effspm/largehm/src/freq_miner.hpp,sha256=w-IhZj-k_z3__dikNwvNseapZ1fTAXrFgnPv7o3nzG4,2428
|
|
39
|
+
effspm/largehm/src/load_inst.cpp,sha256=rUnQ2vaQ4qIiXOvBPaXbtbU5dA8SoWI14RG_QSxGhig,12218
|
|
40
|
+
effspm/largehm/src/load_inst.hpp,sha256=7pFao_lo0fRvK1Hn_1nDXvmYleirI153992J7uZKNM0,1692
|
|
41
|
+
effspm/largehm/src/utility.cpp,sha256=jmlLXjOSIFl9p5LWNSmFUuTk6kFXei08xh0qHJNVZ9c,997
|
|
42
|
+
effspm/largehm/src/utility.hpp,sha256=F7qvWtZVqzz9VEBvduouG8pvt5ZHreAtGtXkCWgiafY,720
|
|
43
|
+
effspm/largepp/src/freq_miner.cpp,sha256=wep14Yt0ld_Y76LG1fwYw0zvoB3Vd29enx8GwOQBwDA,7432
|
|
44
|
+
effspm/largepp/src/freq_miner.hpp,sha256=achRvRSVUloeeAbRVhPK0M05pnf4DxSdYTBV8YvOz7w,417
|
|
45
|
+
effspm/largepp/src/load_inst.cpp,sha256=-vcPMdwWv9CMuRRwgn8xRGsQdvUakHIQsUPmP0R1sfg,8302
|
|
46
|
+
effspm/largepp/src/load_inst.hpp,sha256=fDvtWFTiHTIpjA-5n62OAbcS3cFP8XQ_rK1TlVhdpgw,941
|
|
47
|
+
effspm/largepp/src/pattern.hpp,sha256=UNaO53pDLnEoN5Vdyka4Dg9dFrt7ANH51oRXgdpiarA,670
|
|
48
|
+
effspm/largepp/src/utility.cpp,sha256=YzdLehjqe2arQCiAdkdLUZODLYooy_gWltJGhBAHtyc,988
|
|
49
|
+
effspm/largepp/src/utility.hpp,sha256=6zYJWHSqUL3deyvvNqfKQxabtLtRS5Kh52No5zZ78VQ,757
|
|
50
|
+
effspm-0.3.0.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
|
|
51
|
+
effspm-0.3.0.dist-info/METADATA,sha256=KLjqK-ecuSmneSbtCdV3hvXCrnxfeObgJlnYDz5IKy0,14464
|
|
52
|
+
effspm-0.3.0.dist-info/WHEEL,sha256=KUuBC6lxAbHCKilKua8R9W_TM71_-9Sg5uEP3uDWcoU,101
|
|
53
|
+
effspm-0.3.0.dist-info/top_level.txt,sha256=2O-AuI0nw0pDmJMo2jzM1wvV2rj48AmkjskkAnsuuQk,7
|
|
54
|
+
effspm-0.3.0.dist-info/RECORD,,
|