effspm 0.2.6__cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- effspm/__init__.py +11 -0
- effspm/_core.cpp +106 -0
- effspm/_effspm.cpp +609 -0
- effspm/_effspm.cpython-312-x86_64-linux-gnu.so +0 -0
- effspm/btminer/src/build_mdd.cpp +63 -0
- effspm/btminer/src/build_mdd.hpp +40 -0
- effspm/btminer/src/freq_miner.cpp +179 -0
- effspm/btminer/src/freq_miner.hpp +39 -0
- effspm/btminer/src/load_inst.cpp +200 -0
- effspm/btminer/src/load_inst.hpp +25 -0
- effspm/btminer/src/utility.cpp +65 -0
- effspm/btminer/src/utility.hpp +40 -0
- effspm/freq_miner.cpp +143 -0
- effspm/freq_miner.hpp +48 -0
- effspm/htminer/src/build_mdd.cpp +192 -0
- effspm/htminer/src/build_mdd.hpp +64 -0
- effspm/htminer/src/freq_miner.cpp +350 -0
- effspm/htminer/src/freq_miner.hpp +60 -0
- effspm/htminer/src/load_inst.cpp +394 -0
- effspm/htminer/src/load_inst.hpp +23 -0
- effspm/htminer/src/utility.cpp +72 -0
- effspm/htminer/src/utility.hpp +77 -0
- effspm/largebm/src/build_mdd.cpp +137 -0
- effspm/largebm/src/build_mdd.hpp +47 -0
- effspm/largebm/src/freq_miner.cpp +349 -0
- effspm/largebm/src/freq_miner.hpp +48 -0
- effspm/largebm/src/load_inst.cpp +230 -0
- effspm/largebm/src/load_inst.hpp +45 -0
- effspm/largebm/src/utility.cpp +45 -0
- effspm/largebm/src/utility.hpp +18 -0
- effspm/largehm/src/build_mdd.cpp +174 -0
- effspm/largehm/src/build_mdd.hpp +93 -0
- effspm/largehm/src/freq_miner.cpp +445 -0
- effspm/largehm/src/freq_miner.hpp +77 -0
- effspm/largehm/src/load_inst.cpp +357 -0
- effspm/largehm/src/load_inst.hpp +64 -0
- effspm/largehm/src/utility.cpp +38 -0
- effspm/largehm/src/utility.hpp +29 -0
- effspm/largepp/src/freq_miner.cpp +170 -0
- effspm/largepp/src/freq_miner.hpp +43 -0
- effspm/largepp/src/load_inst.cpp +219 -0
- effspm/largepp/src/load_inst.hpp +28 -0
- effspm/largepp/src/utility.cpp +34 -0
- effspm/largepp/src/utility.hpp +21 -0
- effspm/load_inst.cpp +252 -0
- effspm/load_inst.hpp +31 -0
- effspm/utility.cpp +55 -0
- effspm/utility.hpp +29 -0
- effspm-0.2.6.dist-info/METADATA +237 -0
- effspm-0.2.6.dist-info/RECORD +53 -0
- effspm-0.2.6.dist-info/WHEEL +6 -0
- effspm-0.2.6.dist-info/licenses/LICENSE +201 -0
- effspm-0.2.6.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
#include <iostream>
|
|
2
|
+
#include <sstream>
|
|
3
|
+
#include <algorithm>
|
|
4
|
+
#include <cmath>
|
|
5
|
+
#include "load_inst.hpp"
|
|
6
|
+
#include "freq_miner.hpp"
|
|
7
|
+
#include "utility.hpp"
|
|
8
|
+
|
|
9
|
+
namespace largepp { // ─── BEGIN namespace ─────────────────────
|
|
10
|
+
using namespace std;
|
|
11
|
+
|
|
12
|
+
/* ------------------------------------------------------------------
|
|
13
|
+
* Global definitions (match the externs in load_inst.hpp)
|
|
14
|
+
* ---------------------------------------------------------------- */
|
|
15
|
+
unsigned int M = 0, L = 0;
|
|
16
|
+
unsigned long long N = 0, E = 0;
|
|
17
|
+
double theta = 0.01;
|
|
18
|
+
vector<vector<int>> items;
|
|
19
|
+
vector<Pattern> DFS;
|
|
20
|
+
vector<int> item_dic;
|
|
21
|
+
|
|
22
|
+
/* Forward decls for helper routines in this file */
|
|
23
|
+
static bool Load_items(string& inst);
|
|
24
|
+
static void Load_items_pre(string& inst);
|
|
25
|
+
static bool Preprocess(string& inst, double thresh);
|
|
26
|
+
|
|
27
|
+
/* ==================================================================
|
|
28
|
+
* MAIN ENTRY — load from disk
|
|
29
|
+
* ================================================================= */
|
|
30
|
+
/**
 * Loads the sequence database stored in `items_file` into the shared globals.
 *
 * With `pre_pro` set, the file is read twice: Preprocess() counts item supports
 * and sets `theta`, then Load_items_pre() loads only the retained items.
 * Otherwise Load_items() reads the file once and `theta` is derived afterwards:
 * a `thresh` below 1 is treated as a fraction of N, anything else is used as is.
 *
 * @return false when the underlying loader reports failure, true otherwise.
 */
bool Load_instance(string& items_file, double thresh)
{
    clock_t phase_start = clock();

    if (!pre_pro) {
        // Single pass over the file, then compute the support threshold.
        if (!Load_items(items_file))
            return false;

        if (thresh < 1.0)
            theta = ceil(thresh * N);
        else
            theta = thresh;
    } else {
        // First pass: item supports, theta and the item dictionary.
        if (!Preprocess(items_file, thresh))
            return false;

        cout << "\nPreprocess done in " << give_time(clock() - phase_start) << " seconds\n\n";

        // One Pattern per retained item, constructed from -1, -2, ..., -L.
        DFS.reserve(L);
        unsigned int idx = 0;
        while (idx < L) {
            DFS.emplace_back(-int(idx) - 1);
            ++idx;
        }

        // Second pass is timed separately.
        phase_start = clock();
        Load_items_pre(items_file);
        N = items.size();
    }

    cout << "\nMDD Database built in " << give_time(clock() - phase_start) << " seconds\n\n";
    cout << "Found " << N << " sequence, with max line len " << M
         << ", and " << L << " items, and " << E << " enteries\n";

    return true;
}
|
|
58
|
+
|
|
59
|
+
/* ==================================================================
|
|
60
|
+
* ALT ENTRY — load directly from a Python list of lists
|
|
61
|
+
* ================================================================= */
|
|
62
|
+
/**
 * Loads the database from a Python object that converts to a list of lists of
 * ints, replacing `items` and recomputing N, M, E, L and theta from it.
 *
 * `DFS` is rebuilt with one Pattern per item id (constructed from -1 ... -L).
 * A `thresh` below 1 is treated as a fraction of N, anything else is used as is.
 */
void Load_py(const pybind11::object& data, double thresh)
{
    items = data.cast<vector<vector<int>>>();
    N = items.size();

    M = 0;
    E = 0;
    int largest_id = 0;

    for (const vector<int>& sequence : items) {
        const unsigned int length = static_cast<unsigned int>(sequence.size());
        if (length > M)
            M = length;

        E += sequence.size();

        for (size_t pos = 0; pos < sequence.size(); ++pos) {
            const int magnitude = abs(sequence[pos]);
            if (magnitude > largest_id)
                largest_id = magnitude;
        }
    }

    L = largest_id;

    if (thresh < 1.0)
        theta = ceil(thresh * N);
    else
        theta = thresh;

    // Start from a clean pattern table sized for the new alphabet.
    DFS.clear();
    DFS.reserve(L);
    unsigned int idx = 0;
    while (idx < L) {
        DFS.emplace_back(-int(idx) - 1);
        ++idx;
    }
}
|
|
83
|
+
|
|
84
|
+
/* =================================================================
|
|
85
|
+
* The professor’s original helpers — untouched
|
|
86
|
+
* ================================================================= */
|
|
87
|
+
/**
 * First pass over the instance file: counts in how many lines (sequences) each
 * item id occurs and builds `item_dic`, which maps every id whose count reaches
 * `theta` to a new consecutive id (1, 2, ...) and every other id to -1.
 *
 * Globals written: N is incremented per line while counting and reset to 0 at
 * the end; L ends up as the number of retained items; `theta` is set from
 * `thresh` (a value below 1 is a fraction of the line count); `item_dic` is
 * replaced.
 *
 * Tokens equal to 0 are ignored: ids are stored at index id - 1, so 0 has no
 * slot and would otherwise index out of bounds.
 *
 * @return false if the file cannot be opened, true otherwise.
 */
static bool Preprocess(string& inst, double thresh)
{
    ifstream file(inst);
    if (!file.good()) {
        cout << "!!!!!! No such file exists: " << inst << " !!!!!!\n";
        return false;
    }

    // freq[i]    : number of lines containing item id i + 1
    // counted[i] : 1-based number of the last line in which id i + 1 was counted,
    //              so each id is counted at most once per line
    vector<unsigned long long> freq(1000000, 0);
    vector<unsigned long long> counted(1000000, 0);

    string line;
    while (getline(file, line) && give_time(clock() - start_time) < time_limit) {
        ++N;
        istringstream word(line);
        string itm;
        while (word >> itm) {
            const int ditem = stoi(itm);
            if (ditem == 0)
                continue;   // no slot for id 0 (index would be -1)

            const unsigned int id = static_cast<unsigned int>(abs(ditem));
            L = max(L, id);

            if (freq.size() < L) {
                freq.resize(L, 0);
                counted.resize(L, 0);
            }

            if (counted[id - 1] != N) {
                ++freq[id - 1];
                counted[id - 1] = N;
            }
        }
    }

    theta = (thresh < 1.0) ? ceil(thresh * N) : thresh;

    // Renumber the retained ids consecutively, starting at 1.
    int real_L = 0;
    item_dic.assign(L, -1);
    for (unsigned int i = 0; i < L; ++i) {
        if (freq[i] >= theta)
            item_dic[i] = ++real_L;
    }

    cout << "Original number of items: " << L
         << " Reduced to: " << real_L << '\n';

    L = real_L;
    N = 0;   // the second pass counts the sequences again
    return true;
}
|
|
131
|
+
|
|
132
|
+
/**
 * Second pass over the instance file (after Preprocess): appends every line to
 * `items`, keeping only ids that `item_dic` maps to a new id and storing them
 * under that new id with their original sign.
 *
 * When a dropped item was negative, the next kept item on the same line is
 * stored as negative instead (the sign is carried over).
 * NOTE(review): presumably the sign marks an itemset boundary - confirm.
 *
 * Unless `just_build` is set, the first occurrence of each item in a line is
 * recorded in DFS (sequence index, position, frequency). Lines on which no item
 * survives are not counted; N, E and M are updated for the others.
 *
 * Tokens whose id is 0 or outside `item_dic` are treated as dropped items
 * instead of indexing out of bounds. A trailing placeholder sequence left by a
 * final all-dropped line is removed so `items.size()` matches N.
 *
 * Returns silently if the file cannot be opened.
 */
static void Load_items_pre(string& inst)
{
    ifstream file(inst);
    if (!file.good())
        return;

    string line;
    bool empty_seq = false;   // true while items.back() is an unused placeholder

    while (getline(file, line) && give_time(clock() - start_time) < time_limit) {
        vector<bool> counted(L, false);   // ids already recorded in DFS for this line
        istringstream word(line);

        // Reuse the placeholder of a previous empty line instead of adding another.
        if (!empty_seq)
            items.emplace_back();
        empty_seq = true;

        string itm;
        int size_m = 0;
        bool sgn = false;   // pending negative sign from a dropped item

        while (word >> itm) {
            int ditem = stoi(itm);

            const int magnitude = abs(ditem);
            const bool in_dictionary =
                magnitude >= 1 && static_cast<size_t>(magnitude) <= item_dic.size();

            if (!in_dictionary || item_dic[magnitude - 1] == -1) {
                if (!sgn)
                    sgn = ditem < 0;
                continue;
            }

            const int mapped = item_dic[magnitude - 1];
            ditem = (ditem > 0) ? mapped : -mapped;
            empty_seq = false;

            if (sgn) {
                if (ditem > 0)
                    ditem = -ditem;
                sgn = false;
            }

            items.back().push_back(ditem);

            const size_t slot = static_cast<size_t>(abs(ditem)) - 1;
            if (!counted[slot] && !just_build) {
                DFS[slot].seq_ID.push_back(items.size() - 1);
                DFS[slot].str_pnt.push_back(items.back().size() - 1);
                ++DFS[slot].freq;
                counted[slot] = true;
            }
            ++size_m;
        }

        if (empty_seq)
            continue;

        ++N;
        E += size_m;
        M = max<unsigned int>(M, size_m);
    }

    // Drop the unused placeholder left behind when the last line read was empty.
    if (empty_seq && !items.empty())
        items.pop_back();
}
|
|
176
|
+
|
|
177
|
+
/**
 * Reads the instance file in a single pass (no preprocessing): every line
 * becomes one entry of `items`, L grows to the largest id seen, and DFS is
 * extended so that it holds one Pattern per id.
 *
 * Unless `just_build` is set, the first occurrence of each item in a line is
 * recorded in DFS (sequence index, position, frequency). N, E and M are updated
 * for every line.
 *
 * Tokens equal to 0 are ignored: ids are stored at index id - 1, so 0 has no
 * slot and would otherwise index out of bounds.
 *
 * @return false if the file cannot be opened, true otherwise.
 */
static bool Load_items(string& inst)
{
    ifstream file(inst);
    if (!file.good()) {
        cout << "!!!!!! No such file exists: " << inst << " !!!!!!\n";
        return false;
    }

    string line;
    while (getline(file, line) && give_time(clock() - start_time) < time_limit) {
        ++N;
        vector<bool> counted(L, false);   // ids already recorded in DFS for this line
        istringstream word(line);

        items.emplace_back();
        string itm;
        int size_m = 0;

        while (word >> itm) {
            const int ditem = stoi(itm);
            if (ditem == 0)
                continue;   // no slot for id 0 (index would be -1)

            const unsigned int id = static_cast<unsigned int>(abs(ditem));
            if (L < id) {
                L = id;
                while (DFS.size() < L)
                    DFS.emplace_back(-int(DFS.size()) - 1);
                // Size the per-line marker to L directly so it is large enough
                // even when DFS did not need to grow.
                counted.resize(L, false);
            }

            items.back().push_back(ditem);

            if (!counted[id - 1] && !just_build) {
                DFS[id - 1].seq_ID.push_back(items.size() - 1);
                DFS[id - 1].str_pnt.push_back(items.back().size() - 1);
                ++DFS[id - 1].freq;
                counted[id - 1] = true;
            }
            ++size_m;
        }

        E += size_m;
        M = max<unsigned int>(M, size_m);
    }
    return true;
}
|
|
218
|
+
|
|
219
|
+
} // namespace largepp // ─── END namespace ──────────────────────
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#include <vector>
|
|
4
|
+
#include <string>
|
|
5
|
+
#include <fstream>
|
|
6
|
+
#include <map>
|
|
7
|
+
#include <pybind11/pybind11.h>
|
|
8
|
+
|
|
9
|
+
namespace largepp {
|
|
10
|
+
using namespace std;
|
|
11
|
+
|
|
12
|
+
// ───── public entry points ───────────────────────────────────────
|
|
13
|
+
bool Load_instance(string& items_file, double thresh);
|
|
14
|
+
void Load_py(const pybind11::object& py_data, double thresh);
|
|
15
|
+
|
|
16
|
+
// ───── shared state (defined once in load_inst.cpp) ──────────────
|
|
17
|
+
extern vector<vector<int>> items; // encoded database
|
|
18
|
+
extern string out_file;
|
|
19
|
+
|
|
20
|
+
extern bool b_disp, b_write, use_dic, just_build, ovr_count, pre_pro;
|
|
21
|
+
|
|
22
|
+
extern unsigned int M, L, time_limit;
|
|
23
|
+
extern unsigned long long N; // # sequences
|
|
24
|
+
extern double theta; // support threshold
|
|
25
|
+
extern unsigned long long E; // total entries
|
|
26
|
+
extern clock_t start_time;
|
|
27
|
+
|
|
28
|
+
} // namespace largepp
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
#include "utility.hpp"
|
|
2
|
+
#include <string>
|
|
3
|
+
|
|
4
|
+
namespace largepp {

// --- definitions of the globals declared in the header ---
bool b_disp{false};
bool b_write{false};
bool use_dic{false};
bool just_build{false};
bool ovr_count{false};
bool pre_pro{false};
bool use_list{true};                          // large-prefix flag the binder toggles
unsigned int time_limit{10 * 60 * 60};        // 36000
std::string out_file{};
std::vector<std::vector<int>> collected{};    // mined pattern output

std::clock_t start_time{0};

// --- helper implementations ---

// Empties the buffer of mined patterns.
void ClearCollected()
{
    collected.clear();
}

// Read-only access to the buffer of mined patterns.
const std::vector<std::vector<int>>& GetCollected()
{
    return collected;
}

// Converts a clock() tick count into seconds.
double give_time(std::clock_t ticks)
{
    const double elapsed_ticks = static_cast<double>(ticks);
    return elapsed_ticks / CLOCKS_PER_SEC;
}

} // namespace largepp
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
#include <vector>
|
|
3
|
+
#include <ctime>
|
|
4
|
+
#include <string>
|
|
5
|
+
|
|
6
|
+
namespace largepp {

// Option flags and limits: declared here, defined in utility.cpp.
extern bool b_disp;
extern bool b_write;
extern bool use_dic;
extern bool just_build;
extern bool ovr_count;
extern bool pre_pro;
extern bool use_list;                                   // needed by large-prefix
extern unsigned int time_limit;

// Buffer of mined patterns that _effspm.cpp returns to Python.
extern std::vector<std::vector<int>> collected;

// Helpers shared by the source files.
void ClearCollected();                                  // empty the buffer
const std::vector<std::vector<int>>& GetCollected();    // read the buffer
double give_time(std::clock_t ticks);                   // clock ticks -> seconds

} // namespace largepp
|
effspm/load_inst.cpp
ADDED
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
#include<iostream>
|
|
2
|
+
#include <sstream>
|
|
3
|
+
#include <algorithm>
|
|
4
|
+
#include "load_inst.hpp"
|
|
5
|
+
#include "freq_miner.hpp"
|
|
6
|
+
#include "utility.hpp"
|
|
7
|
+
#include <math.h>
|
|
8
|
+
|
|
9
|
+
using namespace std;
|
|
10
|
+
|
|
11
|
+
unsigned int M = 0, L = 0;
|
|
12
|
+
unsigned long long int N = 0, E = 0, theta;
|
|
13
|
+
|
|
14
|
+
vector<vector<int>> items;
|
|
15
|
+
vector<Pattern> DFS;
|
|
16
|
+
vector<int> item_dic;
|
|
17
|
+
|
|
18
|
+
bool Load_items(string& inst);
|
|
19
|
+
void Load_items_pre(string& inst);
|
|
20
|
+
bool Preprocess(string& inst, double thresh);
|
|
21
|
+
|
|
22
|
+
// Loads the sequence database stored in `items_file` into the shared globals.
//
// With `pre_pro` set, the file is read twice: Preprocess() counts item supports
// and sets `theta`, then Load_items_pre() loads only the retained items.
// Otherwise Load_items() reads the file once and `theta` is derived afterwards:
// a `thresh` below 1 is treated as a fraction of N, anything else is used as is.
//
// Returns false when the underlying loader reports failure, true otherwise.
bool Load_instance(string &items_file, double thresh) {

    clock_t phase_start = clock();

    if (!pre_pro) {
        // Single pass over the file, then compute the support threshold.
        if (!Load_items(items_file))
            return false;

        theta = (thresh < 1) ? ceil(thresh * N) : thresh;
    }
    else {
        // First pass: item supports, theta and the item dictionary.
        if (!Preprocess(items_file, thresh))
            return false;

        cout << "\nPreprocess done in " << give_time(clock() - phase_start) << " seconds\n\n";

        // One Pattern per retained item, constructed from -1, -2, ..., -L.
        DFS.reserve(L);
        int idx = 0;
        while (idx < L) {
            DFS.emplace_back(-idx - 1);
            ++idx;
        }

        // Second pass is timed separately.
        phase_start = clock();
        Load_items_pre(items_file);
        N = items.size();
    }

    cout << "\nMDD Database built in " << give_time(clock() - phase_start) << " seconds\n\n";
    cout << "Found " << N << " sequence, with max line len " << M << ", and " << L << " items, and " << E << " enteries\n";

    return true;
}
|
|
59
|
+
|
|
60
|
+
// First pass over the instance file: counts in how many lines (sequences) each
// item id occurs and builds `item_dic`, which maps every id whose count reaches
// `theta` to a new consecutive id (1, 2, ...) and every other id to -1.
//
// Globals written: N is incremented per line while counting and reset to 0 at
// the end; L ends up as the number of retained items; `theta` is set from
// `thresh` (a value below 1 is a fraction of the line count); `item_dic` is
// replaced.
//
// Tokens equal to 0 are ignored: ids are stored at index id - 1, so 0 has no
// slot and would otherwise index out of bounds.
//
// Returns false if the file cannot be opened, true otherwise.
bool Preprocess(string &inst, double thresh) {

    ifstream file(inst);
    if (!file.good()) {
        cout << "!!!!!! No such file exists: " << inst << " !!!!!!\n";
        return false;
    }

    // freq[i]    : number of lines containing item id i + 1
    // counted[i] : 1-based number of the last line in which id i + 1 was counted,
    //              so each id is counted at most once per line. Same width as N,
    //              so the stamp comparison below never truncates.
    vector<unsigned long long> freq(1000000, 0);
    vector<unsigned long long> counted(1000000, 0);

    string line;
    while (getline(file, line) && give_time(clock() - start_time) < time_limit) {
        ++N;
        istringstream word(line);
        string itm;
        while (word >> itm) {
            const int ditem = stoi(itm);
            if (ditem == 0)
                continue;   // no slot for id 0 (index would be -1)

            const unsigned int id = static_cast<unsigned int>(abs(ditem));
            if (L < id)
                L = id;

            if (freq.size() < L) {
                freq.resize(L, 0);
                counted.resize(L, 0);
            }

            if (counted[id - 1] != N) {
                ++freq[id - 1];
                counted[id - 1] = N;
            }
        }
    }

    if (thresh < 1)
        theta = ceil(thresh * N);
    else
        theta = thresh;

    // Renumber the retained ids consecutively, starting at 1.
    int real_L = 0;
    item_dic.assign(L, -1);
    for (unsigned int i = 0; i < L; ++i) {
        if (freq[i] >= theta)
            item_dic[i] = ++real_L;
    }

    cout << "Original number of items: " << L << " Reduced to: " << real_L << endl;

    L = real_L;
    N = 0;   // the second pass counts the sequences again

    return true;
}
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
// Second pass over the instance file (after Preprocess): appends every line to
// `items`, keeping only ids that `item_dic` maps to a new id and storing them
// under that new id with their original sign.
//
// When a dropped item was negative, the next kept item on the same line is
// stored as negative instead (the sign is carried over).
// NOTE(review): presumably the sign marks an itemset boundary - confirm.
//
// The first occurrence of each item in a line is recorded in DFS (sequence
// index, position, frequency). Lines on which no item survives are not counted;
// N, E and M are updated for the others.
//
// Tokens whose id is 0 or outside `item_dic` are treated as dropped items
// instead of indexing out of bounds. A trailing placeholder sequence left by a
// final all-dropped line is removed so `items.size()` matches N.
//
// Does nothing if the file cannot be opened.
void Load_items_pre(string &inst) {

    ifstream file(inst);
    if (!file.good())
        return;

    string line;
    bool empty_seq = false;   // true while items.back() is an unused placeholder

    while (getline(file, line) && give_time(clock() - start_time) < time_limit) {
        vector<bool> counted(L, false);   // ids already recorded in DFS for this line
        istringstream word(line);

        // Reuse the placeholder of a previous empty line instead of adding another.
        if (!empty_seq)
            items.emplace_back();
        empty_seq = true;

        string itm;
        int size_m = 0;
        bool sgn = false;   // pending negative sign from a dropped item

        while (word >> itm) {
            int ditem = stoi(itm);

            const int magnitude = abs(ditem);
            const bool in_dictionary =
                magnitude >= 1 && static_cast<size_t>(magnitude) <= item_dic.size();

            if (!in_dictionary || item_dic[magnitude - 1] == -1) {
                if (!sgn)
                    sgn = ditem < 0;
                continue;
            }

            const int mapped = item_dic[magnitude - 1];
            ditem = (ditem > 0) ? mapped : -mapped;
            empty_seq = false;

            if (sgn) {
                if (ditem > 0)
                    ditem = -ditem;
                sgn = false;
            }

            items.back().push_back(ditem);

            const size_t slot = static_cast<size_t>(abs(ditem)) - 1;
            if (!counted[slot]) {
                DFS[slot].seq_ID.push_back(items.size() - 1);
                DFS[slot].str_pnt.push_back(items.back().size() - 1);
                ++DFS[slot].freq;
                counted[slot] = true;
            }

            ++size_m;
        }

        if (empty_seq)
            continue;

        ++N;
        E += size_m;
        if (static_cast<unsigned int>(size_m) > M)
            M = size_m;
    }

    // Drop the unused placeholder left behind when the last line read was empty.
    if (empty_seq && !items.empty())
        items.pop_back();
}
|
|
191
|
+
|
|
192
|
+
// Reads the instance file in a single pass (no preprocessing): every line
// becomes one entry of `items`, L grows to the largest id seen, and DFS is
// extended so that it holds one Pattern per id.
//
// The first occurrence of each item in a line is recorded in DFS (sequence
// index, position, frequency). N, E and M are updated for every line.
//
// Tokens equal to 0 are ignored: ids are stored at index id - 1, so 0 has no
// slot and would otherwise index out of bounds.
//
// Returns false if the file cannot be opened, true otherwise.
bool Load_items(string &inst) {

    ifstream file(inst);
    if (!file.good()) {
        cout << "!!!!!! No such file exists: " << inst << " !!!!!!\n";
        return false;
    }

    string line;
    while (getline(file, line) && give_time(clock() - start_time) < time_limit) {
        ++N;
        vector<bool> counted(L, false);   // ids already recorded in DFS for this line
        istringstream word(line);

        items.emplace_back();
        string itm;
        int size_m = 0;

        while (word >> itm) {
            const int ditem = stoi(itm);
            if (ditem == 0)
                continue;   // no slot for id 0 (index would be -1)

            const unsigned int id = static_cast<unsigned int>(abs(ditem));
            if (L < id) {
                L = id;
                // Convert the size to int before negating; negating the
                // unsigned size relies on wrap-around and narrowing.
                while (DFS.size() < L)
                    DFS.emplace_back(-static_cast<int>(DFS.size()) - 1);
                // Size the per-line marker to L directly so it is large enough
                // even when DFS did not need to grow.
                counted.resize(L, false);
            }

            items.back().push_back(ditem);

            if (!counted[id - 1]) {
                DFS[id - 1].seq_ID.push_back(items.size() - 1);
                DFS[id - 1].str_pnt.push_back(items.back().size() - 1);
                ++DFS[id - 1].freq;
                counted[id - 1] = true;
            }

            ++size_m;
        }

        E += size_m;
        if (static_cast<unsigned int>(size_m) > M)
            M = size_m;
    }

    return true;
}
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
|
effspm/load_inst.hpp
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
// effspm/load_inst.hpp
|
|
2
|
+
#pragma once
|
|
3
|
+
|
|
4
|
+
#include <vector>
|
|
5
|
+
#include <string>
|
|
6
|
+
#include <fstream>
|
|
7
|
+
#include <map>
|
|
8
|
+
#include <ctime> // for clock_t
|
|
9
|
+
|
|
10
|
+
using namespace std;
|
|
11
|
+
|
|
12
|
+
// ------------------------------------------------------------
|
|
13
|
+
// forward declare Pattern (defined in freq_miner.hpp)
|
|
14
|
+
class Pattern;
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
// Main entrypoint: load your file on disk into 'items', build DFS, theta, etc.
|
|
18
|
+
bool Load_instance(string &items_file, double thresh);
|
|
19
|
+
|
|
20
|
+
// storage & globals shared between the C++-CLI & Python bindings
|
|
21
|
+
extern vector<vector<int>> items;
|
|
22
|
+
extern vector<Pattern> DFS; // now Pattern is known
|
|
23
|
+
extern vector<int> item_dic;
|
|
24
|
+
|
|
25
|
+
extern string out_file;
|
|
26
|
+
extern bool b_disp, b_write, use_dic, use_list, pre_pro;
|
|
27
|
+
|
|
28
|
+
extern unsigned int M, L, time_limit;
|
|
29
|
+
extern unsigned long long N, E, theta; // E = total number of entries
|
|
30
|
+
|
|
31
|
+
extern clock_t start_time;
|
effspm/utility.cpp
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
#include "utility.hpp"
|
|
2
|
+
#include <iostream>
|
|
3
|
+
#include <fstream>
|
|
4
|
+
|
|
5
|
+
// Reference point for elapsed-time checks.
std::clock_t start_time;

// Option flags, all off by default.
bool b_disp = false;
bool b_write = false;
bool use_dic = false;
bool use_list = false;
bool pre_pro = false;

// Time budget: ten hours, expressed in seconds.
unsigned int time_limit = 10 * 3600;

// Target file for pattern output (used when b_write is set).
std::string out_file;
|
|
12
|
+
|
|
13
|
+
// Buffer of patterns handed back to Python; private to this translation unit.
namespace {
std::vector<std::vector<int>> collected_patterns;
}

// Converts a clock() tick count into seconds.
double give_time(std::clock_t end_time) {
    const double ticks = static_cast<double>(end_time);
    return ticks / CLOCKS_PER_SEC;
}

// Empties the pattern buffer.
void ClearCollected() {
    collected_patterns.clear();
}

// Read-only view of the pattern buffer.
const std::vector<std::vector<int>>& GetCollected() {
    return collected_patterns;
}

// Appends a copy of `seq` to the pattern buffer.
void CollectPattern(const std::vector<int>& seq) {
    collected_patterns.emplace_back(seq);
}
|
|
32
|
+
|
|
33
|
+
// non-const overload forwards to const version
|
|
34
|
+
void Out_patt(std::vector<int>& seq, unsigned int freq) {
|
|
35
|
+
Out_patt(static_cast<const std::vector<int>&>(seq), freq);
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
// actual implementation
|
|
39
|
+
void Out_patt(const std::vector<int>& seq, unsigned int freq) {
|
|
40
|
+
// 1) collect for Python
|
|
41
|
+
CollectPattern(seq);
|
|
42
|
+
|
|
43
|
+
// 2) optional console output
|
|
44
|
+
if (b_disp) {
|
|
45
|
+
for (int x : seq) std::cout << x << ' ';
|
|
46
|
+
std::cout << "\n************** Freq: " << freq << "\n";
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
// 3) optional file output
|
|
50
|
+
if (b_write) {
|
|
51
|
+
std::ofstream ofs(out_file, std::ios::app);
|
|
52
|
+
for (int x : seq) ofs << x << ' ';
|
|
53
|
+
ofs << "\n************** Freq: " << freq << "\n";
|
|
54
|
+
}
|
|
55
|
+
}
|
effspm/utility.hpp
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
#ifndef UTILITY_HPP
|
|
2
|
+
#define UTILITY_HPP
|
|
3
|
+
|
|
4
|
+
#include <vector>
|
|
5
|
+
#include <string>
|
|
6
|
+
#include <ctime>
|
|
7
|
+
|
|
8
|
+
// Timing: reference point and tick-to-seconds conversion.
extern std::clock_t start_time;
double give_time(std::clock_t end_time);

// Option flags (shared with main.cpp).
extern bool b_disp;
extern bool b_write;
extern bool use_dic;
extern bool use_list;
extern bool pre_pro;

extern unsigned int time_limit;
extern std::string out_file;

// Pattern buffer used by the Python binding.
void ClearCollected();
const std::vector<std::vector<int>>& GetCollected();
void CollectPattern(const std::vector<int>& seq);

// Pattern output; both overloads exist so that calls with const and
// non-const vectors link.
void Out_patt(const std::vector<int>& seq, unsigned int freq);
void Out_patt(std::vector<int>& seq, unsigned int freq);
|
|
27
|
+
|
|
28
|
+
#endif // UTILITY_HPP
|
|
29
|
+
|