effspm 0.3.0__cp310-cp310-macosx_11_0_arm64.whl → 0.3.3__cp310-cp310-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- effspm/_effspm.cpp +683 -2
- effspm/_effspm.cpython-310-darwin.so +0 -0
- effspm/btminer/src/load_inst.cpp +21 -11
- effspm/btminer/src/main.cpp +83 -0
- effspm/htminer/src/build_mdd.cpp +41 -66
- effspm/htminer/src/build_mdd.hpp +56 -49
- effspm/htminer/src/freq_miner.cpp +341 -307
- effspm/htminer/src/freq_miner.hpp +39 -40
- effspm/htminer/src/load_inst.cpp +287 -336
- effspm/htminer/src/load_inst.hpp +23 -6
- effspm/htminer/src/main.cpp +97 -0
- effspm/htminer/src/utility.cpp +38 -57
- effspm/htminer/src/utility.hpp +9 -64
- effspm/largebm/src/main.cpp +95 -0
- effspm/largehm/src/build_mdd.cpp +75 -110
- effspm/largehm/src/build_mdd.hpp +53 -73
- effspm/largehm/src/freq_miner.cpp +132 -173
- effspm/largehm/src/freq_miner.hpp +37 -60
- effspm/largehm/src/load_inst.cpp +136 -191
- effspm/largehm/src/load_inst.hpp +13 -50
- effspm/largehm/src/main.cpp +95 -0
- effspm/largehm/src/utility.cpp +46 -28
- effspm/largehm/src/utility.hpp +18 -16
- effspm/largepp/src/load_inst.cpp +5 -4
- effspm/largepp/src/main.cpp +108 -0
- effspm/load_inst.cpp +8 -8
- effspm/main.cpp +103 -0
- {effspm-0.3.0.dist-info → effspm-0.3.3.dist-info}/METADATA +1 -1
- effspm-0.3.3.dist-info/RECORD +60 -0
- effspm-0.3.0.dist-info/RECORD +0 -54
- {effspm-0.3.0.dist-info → effspm-0.3.3.dist-info}/WHEEL +0 -0
- {effspm-0.3.0.dist-info → effspm-0.3.3.dist-info}/licenses/LICENSE +0 -0
- {effspm-0.3.0.dist-info → effspm-0.3.3.dist-info}/top_level.txt +0 -0
effspm/_effspm.cpp
CHANGED
|
@@ -2,6 +2,686 @@
|
|
|
2
2
|
|
|
3
3
|
#include <pybind11/pybind11.h>
|
|
4
4
|
#include <pybind11/stl.h>
|
|
5
|
+
|
|
6
|
+
#include <iostream>
|
|
7
|
+
#include <fstream>
|
|
8
|
+
#include <cstdio> // std::remove
|
|
9
|
+
#include <vector>
|
|
10
|
+
#include <string>
|
|
11
|
+
#include <ctime>
|
|
12
|
+
#include <cmath>
|
|
13
|
+
|
|
14
|
+
namespace py = pybind11;
|
|
15
|
+
|
|
16
|
+
// PrefixProjection headers (global namespace)
|
|
17
|
+
#include "freq_miner.hpp"
|
|
18
|
+
#include "load_inst.hpp"
|
|
19
|
+
#include "utility.hpp"
|
|
20
|
+
|
|
21
|
+
// BTMiner (namespaced)
|
|
22
|
+
#include "btminer/src/freq_miner.hpp"
|
|
23
|
+
#include "btminer/src/load_inst.hpp"
|
|
24
|
+
#include "btminer/src/utility.hpp"
|
|
25
|
+
#include "btminer/src/build_mdd.hpp"
|
|
26
|
+
|
|
27
|
+
// HTMiner (namespaced)
|
|
28
|
+
#include "htminer/src/build_mdd.hpp"
|
|
29
|
+
#include "htminer/src/freq_miner.hpp"
|
|
30
|
+
#include "htminer/src/load_inst.hpp"
|
|
31
|
+
#include "htminer/src/utility.hpp"
|
|
32
|
+
|
|
33
|
+
// LargePrefixProjection
|
|
34
|
+
#include "largepp/src/freq_miner.hpp"
|
|
35
|
+
#include "largepp/src/load_inst.hpp"
|
|
36
|
+
#include "largepp/src/utility.hpp"
|
|
37
|
+
|
|
38
|
+
// LargeBTMiner
|
|
39
|
+
#include "largebm/src/freq_miner.hpp"
|
|
40
|
+
#include "largebm/src/load_inst.hpp"
|
|
41
|
+
#include "largebm/src/utility.hpp"
|
|
42
|
+
#include "largebm/src/build_mdd.hpp"
|
|
43
|
+
|
|
44
|
+
// LargeHTMiner
|
|
45
|
+
#include "largehm/src/freq_miner.hpp"
|
|
46
|
+
#include "largehm/src/load_inst.hpp"
|
|
47
|
+
#include "largehm/src/utility.hpp"
|
|
48
|
+
#include "largehm/src/build_mdd.hpp"
|
|
49
|
+
|
|
50
|
+
namespace {
|
|
51
|
+
|
|
52
|
+
// Scope guard for a temporary file: when the guard is destroyed, the file
// named by `path` is deleted with std::remove(). An empty `path` means the
// guard owns nothing and the destructor does nothing.
//
// The guard is move-only. Copying is disabled because two guards holding the
// same path would each try to delete the file.
struct TempFile {
    std::string path;  // file to delete on destruction; empty = nothing owned

    TempFile() = default;

    TempFile(const TempFile&) = delete;
    TempFile& operator=(const TempFile&) = delete;

    // Takes over `other`'s path; `other` is left owning nothing.
    TempFile(TempFile&& other) noexcept { path.swap(other.path); }

    // Deletes the file currently owned (if any), then takes over `other`'s path.
    TempFile& operator=(TempFile&& other) noexcept {
        if (this != &other) {
            discard();
            path.swap(other.path);  // `path` is empty after discard()
        }
        return *this;
    }

    ~TempFile() { discard(); }

private:
    // Best-effort removal: the result of std::remove() is deliberately ignored
    // because there is nothing useful to do about a failure during cleanup.
    void discard() noexcept {
        if (!path.empty()) {
            std::remove(path.c_str());
            path.clear();
        }
    }
};
|
|
61
|
+
|
|
62
|
+
// Writes `seqs` to a freshly named temporary text file and returns its path.
//
// File format: one sequence per line, items separated by single spaces, each
// line terminated by '\n'. An empty sequence produces an empty line.
//
// The returned path is std::tmpnam()'s name with ".txt" appended. The caller
// is responsible for deleting the file.
//
// Throws std::runtime_error if no name can be generated, if the file cannot
// be opened, or if writing/flushing fails. On a write failure the partially
// written file is removed before throwing.
//
// NOTE(review): std::tmpnam() only produces a name; another process could
// create the same file between name generation and the open below. Consider a
// platform API that creates the file atomically (e.g. mkstemp) if that matters.
std::string write_temp_seq_file(const std::vector<std::vector<int>>& seqs) {
    char tmp_name[L_tmpnam];
    if (!std::tmpnam(tmp_name)) {
        throw std::runtime_error("Failed to create temporary file name");
    }
    std::string path = std::string(tmp_name) + ".txt";

    std::ofstream ofs(path);
    if (!ofs) {
        throw std::runtime_error("Failed to open temporary file for writing: " + path);
    }

    for (const auto& seq : seqs) {
        const char* separator = "";
        for (const int item : seq) {
            ofs << separator << item;
            separator = " ";
        }
        ofs << '\n';
    }

    // close() flushes; any earlier write error or a failed flush leaves the
    // stream in a failed state, which is checked once here.
    ofs.close();
    if (!ofs) {
        std::remove(path.c_str());
        throw std::runtime_error("Failed to write temporary file: " + path);
    }
    return path;
}
|
|
87
|
+
|
|
88
|
+
} // anonymous namespace
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
PYBIND11_MODULE(_effspm, m) {
|
|
92
|
+
m.doc() = "Unified SPM library: PrefixProjection, BTMiner, HTMiner, Large* variants";
|
|
93
|
+
|
|
94
|
+
// ─────────────────────────────────────────────────────────────
|
|
95
|
+
// PrefixProjection (works directly on Python lists or files)
|
|
96
|
+
// ─────────────────────────────────────────────────────────────
|
|
97
|
+
m.def("PrefixProjection",
|
|
98
|
+
[](py::object data,
|
|
99
|
+
double minsup,
|
|
100
|
+
unsigned int time_limit,
|
|
101
|
+
bool preproc,
|
|
102
|
+
bool use_dic,
|
|
103
|
+
bool verbose,
|
|
104
|
+
const std::string &out_file)
|
|
105
|
+
{
|
|
106
|
+
::time_limit = time_limit;
|
|
107
|
+
::pre_pro = preproc;
|
|
108
|
+
::use_dic = use_dic;
|
|
109
|
+
::use_list = false;
|
|
110
|
+
::b_disp = verbose; // controls prints in original code
|
|
111
|
+
::b_write = !out_file.empty();
|
|
112
|
+
::out_file = out_file;
|
|
113
|
+
|
|
114
|
+
ClearCollected();
|
|
115
|
+
start_time = std::clock();
|
|
116
|
+
|
|
117
|
+
if (py::isinstance<py::str>(data)) {
|
|
118
|
+
std::string path = data.cast<std::string>();
|
|
119
|
+
if (!Load_instance(path, minsup))
|
|
120
|
+
throw std::runtime_error("PrefixProjection: failed to load file: " + path);
|
|
121
|
+
} else {
|
|
122
|
+
auto seqs = data.cast<std::vector<std::vector<int>>>();
|
|
123
|
+
items = std::move(seqs);
|
|
124
|
+
N = items.size();
|
|
125
|
+
|
|
126
|
+
int max_id = 0;
|
|
127
|
+
for (auto &seq : items)
|
|
128
|
+
for (int x : seq)
|
|
129
|
+
max_id = std::max(max_id, std::abs(x));
|
|
130
|
+
L = max_id;
|
|
131
|
+
|
|
132
|
+
theta = (minsup < 1.0) ? std::ceil(minsup * N) : minsup;
|
|
133
|
+
|
|
134
|
+
DFS.clear();
|
|
135
|
+
DFS.reserve(L);
|
|
136
|
+
for (unsigned int i = 0; i < L; ++i)
|
|
137
|
+
DFS.emplace_back(-static_cast<int>(i) - 1);
|
|
138
|
+
|
|
139
|
+
M = 0;
|
|
140
|
+
E = 0;
|
|
141
|
+
for (auto &seq : items) {
|
|
142
|
+
M = std::max<unsigned int>(M, seq.size());
|
|
143
|
+
E += seq.size();
|
|
144
|
+
}
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
Freq_miner();
|
|
148
|
+
|
|
149
|
+
py::dict out;
|
|
150
|
+
out["patterns"] = GetCollected();
|
|
151
|
+
out["time"] = give_time(std::clock() - start_time);
|
|
152
|
+
return out;
|
|
153
|
+
},
|
|
154
|
+
py::arg("data"),
|
|
155
|
+
py::arg("minsup") = 0.01,
|
|
156
|
+
py::arg("time_limit") = 36000,
|
|
157
|
+
py::arg("preproc") = false,
|
|
158
|
+
py::arg("use_dic") = false,
|
|
159
|
+
py::arg("verbose") = false,
|
|
160
|
+
py::arg("out_file") = ""
|
|
161
|
+
);
|
|
162
|
+
|
|
163
|
+
// ─────────────────────────────────────────────────────────────
|
|
164
|
+
// BTMiner (always uses professor's Load_instance)
|
|
165
|
+
// ─────────────────────────────────────────────────────────────
|
|
166
|
+
// ─────────────────────────────────────────────────────────────
|
|
167
|
+
// BTMiner (always uses professor's Load_instance)
|
|
168
|
+
// ─────────────────────────────────────────────────────────────
|
|
169
|
+
/*m.def("BTMiner",
|
|
170
|
+
[](py::object data,
|
|
171
|
+
double minsup,
|
|
172
|
+
unsigned int time_limit,
|
|
173
|
+
bool preproc,
|
|
174
|
+
bool use_dic,
|
|
175
|
+
bool verbose,
|
|
176
|
+
const std::string &out_file)
|
|
177
|
+
{
|
|
178
|
+
// 1) Configure professor globals
|
|
179
|
+
btminer::time_limit = static_cast<int>(time_limit);
|
|
180
|
+
btminer::pre_pro = preproc;
|
|
181
|
+
btminer::use_dic = use_dic;
|
|
182
|
+
btminer::b_disp = verbose;
|
|
183
|
+
btminer::b_write = !out_file.empty();
|
|
184
|
+
btminer::out_file = out_file;
|
|
185
|
+
btminer::N_mult = 1;
|
|
186
|
+
btminer::M_mult = 1;
|
|
187
|
+
btminer::just_build = false;
|
|
188
|
+
|
|
189
|
+
// 2) HARD RESET of *known* global state for BTMiner
|
|
190
|
+
// (Only touch what we know exists in btminer namespace)
|
|
191
|
+
btminer::ClearCollected(); // clear collected patterns
|
|
192
|
+
btminer::Tree.clear(); // clear MDD tree
|
|
193
|
+
btminer::DFS.clear(); // clear DFS patterns
|
|
194
|
+
|
|
195
|
+
btminer::M = 0;
|
|
196
|
+
btminer::L = 0;
|
|
197
|
+
btminer::N = 0;
|
|
198
|
+
btminer::theta = 0;
|
|
199
|
+
btminer::E = 0;
|
|
200
|
+
btminer::num_patt = 0; // reset pattern counter if defined
|
|
201
|
+
|
|
202
|
+
// NOTE: we do NOT reinsert root here; btminer::Load_instance()
|
|
203
|
+
// is responsible for calling Tree.emplace_back(0,0,0) as needed.
|
|
204
|
+
|
|
205
|
+
btminer::start_time = std::clock();
|
|
206
|
+
|
|
207
|
+
// 3) Handle input (path or list-of-lists)
|
|
208
|
+
TempFile tmp;
|
|
209
|
+
std::string path;
|
|
210
|
+
|
|
211
|
+
if (py::isinstance<py::str>(data)) {
|
|
212
|
+
// File path: use directly
|
|
213
|
+
path = data.cast<std::string>();
|
|
214
|
+
} else {
|
|
215
|
+
// Python list → write to a temp file in professor’s format
|
|
216
|
+
auto seqs = data.cast<std::vector<std::vector<int>>>();
|
|
217
|
+
tmp.path = write_temp_seq_file(seqs);
|
|
218
|
+
path = tmp.path;
|
|
219
|
+
}
|
|
220
|
+
|
|
221
|
+
if (verbose) {
|
|
222
|
+
std::cerr << "[BTMiner] path=" << path
|
|
223
|
+
<< " minsup=" << minsup
|
|
224
|
+
<< " preproc=" << preproc
|
|
225
|
+
<< " use_dic=" << use_dic
|
|
226
|
+
<< std::endl;
|
|
227
|
+
}
|
|
228
|
+
|
|
229
|
+
// 4) Build MDD + run miner
|
|
230
|
+
if (!btminer::Load_instance(path, minsup)) {
|
|
231
|
+
throw std::runtime_error("BTMiner: failed to load instance from: " + path);
|
|
232
|
+
}
|
|
233
|
+
|
|
234
|
+
btminer::Freq_miner();
|
|
235
|
+
|
|
236
|
+
// 5) Return results
|
|
237
|
+
py::dict out;
|
|
238
|
+
out["patterns"] = btminer::GetCollected();
|
|
239
|
+
out["num_patterns"] = btminer::num_patt;
|
|
240
|
+
out["time"] = btminer::give_time(std::clock() - btminer::start_time);
|
|
241
|
+
out["N"] = btminer::N;
|
|
242
|
+
out["L"] = btminer::L;
|
|
243
|
+
out["theta"] = btminer::theta;
|
|
244
|
+
return out;
|
|
245
|
+
},
|
|
246
|
+
py::arg("data"),
|
|
247
|
+
py::arg("minsup") = 0.01,
|
|
248
|
+
py::arg("time_limit") = 36000,
|
|
249
|
+
py::arg("preproc") = false,
|
|
250
|
+
py::arg("use_dic") = false,
|
|
251
|
+
py::arg("verbose") = false,
|
|
252
|
+
py::arg("out_file") = ""
|
|
253
|
+
); */
|
|
254
|
+
m.def("BTMiner",
|
|
255
|
+
[](py::object data,
|
|
256
|
+
double minsup,
|
|
257
|
+
unsigned int time_limit,
|
|
258
|
+
bool preproc,
|
|
259
|
+
bool use_dic,
|
|
260
|
+
bool verbose,
|
|
261
|
+
const std::string &out_file)
|
|
262
|
+
{
|
|
263
|
+
// 1) Configure professor globals
|
|
264
|
+
btminer::time_limit = static_cast<int>(time_limit);
|
|
265
|
+
btminer::pre_pro = preproc;
|
|
266
|
+
btminer::use_dic = use_dic;
|
|
267
|
+
btminer::b_disp = verbose;
|
|
268
|
+
btminer::b_write = !out_file.empty();
|
|
269
|
+
btminer::out_file = out_file;
|
|
270
|
+
btminer::N_mult = 1;
|
|
271
|
+
btminer::M_mult = 1;
|
|
272
|
+
btminer::just_build = false;
|
|
273
|
+
|
|
274
|
+
// 2) HARD RESET of *known* global state for BTMiner
|
|
275
|
+
btminer::ClearCollected(); // clear collected patterns
|
|
276
|
+
btminer::Tree.clear(); // clear MDD tree
|
|
277
|
+
btminer::DFS.clear(); // clear DFS patterns
|
|
278
|
+
|
|
279
|
+
// clear all frequency / mapping / item structures
|
|
280
|
+
btminer::freq.clear();
|
|
281
|
+
btminer::item_dic.clear();
|
|
282
|
+
btminer::item_map.clear();
|
|
283
|
+
btminer::item_map_rev.clear();
|
|
284
|
+
btminer::items.clear(); // if you have this defined anywhere
|
|
285
|
+
|
|
286
|
+
// reset scalar globals
|
|
287
|
+
btminer::M = 0;
|
|
288
|
+
btminer::L = 0;
|
|
289
|
+
btminer::N = 0;
|
|
290
|
+
btminer::theta = 0;
|
|
291
|
+
btminer::E = 0;
|
|
292
|
+
btminer::num_patt = 0;
|
|
293
|
+
btminer::num_nodes = 0;
|
|
294
|
+
btminer::cur_node = 0;
|
|
295
|
+
// N_mult, M_mult, flags are set just above
|
|
296
|
+
|
|
297
|
+
btminer::start_time = std::clock();
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
// 3) Handle input (path or list-of-lists)
|
|
301
|
+
TempFile tmp;
|
|
302
|
+
std::string path;
|
|
303
|
+
|
|
304
|
+
if (py::isinstance<py::str>(data)) {
|
|
305
|
+
// File path: use directly
|
|
306
|
+
path = data.cast<std::string>();
|
|
307
|
+
} else {
|
|
308
|
+
// Python list → write to a temp file in professor’s format
|
|
309
|
+
auto seqs = data.cast<std::vector<std::vector<int>>>();
|
|
310
|
+
tmp.path = write_temp_seq_file(seqs);
|
|
311
|
+
path = tmp.path;
|
|
312
|
+
}
|
|
313
|
+
|
|
314
|
+
if (verbose) {
|
|
315
|
+
std::cerr << "[BTMiner] path=" << path
|
|
316
|
+
<< " minsup=" << minsup
|
|
317
|
+
<< " preproc=" << preproc
|
|
318
|
+
<< " use_dic=" << use_dic
|
|
319
|
+
<< std::endl;
|
|
320
|
+
}
|
|
321
|
+
|
|
322
|
+
// 4) Build MDD + run miner
|
|
323
|
+
if (!btminer::Load_instance(path, minsup)) {
|
|
324
|
+
throw std::runtime_error("BTMiner: failed to load instance from: " + path);
|
|
325
|
+
}
|
|
326
|
+
|
|
327
|
+
btminer::Freq_miner();
|
|
328
|
+
|
|
329
|
+
// 5) Return results
|
|
330
|
+
py::dict out;
|
|
331
|
+
out["patterns"] = btminer::GetCollected();
|
|
332
|
+
out["num_patterns"] = btminer::num_patt;
|
|
333
|
+
out["time"] = btminer::give_time(std::clock() - btminer::start_time);
|
|
334
|
+
out["N"] = btminer::N;
|
|
335
|
+
out["L"] = btminer::L;
|
|
336
|
+
out["theta"] = btminer::theta;
|
|
337
|
+
return out;
|
|
338
|
+
},
|
|
339
|
+
py::arg("data"),
|
|
340
|
+
py::arg("minsup") = 0.01,
|
|
341
|
+
py::arg("time_limit") = 36000,
|
|
342
|
+
py::arg("preproc") = false,
|
|
343
|
+
py::arg("use_dic") = false,
|
|
344
|
+
py::arg("verbose") = false,
|
|
345
|
+
py::arg("out_file") = ""
|
|
346
|
+
);
|
|
347
|
+
|
|
348
|
+
// ─────────────────────────────────────────────────────────────
|
|
349
|
+
// HTMiner (works on files; we use a temp file for in-memory data)
|
|
350
|
+
// ─────────────────────────────────────────────────────────────
|
|
351
|
+
// ─────────────────────────────────────────────────────────────
|
|
352
|
+
// HTMiner (always uses professor's Load_instance; pre_pro forced ON)
|
|
353
|
+
// ─────────────────────────────────────────────────────────────
|
|
354
|
+
m.def("HTMiner",
|
|
355
|
+
[](py::object data,
|
|
356
|
+
double minsup,
|
|
357
|
+
unsigned int time_limit,
|
|
358
|
+
bool /*preproc*/, // Python arg is ignored internally
|
|
359
|
+
bool use_dic,
|
|
360
|
+
bool verbose,
|
|
361
|
+
const std::string &out_file)
|
|
362
|
+
{
|
|
363
|
+
using namespace htminer;
|
|
364
|
+
|
|
365
|
+
// ───────── Global parameter setup ─────────
|
|
366
|
+
htminer::time_limit = time_limit;
|
|
367
|
+
|
|
368
|
+
// IMPORTANT: always run with preprocessing ON,
|
|
369
|
+
// regardless of the Python `preproc` flag.
|
|
370
|
+
htminer::pre_pro = true;
|
|
371
|
+
htminer::use_dic = use_dic;
|
|
372
|
+
htminer::just_build = false;
|
|
373
|
+
htminer::b_disp = verbose;
|
|
374
|
+
htminer::b_write = !out_file.empty();
|
|
375
|
+
htminer::out_file = out_file;
|
|
376
|
+
|
|
377
|
+
// ───────── HARD RESET of HTMiner globals ─────────
|
|
378
|
+
htminer::ClearCollected();
|
|
379
|
+
htminer::Tree.clear();
|
|
380
|
+
htminer::VTree.clear();
|
|
381
|
+
htminer::CTree.clear();
|
|
382
|
+
htminer::DFS.clear();
|
|
383
|
+
htminer::VDFS.clear();
|
|
384
|
+
htminer::item_dic.clear();
|
|
385
|
+
|
|
386
|
+
htminer::M = 0;
|
|
387
|
+
htminer::N = 0;
|
|
388
|
+
htminer::L = 0;
|
|
389
|
+
htminer::E = 0;
|
|
390
|
+
htminer::theta = 0;
|
|
391
|
+
htminer::mlim = 0;
|
|
392
|
+
htminer::itmset_exists = false;
|
|
393
|
+
|
|
394
|
+
// NOTE: do NOT add a root arc here;
|
|
395
|
+
// htminer::Load_instance() already does Tree.emplace_back(0,0,0)
|
|
396
|
+
htminer::start_time = std::clock();
|
|
397
|
+
|
|
398
|
+
// ───────── Handle input (path or in-memory sequences) ─────────
|
|
399
|
+
TempFile tmp;
|
|
400
|
+
std::string path;
|
|
401
|
+
|
|
402
|
+
if (py::isinstance<py::str>(data)) {
|
|
403
|
+
// data is a file path
|
|
404
|
+
path = data.cast<std::string>();
|
|
405
|
+
} else {
|
|
406
|
+
// data is a list[list[int]] → write a temp file in the same text format
|
|
407
|
+
auto seqs = data.cast<std::vector<std::vector<int>>>();
|
|
408
|
+
tmp.path = write_temp_seq_file(seqs);
|
|
409
|
+
path = tmp.path;
|
|
410
|
+
}
|
|
411
|
+
|
|
412
|
+
if (verbose) {
|
|
413
|
+
std::cerr << "[HTMiner] path=" << path
|
|
414
|
+
<< " minsup=" << minsup
|
|
415
|
+
<< " preproc(always)=true"
|
|
416
|
+
<< " use_dic=" << use_dic
|
|
417
|
+
<< std::endl;
|
|
418
|
+
}
|
|
419
|
+
|
|
420
|
+
// ───────── Build MDD via professor's loader ─────────
|
|
421
|
+
if (!htminer::Load_instance(path, minsup)) {
|
|
422
|
+
throw std::runtime_error("HTMiner: failed to load instance from: " + path);
|
|
423
|
+
}
|
|
424
|
+
|
|
425
|
+
// ───────── Run miner ─────────
|
|
426
|
+
htminer::Freq_miner();
|
|
427
|
+
|
|
428
|
+
// ───────── Return results ─────────
|
|
429
|
+
py::dict out;
|
|
430
|
+
out["patterns"] = htminer::GetCollected();
|
|
431
|
+
out["time"] = htminer::give_time(std::clock() - htminer::start_time);
|
|
432
|
+
return out;
|
|
433
|
+
},
|
|
434
|
+
py::arg("data"),
|
|
435
|
+
py::arg("minsup") = 0.01,
|
|
436
|
+
py::arg("time_limit") = 36000,
|
|
437
|
+
py::arg("preproc") = false, // kept for API symmetry, but IGNORED
|
|
438
|
+
py::arg("use_dic") = false,
|
|
439
|
+
py::arg("verbose") = false,
|
|
440
|
+
py::arg("out_file") = ""
|
|
441
|
+
);
|
|
442
|
+
|
|
443
|
+
// ─────────────────────────────────────────────────────────────
|
|
444
|
+
// LargePrefixProjection (already has its own Load_py)
|
|
445
|
+
// ─────────────────────────────────────────────────────────────
|
|
446
|
+
m.def("LargePrefixProjection",
|
|
447
|
+
[](py::object data,
|
|
448
|
+
double minsup,
|
|
449
|
+
unsigned int time_limit,
|
|
450
|
+
bool preproc,
|
|
451
|
+
bool use_dic,
|
|
452
|
+
bool verbose,
|
|
453
|
+
const std::string &out_file)
|
|
454
|
+
{
|
|
455
|
+
// 1) Configure global flags
|
|
456
|
+
largepp::time_limit = time_limit;
|
|
457
|
+
largepp::pre_pro = preproc;
|
|
458
|
+
largepp::use_dic = use_dic;
|
|
459
|
+
largepp::use_list = true; // LargePrefixProjection is list-based
|
|
460
|
+
largepp::b_disp = verbose;
|
|
461
|
+
largepp::b_write = !out_file.empty();
|
|
462
|
+
largepp::out_file = out_file;
|
|
463
|
+
largepp::just_build = false;
|
|
464
|
+
|
|
465
|
+
// 2) HARD RESET of largepp global state
|
|
466
|
+
// (only touch symbols that actually exist in largepp)
|
|
467
|
+
largepp::ClearCollected(); // clear previously collected patterns
|
|
468
|
+
|
|
469
|
+
// If these exist in largepp::load_inst.hpp / utility.hpp they’ll compile;
|
|
470
|
+
// if the compiler complains about any of them, just comment that line out.
|
|
471
|
+
largepp::items.clear(); // transaction DB
|
|
472
|
+
largepp::DFS.clear(); // DFS pattern stack, if list-based miner uses it
|
|
473
|
+
|
|
474
|
+
largepp::M = 0;
|
|
475
|
+
largepp::L = 0;
|
|
476
|
+
largepp::N = 0;
|
|
477
|
+
largepp::theta = 0;
|
|
478
|
+
largepp::E = 0;
|
|
479
|
+
largepp::num_patt = 0;
|
|
480
|
+
|
|
481
|
+
largepp::start_time = std::clock();
|
|
482
|
+
|
|
483
|
+
// 3) Handle input (path or Python list)
|
|
484
|
+
if (py::isinstance<py::str>(data)) {
|
|
485
|
+
std::string fname = data.cast<std::string>();
|
|
486
|
+
largepp::Load_instance(fname, minsup);
|
|
487
|
+
} else {
|
|
488
|
+
largepp::Load_py(data, minsup);
|
|
489
|
+
}
|
|
490
|
+
|
|
491
|
+
// 4) Run miner
|
|
492
|
+
largepp::Freq_miner();
|
|
493
|
+
|
|
494
|
+
// 5) Return results
|
|
495
|
+
py::dict out;
|
|
496
|
+
out["patterns"] = largepp::GetCollected();
|
|
497
|
+
out["time"] = largepp::give_time(std::clock() - largepp::start_time);
|
|
498
|
+
return out;
|
|
499
|
+
},
|
|
500
|
+
py::arg("data"),
|
|
501
|
+
py::arg("minsup") = 0.01,
|
|
502
|
+
py::arg("time_limit") = 36000,
|
|
503
|
+
py::arg("preproc") = false,
|
|
504
|
+
py::arg("use_dic") = false,
|
|
505
|
+
py::arg("verbose") = false,
|
|
506
|
+
py::arg("out_file") = ""
|
|
507
|
+
);
|
|
508
|
+
|
|
509
|
+
// ─────────────────────────────────────────────────────────────
|
|
510
|
+
// LargeBTMiner (always uses professor's largebm::Load_instance)
|
|
511
|
+
// ─────────────────────────────────────────────────────────────
|
|
512
|
+
m.def("LargeBTMiner",
|
|
513
|
+
[](py::object data,
|
|
514
|
+
double minsup,
|
|
515
|
+
unsigned int time_limit,
|
|
516
|
+
bool preproc,
|
|
517
|
+
bool use_dic,
|
|
518
|
+
bool verbose,
|
|
519
|
+
const std::string &out_file)
|
|
520
|
+
{
|
|
521
|
+
using namespace largebm;
|
|
522
|
+
|
|
523
|
+
largebm::time_limit = time_limit;
|
|
524
|
+
largebm::pre_pro = preproc;
|
|
525
|
+
largebm::use_dic = use_dic;
|
|
526
|
+
largebm::use_list = false; // MDD-based
|
|
527
|
+
largebm::b_disp = verbose;
|
|
528
|
+
largebm::b_write = !out_file.empty();
|
|
529
|
+
largebm::out_file = out_file;
|
|
530
|
+
largebm::just_build = false;
|
|
531
|
+
|
|
532
|
+
largebm::ClearCollected();
|
|
533
|
+
largebm::items.clear();
|
|
534
|
+
largebm::item_dic.clear();
|
|
535
|
+
largebm::inv_item_dic.clear();
|
|
536
|
+
largebm::Tree.clear();
|
|
537
|
+
largebm::DFS.clear();
|
|
538
|
+
|
|
539
|
+
largebm::start_time = std::clock();
|
|
540
|
+
|
|
541
|
+
TempFile tmp;
|
|
542
|
+
std::string path;
|
|
543
|
+
|
|
544
|
+
if (py::isinstance<py::str>(data)) {
|
|
545
|
+
path = data.cast<std::string>();
|
|
546
|
+
} else {
|
|
547
|
+
auto seqs = data.cast<std::vector<std::vector<int>>>();
|
|
548
|
+
tmp.path = write_temp_seq_file(seqs);
|
|
549
|
+
path = tmp.path;
|
|
550
|
+
}
|
|
551
|
+
|
|
552
|
+
if (verbose) {
|
|
553
|
+
std::cerr << "[LargeBTMiner] path=" << path
|
|
554
|
+
<< " minsup=" << minsup
|
|
555
|
+
<< " preproc=" << preproc
|
|
556
|
+
<< " use_dic=" << use_dic
|
|
557
|
+
<< std::endl;
|
|
558
|
+
}
|
|
559
|
+
|
|
560
|
+
if (!largebm::Load_instance(path, minsup)) {
|
|
561
|
+
throw std::runtime_error("LargeBTMiner: failed to load instance from: " + path);
|
|
562
|
+
}
|
|
563
|
+
|
|
564
|
+
largebm::Freq_miner();
|
|
565
|
+
|
|
566
|
+
py::dict out;
|
|
567
|
+
out["patterns"] = largebm::GetCollected();
|
|
568
|
+
out["time"] = largebm::give_time(std::clock() - largebm::start_time);
|
|
569
|
+
return out;
|
|
570
|
+
},
|
|
571
|
+
py::arg("data"),
|
|
572
|
+
py::arg("minsup") = 0.01,
|
|
573
|
+
py::arg("time_limit") = 36000,
|
|
574
|
+
py::arg("preproc") = false,
|
|
575
|
+
py::arg("use_dic") = false,
|
|
576
|
+
py::arg("verbose") = false,
|
|
577
|
+
py::arg("out_file") = ""
|
|
578
|
+
);
|
|
579
|
+
|
|
580
|
+
// ─────────────────────────────────────────────────────────────
|
|
581
|
+
// LargeHTMiner (always uses professor's largehm::Load_instance; pre_pro forced ON)
|
|
582
|
+
// ─────────────────────────────────────────────────────────────
|
|
583
|
+
// ─────────────────────────────────────────────────────────────
|
|
584
|
+
// LargeHTMiner (professor's Large HTMiner, namespaced as largehm)
|
|
585
|
+
// ─────────────────────────────────────────────────────────────
|
|
586
|
+
m.def("LargeHTMiner",
|
|
587
|
+
[](py::object data,
|
|
588
|
+
double minsup,
|
|
589
|
+
unsigned int time_limit,
|
|
590
|
+
bool /*preproc*/, // kept for API symmetry; ignored
|
|
591
|
+
bool use_dic,
|
|
592
|
+
bool verbose,
|
|
593
|
+
const std::string &out_file)
|
|
594
|
+
{
|
|
595
|
+
using namespace largehm;
|
|
596
|
+
|
|
597
|
+
// 1) Global configuration (mirror professor's style)
|
|
598
|
+
largehm::time_limit = time_limit;
|
|
599
|
+
largehm::pre_pro = true; // always preprocess
|
|
600
|
+
largehm::use_dic = use_dic;
|
|
601
|
+
largehm::just_build = false;
|
|
602
|
+
largehm::b_disp = verbose;
|
|
603
|
+
largehm::b_write = !out_file.empty();
|
|
604
|
+
largehm::out_file = out_file;
|
|
605
|
+
|
|
606
|
+
// 2) HARD RESET of all global state for a fresh run
|
|
607
|
+
largehm::ClearCollected(); // our helper in largehm::utility.cpp
|
|
608
|
+
|
|
609
|
+
largehm::M = 0;
|
|
610
|
+
largehm::L = 0;
|
|
611
|
+
largehm::mlim = 0;
|
|
612
|
+
largehm::N = 0;
|
|
613
|
+
largehm::theta = 0;
|
|
614
|
+
largehm::E = 0;
|
|
615
|
+
largehm::itmset_exists = false;
|
|
616
|
+
|
|
617
|
+
// containers
|
|
618
|
+
// (item_dic reset is optional and not strictly needed here)
|
|
619
|
+
largehm::DFS.clear();
|
|
620
|
+
largehm::VDFS.clear();
|
|
621
|
+
largehm::Tree.clear();
|
|
622
|
+
largehm::VTree.clear();
|
|
623
|
+
largehm::CTree.clear();
|
|
624
|
+
|
|
625
|
+
largehm::start_time = std::clock();
|
|
626
|
+
|
|
627
|
+
// 3) Handle input (file path or Python list)
|
|
628
|
+
TempFile tmp;
|
|
629
|
+
std::string path;
|
|
630
|
+
|
|
631
|
+
if (py::isinstance<py::str>(data)) {
|
|
632
|
+
path = data.cast<std::string>();
|
|
633
|
+
} else {
|
|
634
|
+
auto seqs = data.cast<std::vector<std::vector<int>>>();
|
|
635
|
+
tmp.path = write_temp_seq_file(seqs);
|
|
636
|
+
path = tmp.path;
|
|
637
|
+
}
|
|
638
|
+
|
|
639
|
+
if (verbose) {
|
|
640
|
+
std::cerr << "[LargeHTMiner] path=" << path
|
|
641
|
+
<< " minsup=" << minsup
|
|
642
|
+
<< " preproc(always)=true"
|
|
643
|
+
<< " use_dic=" << use_dic
|
|
644
|
+
<< std::endl;
|
|
645
|
+
}
|
|
646
|
+
|
|
647
|
+
// 4) Build MDD / load instance.
|
|
648
|
+
// NOTE: Load_instance() itself does Tree.emplace_back(0,0,0),
|
|
649
|
+
// so we DO NOT create a root node here.
|
|
650
|
+
if (!largehm::Load_instance(path, minsup)) {
|
|
651
|
+
throw std::runtime_error("LargeHTMiner: failed to load instance from: " + path);
|
|
652
|
+
}
|
|
653
|
+
|
|
654
|
+
// 5) Run miner (same timing logic as original main)
|
|
655
|
+
if (!largehm::just_build &&
|
|
656
|
+
largehm::give_time(std::clock() - largehm::start_time) < largehm::time_limit)
|
|
657
|
+
{
|
|
658
|
+
largehm::Freq_miner();
|
|
659
|
+
if (largehm::give_time(std::clock() - largehm::start_time) >= largehm::time_limit) {
|
|
660
|
+
std::cout << "TIME LIMIT REACHED\n";
|
|
661
|
+
}
|
|
662
|
+
}
|
|
663
|
+
|
|
664
|
+
// 6) Return collected patterns + runtime
|
|
665
|
+
py::dict out;
|
|
666
|
+
out["patterns"] = largehm::GetCollected();
|
|
667
|
+
out["time"] = largehm::give_time(std::clock() - largehm::start_time);
|
|
668
|
+
return out;
|
|
669
|
+
},
|
|
670
|
+
py::arg("data"),
|
|
671
|
+
py::arg("minsup") = 0.01,
|
|
672
|
+
py::arg("time_limit") = 36000,
|
|
673
|
+
py::arg("preproc") = false, // kept for API symmetry
|
|
674
|
+
py::arg("use_dic") = false,
|
|
675
|
+
py::arg("verbose") = false,
|
|
676
|
+
py::arg("out_file") = ""
|
|
677
|
+
);
|
|
678
|
+
|
|
679
|
+
|
|
680
|
+
}
|
|
681
|
+
|
|
682
|
+
|
|
683
|
+
/*#include <pybind11/pybind11.h>
|
|
684
|
+
#include <pybind11/stl.h>
|
|
5
685
|
namespace py = pybind11;
|
|
6
686
|
#include <iostream>
|
|
7
687
|
|
|
@@ -28,6 +708,7 @@ namespace py = pybind11;
|
|
|
28
708
|
#include "largepp/src/load_inst.hpp"
|
|
29
709
|
#include "largepp/src/utility.hpp"
|
|
30
710
|
|
|
711
|
+
|
|
31
712
|
#include "largebm/src/freq_miner.hpp"
|
|
32
713
|
#include "largebm/src/load_inst.hpp"
|
|
33
714
|
#include "largebm/src/utility.hpp"
|
|
@@ -469,7 +1150,7 @@ m.def("HTMiner",
|
|
|
469
1150
|
// ─────────────────────────────────────────────────────────────────────────
|
|
470
1151
|
// LargeBTMiner (MDD-based)
|
|
471
1152
|
// ─────────────────────────────────────────────────────────────────────────
|
|
472
|
-
m.def("LargeBTMiner",
|
|
1153
|
+
/*m.def("LargeBTMiner",
|
|
473
1154
|
[](py::object data,
|
|
474
1155
|
double minsup,
|
|
475
1156
|
unsigned int time_limit,
|
|
@@ -676,4 +1357,4 @@ m.def("LargeHTMiner",
|
|
|
676
1357
|
|
|
677
1358
|
|
|
678
1359
|
|
|
679
|
-
}
|
|
1360
|
+
} */
|