effspm 0.2.6__cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- effspm/__init__.py +11 -0
- effspm/_core.cpp +106 -0
- effspm/_effspm.cpp +609 -0
- effspm/_effspm.cpython-39-i386-linux-gnu.so +0 -0
- effspm/btminer/src/build_mdd.cpp +63 -0
- effspm/btminer/src/build_mdd.hpp +40 -0
- effspm/btminer/src/freq_miner.cpp +179 -0
- effspm/btminer/src/freq_miner.hpp +39 -0
- effspm/btminer/src/load_inst.cpp +200 -0
- effspm/btminer/src/load_inst.hpp +25 -0
- effspm/btminer/src/utility.cpp +65 -0
- effspm/btminer/src/utility.hpp +40 -0
- effspm/freq_miner.cpp +143 -0
- effspm/freq_miner.hpp +48 -0
- effspm/htminer/src/build_mdd.cpp +192 -0
- effspm/htminer/src/build_mdd.hpp +64 -0
- effspm/htminer/src/freq_miner.cpp +350 -0
- effspm/htminer/src/freq_miner.hpp +60 -0
- effspm/htminer/src/load_inst.cpp +394 -0
- effspm/htminer/src/load_inst.hpp +23 -0
- effspm/htminer/src/utility.cpp +72 -0
- effspm/htminer/src/utility.hpp +77 -0
- effspm/largebm/src/build_mdd.cpp +137 -0
- effspm/largebm/src/build_mdd.hpp +47 -0
- effspm/largebm/src/freq_miner.cpp +349 -0
- effspm/largebm/src/freq_miner.hpp +48 -0
- effspm/largebm/src/load_inst.cpp +230 -0
- effspm/largebm/src/load_inst.hpp +45 -0
- effspm/largebm/src/utility.cpp +45 -0
- effspm/largebm/src/utility.hpp +18 -0
- effspm/largehm/src/build_mdd.cpp +174 -0
- effspm/largehm/src/build_mdd.hpp +93 -0
- effspm/largehm/src/freq_miner.cpp +445 -0
- effspm/largehm/src/freq_miner.hpp +77 -0
- effspm/largehm/src/load_inst.cpp +357 -0
- effspm/largehm/src/load_inst.hpp +64 -0
- effspm/largehm/src/utility.cpp +38 -0
- effspm/largehm/src/utility.hpp +29 -0
- effspm/largepp/src/freq_miner.cpp +170 -0
- effspm/largepp/src/freq_miner.hpp +43 -0
- effspm/largepp/src/load_inst.cpp +219 -0
- effspm/largepp/src/load_inst.hpp +28 -0
- effspm/largepp/src/utility.cpp +34 -0
- effspm/largepp/src/utility.hpp +21 -0
- effspm/load_inst.cpp +252 -0
- effspm/load_inst.hpp +31 -0
- effspm/utility.cpp +55 -0
- effspm/utility.hpp +29 -0
- effspm-0.2.6.dist-info/METADATA +237 -0
- effspm-0.2.6.dist-info/RECORD +53 -0
- effspm-0.2.6.dist-info/WHEEL +6 -0
- effspm-0.2.6.dist-info/licenses/LICENSE +201 -0
- effspm-0.2.6.dist-info/top_level.txt +1 -0
effspm/__init__.py
ADDED
effspm/_core.cpp
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
#include <ctime> // std::clock
|
|
2
|
+
#include <cmath> // std::ceil, std::abs
|
|
3
|
+
#include <algorithm> // std::max
|
|
4
|
+
#include <iostream> // optional echo
|
|
5
|
+
#include <pybind11/pybind11.h>
|
|
6
|
+
#include <pybind11/stl.h>
|
|
7
|
+
|
|
8
|
+
#include "load_inst.hpp"
|
|
9
|
+
#include "freq_miner.hpp"
|
|
10
|
+
#include "utility.hpp"
|
|
11
|
+
|
|
12
|
+
namespace py = pybind11;
|
|
13
|
+
|
|
14
|
+
// Python extension module `_core`.
//
// Exposes one function, `PrefixProjection`, which configures the miner's
// global state, loads a sequence database (from a path or from in-memory
// Python lists), runs Freq_miner() and returns the collected patterns.
PYBIND11_MODULE(_core, m) {
    m.doc() = "Efficient Sequential Pattern Mining via Prefix-Projection";

    // PrefixProjection(data, minsup=0.01, time_limit=36000, preproc=False,
    //                  use_dic=False, verbose=False, out_file="")
    //
    //   data       - a Python str, forwarded to Load_instance() as a path, or
    //                any object castable to List[List[int]].
    //   minsup     - in-memory input: a value < 1.0 is a fraction of the number
    //                of sequences (rounded up), a value >= 1.0 is an absolute
    //                count (truncated). For a path it is forwarded unchanged
    //                to Load_instance().
    //   time_limit - stored in the global `time_limit`.
    //   preproc    - stored in the global `pre_pro`.
    //   use_dic    - stored in the global `use_dic`.
    //   verbose    - stored in the global `b_disp`; also enables the dataset
    //                summary printed for in-memory input.
    //   out_file   - stored in the global `out_file`; a non-empty value sets
    //                `b_write`.
    //
    // Returns a dict with keys "patterns" (result of GetCollected()) and
    // "time" (result of give_time() on the elapsed std::clock() ticks).
    //
    // Raises RuntimeError when Load_instance() reports failure, and ValueError
    // when in-memory input is combined with a negative or non-finite minsup.
    //
    // NOTE(review): the function communicates with the miner through globals
    // declared outside this file, so it is presumably not safe to call
    // concurrently - confirm against the miner sources.
    m.def("PrefixProjection",
          [](py::object data,
             double minsup,
             unsigned int time_limit_arg,
             bool preproc_arg,
             bool use_dic_arg,
             bool verbose_arg,
             const std::string &out_file_arg)
          {
              // 1) Push the call options into the miner's globals.
              time_limit = time_limit_arg;
              pre_pro    = preproc_arg;
              use_dic    = use_dic_arg;
              use_list   = false;
              b_disp     = verbose_arg;
              b_write    = !out_file_arg.empty();
              out_file   = out_file_arg;

              // 2) Drop results of any previous run and start the timer.
              ClearCollected();
              start_time = std::clock();

              // 3) Load the database: a str is a path, anything else is
              //    treated as in-memory sequences.
              if (py::isinstance<py::str>(data)) {
                  const auto path = data.cast<std::string>();
                  if (!Load_instance(path, minsup))
                      throw std::runtime_error("Failed to load database from " + path);
              } else {
                  // The threshold below is produced by converting a double to
                  // unsigned long long; for a negative, NaN or infinite value
                  // that conversion is undefined behaviour. Reject such input
                  // before any dataset state is overwritten.
                  if (!std::isfinite(minsup) || minsup < 0.0)
                      throw py::value_error("minsup must be a finite, non-negative number");

                  // Python List[List[int]] -> C++ items
                  items = data.cast<std::vector<std::vector<int>>>();
                  N = items.size();

                  // a) L = largest absolute item id present in the data
                  int largest_id = 0;
                  for (const auto &sequence : items)
                      for (const int item : sequence)
                          largest_id = std::max(largest_id, std::abs(item));
                  L = static_cast<unsigned int>(largest_id);

                  // b) support threshold: fraction of N (rounded up) when
                  //    minsup < 1.0, absolute count otherwise
                  if (minsup < 1.0)
                      theta = static_cast<unsigned long long>(std::ceil(minsup * N));
                  else
                      theta = static_cast<unsigned long long>(minsup);

                  // c) seed DFS with one entry per item id, built from the
                  //    negated ids -1, -2, ..., -L
                  DFS.clear();
                  DFS.reserve(L);
                  for (unsigned int i = 0; i < L; ++i)
                      DFS.emplace_back(-static_cast<int>(i) - 1);

                  // d) dataset statistics: M = longest sequence, E = total
                  //    number of entries
                  M = 0;
                  E = 0;
                  for (const auto &sequence : items) {
                      M = std::max<unsigned int>(M, static_cast<unsigned int>(sequence.size()));
                      E += sequence.size();
                  }

                  if (b_disp) {
                      std::cout << "\nIn-memory dataset: "
                                << N << " sequences, max len " << M
                                << ", " << E << " entries, " << L << " items\n";
                  }
              }

              // 4) Run the miner.
              Freq_miner();

              // 5) Fetch the results and the elapsed time. std::clock()
              //    reports processor time, so this is CPU time rather than
              //    wall-clock time.
              auto patterns = GetCollected();
              double elapsed = give_time(std::clock() - start_time);

              // 6) Package everything as a Python dict.
              py::dict out;
              out["patterns"] = patterns;
              out["time"]     = elapsed;
              return out;
          },
          py::arg("data"),
          py::arg("minsup") = 0.01,
          py::arg("time_limit") = 10 * 3600,
          py::arg("preproc") = false,
          py::arg("use_dic") = false,
          py::arg("verbose") = false,
          py::arg("out_file") = ""
    );
}
|