effspm 0.1.5__cp313-cp313-win_amd64.whl → 0.1.7__cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- effspm/__init__.py +2 -3
- effspm/_core.cp313-win_amd64.pyd +0 -0
- effspm/_core.cpp +91 -13
- effspm/freq_miner.hpp +4 -2
- effspm/load_inst.hpp +17 -12
- {effspm-0.1.5.dist-info → effspm-0.1.7.dist-info}/METADATA +1 -1
- effspm-0.1.7.dist-info/RECORD +14 -0
- {effspm-0.1.5.dist-info → effspm-0.1.7.dist-info}/WHEEL +1 -1
- effspm-0.1.5.dist-info/RECORD +0 -14
- {effspm-0.1.5.dist-info → effspm-0.1.7.dist-info}/licenses/LICENSE +0 -0
- {effspm-0.1.5.dist-info → effspm-0.1.7.dist-info}/top_level.txt +0 -0
effspm/__init__.py
CHANGED
effspm/_core.cp313-win_amd64.pyd
CHANGED
|
Binary file
|
effspm/_core.cpp
CHANGED
|
@@ -1,3 +1,7 @@
|
|
|
1
|
+
#include <ctime> // std::clock
|
|
2
|
+
#include <cmath> // std::ceil, std::abs
|
|
3
|
+
#include <algorithm> // std::max
|
|
4
|
+
#include <iostream> // optional echo
|
|
1
5
|
#include <pybind11/pybind11.h>
|
|
2
6
|
#include <pybind11/stl.h>
|
|
3
7
|
|
|
@@ -5,24 +9,98 @@
|
|
|
5
9
|
#include "freq_miner.hpp"
|
|
6
10
|
#include "utility.hpp"
|
|
7
11
|
|
|
8
|
-
|
|
9
12
|
namespace py = pybind11;
|
|
10
13
|
|
|
11
14
|
PYBIND11_MODULE(_core, m) {
|
|
12
15
|
m.doc() = "Efficient Sequential Pattern Mining via Prefix-Projection";
|
|
13
16
|
|
|
14
|
-
m.def("
|
|
15
|
-
|
|
17
|
+
m.def("PrefixProjection",
|
|
18
|
+
[](py::object data,
|
|
19
|
+
double minsup,
|
|
20
|
+
unsigned int time_limit_arg,
|
|
21
|
+
bool preproc_arg,
|
|
22
|
+
bool use_dic_arg,
|
|
23
|
+
bool verbose_arg,
|
|
24
|
+
const std::string &out_file_arg)
|
|
25
|
+
{
|
|
26
|
+
// 1) configure C++ globals
|
|
27
|
+
time_limit = time_limit_arg;
|
|
28
|
+
pre_pro = preproc_arg;
|
|
29
|
+
use_dic = use_dic_arg;
|
|
30
|
+
use_list = false;
|
|
31
|
+
b_disp = verbose_arg;
|
|
32
|
+
b_write = !out_file_arg.empty();
|
|
33
|
+
out_file = out_file_arg;
|
|
34
|
+
|
|
35
|
+
// 2) clear collector & start timer
|
|
36
|
+
ClearCollected();
|
|
37
|
+
start_time = std::clock();
|
|
38
|
+
|
|
39
|
+
// 3) load either file or in‐memory sequences
|
|
40
|
+
if (py::isinstance<py::str>(data)) {
|
|
41
|
+
auto path = data.cast<std::string>();
|
|
42
|
+
if (!Load_instance(path, minsup))
|
|
43
|
+
throw std::runtime_error("Failed to load database from " + path);
|
|
44
|
+
}
|
|
45
|
+
else {
|
|
46
|
+
// convert Python List[List[int]] → C++ items
|
|
47
|
+
auto seqs = data.cast<std::vector<std::vector<int>>>();
|
|
48
|
+
items = std::move(seqs);
|
|
49
|
+
N = items.size();
|
|
50
|
+
|
|
51
|
+
// a) compute max item ID → L
|
|
52
|
+
int max_id = 0;
|
|
53
|
+
for (auto &seq : items)
|
|
54
|
+
for (int x : seq)
|
|
55
|
+
max_id = std::max(max_id, std::abs(x));
|
|
56
|
+
L = static_cast<unsigned int>(max_id);
|
|
57
|
+
|
|
58
|
+
// b) support threshold θ
|
|
59
|
+
if (minsup < 1.0)
|
|
60
|
+
theta = static_cast<unsigned long long>(std::ceil(minsup * N));
|
|
61
|
+
else
|
|
62
|
+
theta = static_cast<unsigned long long>(minsup);
|
|
63
|
+
|
|
64
|
+
// c) initialize DFS stack
|
|
65
|
+
DFS.clear();
|
|
66
|
+
DFS.reserve(L);
|
|
67
|
+
for (unsigned int i = 0; i < L; ++i)
|
|
68
|
+
DFS.emplace_back(-static_cast<int>(i) - 1);
|
|
69
|
+
|
|
70
|
+
// d) gather dataset stats: max length M, total entries E
|
|
71
|
+
M = 0;
|
|
72
|
+
E = 0;
|
|
73
|
+
for (auto &seq : items) {
|
|
74
|
+
M = std::max<unsigned int>(M, static_cast<unsigned int>(seq.size()));
|
|
75
|
+
E += seq.size();
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
if (b_disp) {
|
|
79
|
+
std::cout << "\nIn-memory dataset: "
|
|
80
|
+
<< N << " sequences, max len " << M
|
|
81
|
+
<< ", " << E << " entries, " << L << " items\n";
|
|
82
|
+
}
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
// 4) run the C++ miner
|
|
86
|
+
Freq_miner();
|
|
16
87
|
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
}
|
|
88
|
+
// 5) collect patterns & timing
|
|
89
|
+
auto patterns = GetCollected();
|
|
90
|
+
double wall_time = give_time(std::clock() - start_time);
|
|
21
91
|
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
92
|
+
// 6) return Python dict
|
|
93
|
+
py::dict out;
|
|
94
|
+
out["patterns"] = patterns;
|
|
95
|
+
out["time"] = wall_time;
|
|
96
|
+
return out;
|
|
97
|
+
},
|
|
98
|
+
py::arg("data"),
|
|
99
|
+
py::arg("minsup") = 0.01,
|
|
100
|
+
py::arg("time_limit") = 10 * 3600,
|
|
101
|
+
py::arg("preproc") = false,
|
|
102
|
+
py::arg("use_dic") = false,
|
|
103
|
+
py::arg("verbose") = false,
|
|
104
|
+
py::arg("out_file") = ""
|
|
105
|
+
);
|
|
28
106
|
}
|
effspm/freq_miner.hpp
CHANGED
effspm/load_inst.hpp
CHANGED
|
@@ -1,25 +1,30 @@
|
|
|
1
|
+
// effspm/load_inst.hpp
|
|
1
2
|
#pragma once
|
|
2
3
|
|
|
3
|
-
#include<vector>
|
|
4
|
-
#include<string>
|
|
4
|
+
#include <vector>
|
|
5
|
+
#include <string>
|
|
5
6
|
#include <fstream>
|
|
6
7
|
#include <map>
|
|
7
|
-
//
|
|
8
|
-
|
|
8
|
+
#include <ctime> // for clock_t
|
|
9
9
|
|
|
10
10
|
using namespace std;
|
|
11
11
|
|
|
12
|
+
// ------------------------------------------------------------
|
|
13
|
+
// forward declare Pattern (defined in freq_miner.hpp)
|
|
14
|
+
struct Pattern;
|
|
15
|
+
|
|
16
|
+
// Main entrypoint: load your file on disk into 'items', build DFS, theta, etc.
|
|
12
17
|
bool Load_instance(string &items_file, double thresh);
|
|
13
18
|
|
|
19
|
+
// storage & globals shared between the C++-CLI & Python bindings
|
|
14
20
|
extern vector<vector<int>> items;
|
|
21
|
+
extern vector<Pattern> DFS; // now Pattern is known
|
|
22
|
+
extern vector<int> item_dic;
|
|
15
23
|
|
|
16
|
-
extern string
|
|
17
|
-
|
|
18
|
-
extern bool b_disp, b_write, use_dic, use_list, pre_pro;
|
|
19
|
-
|
|
20
|
-
extern unsigned int M, L, time_limit;
|
|
21
|
-
|
|
22
|
-
extern unsigned long long int N, theta;
|
|
24
|
+
extern string out_file;
|
|
25
|
+
extern bool b_disp, b_write, use_dic, use_list, pre_pro;
|
|
23
26
|
|
|
24
|
-
extern
|
|
27
|
+
extern unsigned int M, L, time_limit;
|
|
28
|
+
extern unsigned long long N, E, theta; // E = total number of entries
|
|
25
29
|
|
|
30
|
+
extern clock_t start_time;
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
effspm/__init__.py,sha256=9edygUUVqPU4ubTsSGcch5NST8moZaYYvOgXPWv5L-w,71
|
|
2
|
+
effspm/_core.cp313-win_amd64.pyd,sha256=b6x8TewWC_-9aXUJCcGqmrLnmS4eQlwRkUGIOcCqzF8,179712
|
|
3
|
+
effspm/_core.cpp,sha256=JzUCIVmmDMVfRIjIsmXtVkSMkRMprE3wpOrx-jYhAQU,3781
|
|
4
|
+
effspm/freq_miner.cpp,sha256=qQDFPoPKY3ICaH2brm1CUKwNBhCy0-0dUWEoV_3FwME,4785
|
|
5
|
+
effspm/freq_miner.hpp,sha256=0Phi-H6Wto9mVRqmR_APFbmLUBDdZOEX8UU0KiHVJz4,789
|
|
6
|
+
effspm/load_inst.cpp,sha256=kTEucQ5YU7xPdRjcM9ixAPk49cLJ8H8YN9gJsTrm7mM,4769
|
|
7
|
+
effspm/load_inst.hpp,sha256=yQKGhYiPWoKC-t6dLPjpVi5qaOBkCUoQMwlWhYzDUj4,936
|
|
8
|
+
effspm/utility.cpp,sha256=OD5K0K0jQKgqVGJm91pSofImXOEVkDnqQvFh1qytvpA,1458
|
|
9
|
+
effspm/utility.hpp,sha256=hECSm-ORd20QJMundbOLkZvo9nj-I2pXd5AokagyGqQ,773
|
|
10
|
+
effspm-0.1.7.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
|
|
11
|
+
effspm-0.1.7.dist-info/METADATA,sha256=iwRl88P5FDFbOYeY6Y9Xd_Lk6DK_E018VnZJW2d8a0A,1349
|
|
12
|
+
effspm-0.1.7.dist-info/WHEEL,sha256=28q3oNn-jDh_H155N4TqNP6BD4TtmXmAi_15pKLiI-M,101
|
|
13
|
+
effspm-0.1.7.dist-info/top_level.txt,sha256=2O-AuI0nw0pDmJMo2jzM1wvV2rj48AmkjskkAnsuuQk,7
|
|
14
|
+
effspm-0.1.7.dist-info/RECORD,,
|
effspm-0.1.5.dist-info/RECORD
DELETED
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
effspm/__init__.py,sha256=Nqm5qpeFBGhn0dGdEIhIc9H-u6vpzBdlP5KEAEb-LIM,49
|
|
2
|
-
effspm/_core.cp313-win_amd64.pyd,sha256=wOGnPln3qO0aHSqCTpgbcxn__OuVOOT1Sq4PYtg-1n0,172032
|
|
3
|
-
effspm/_core.cpp,sha256=3uqAf3NrZW6Ok-aPKK8lhWpdpwAu3M2dtPglZWMiJ3g,764
|
|
4
|
-
effspm/freq_miner.cpp,sha256=qQDFPoPKY3ICaH2brm1CUKwNBhCy0-0dUWEoV_3FwME,4785
|
|
5
|
-
effspm/freq_miner.hpp,sha256=Exp_4_7yIV0Y9U2I8kyImPoY8hWUFMIE--Zjf-G2TlI,712
|
|
6
|
-
effspm/load_inst.cpp,sha256=kTEucQ5YU7xPdRjcM9ixAPk49cLJ8H8YN9gJsTrm7mM,4769
|
|
7
|
-
effspm/load_inst.hpp,sha256=VxiVbYWe1I8tQvVZRrvwQ097xPxZM5CUn5CjyXM3c7U,465
|
|
8
|
-
effspm/utility.cpp,sha256=OD5K0K0jQKgqVGJm91pSofImXOEVkDnqQvFh1qytvpA,1458
|
|
9
|
-
effspm/utility.hpp,sha256=hECSm-ORd20QJMundbOLkZvo9nj-I2pXd5AokagyGqQ,773
|
|
10
|
-
effspm-0.1.5.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
|
|
11
|
-
effspm-0.1.5.dist-info/METADATA,sha256=fVoNUBxIuCDGMc3SzOTtKr1X1WeiA3y6e44EdqUBTns,1349
|
|
12
|
-
effspm-0.1.5.dist-info/WHEEL,sha256=6WoW_bHwIgUwfRGomr3tsp2x5B1WboZF1vodYe1_93Y,101
|
|
13
|
-
effspm-0.1.5.dist-info/top_level.txt,sha256=2O-AuI0nw0pDmJMo2jzM1wvV2rj48AmkjskkAnsuuQk,7
|
|
14
|
-
effspm-0.1.5.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|