effspm 0.1.5__cp313-cp313-win32.whl → 0.1.7__cp313-cp313-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
effspm/__init__.py CHANGED
@@ -1,4 +1,3 @@
1
- from ._core import mine
1
+ from ._core import PrefixProjection
2
2
 
3
-
4
- __all__ = ['mine']
3
+ __all__ = ['PrefixProjection']
Binary file
effspm/_core.cpp CHANGED
@@ -1,3 +1,7 @@
1
+ #include <ctime> // std::clock
2
+ #include <cmath> // std::ceil, std::abs
3
+ #include <algorithm> // std::max
4
+ #include <iostream> // optional echo
1
5
  #include <pybind11/pybind11.h>
2
6
  #include <pybind11/stl.h>
3
7
 
@@ -5,24 +9,98 @@
5
9
  #include "freq_miner.hpp"
6
10
  #include "utility.hpp"
7
11
 
8
-
9
12
  namespace py = pybind11;
10
13
 
11
14
  PYBIND11_MODULE(_core, m) {
12
15
  m.doc() = "Efficient Sequential Pattern Mining via Prefix-Projection";
13
16
 
14
- m.def("mine", [](std::string data_file, double minsup) {
15
- ClearCollected();
17
+ m.def("PrefixProjection",
18
+ [](py::object data,
19
+ double minsup,
20
+ unsigned int time_limit_arg,
21
+ bool preproc_arg,
22
+ bool use_dic_arg,
23
+ bool verbose_arg,
24
+ const std::string &out_file_arg)
25
+ {
26
+ // 1) configure C++ globals
27
+ time_limit = time_limit_arg;
28
+ pre_pro = preproc_arg;
29
+ use_dic = use_dic_arg;
30
+ use_list = false;
31
+ b_disp = verbose_arg;
32
+ b_write = !out_file_arg.empty();
33
+ out_file = out_file_arg;
34
+
35
+ // 2) clear collector & start timer
36
+ ClearCollected();
37
+ start_time = std::clock();
38
+
39
+ // 3) load either file or in‐memory sequences
40
+ if (py::isinstance<py::str>(data)) {
41
+ auto path = data.cast<std::string>();
42
+ if (!Load_instance(path, minsup))
43
+ throw std::runtime_error("Failed to load database from " + path);
44
+ }
45
+ else {
46
+ // convert Python List[List[int]] → C++ items
47
+ auto seqs = data.cast<std::vector<std::vector<int>>>();
48
+ items = std::move(seqs);
49
+ N = items.size();
50
+
51
+ // a) compute max item ID → L
52
+ int max_id = 0;
53
+ for (auto &seq : items)
54
+ for (int x : seq)
55
+ max_id = std::max(max_id, std::abs(x));
56
+ L = static_cast<unsigned int>(max_id);
57
+
58
+ // b) support threshold θ
59
+ if (minsup < 1.0)
60
+ theta = static_cast<unsigned long long>(std::ceil(minsup * N));
61
+ else
62
+ theta = static_cast<unsigned long long>(minsup);
63
+
64
+ // c) initialize DFS stack
65
+ DFS.clear();
66
+ DFS.reserve(L);
67
+ for (unsigned int i = 0; i < L; ++i)
68
+ DFS.emplace_back(-static_cast<int>(i) - 1);
69
+
70
+ // d) gather dataset stats: max length M, total entries E
71
+ M = 0;
72
+ E = 0;
73
+ for (auto &seq : items) {
74
+ M = std::max<unsigned int>(M, static_cast<unsigned int>(seq.size()));
75
+ E += seq.size();
76
+ }
77
+
78
+ if (b_disp) {
79
+ std::cout << "\nIn-memory dataset: "
80
+ << N << " sequences, max len " << M
81
+ << ", " << E << " entries, " << L << " items\n";
82
+ }
83
+ }
84
+
85
+ // 4) run the C++ miner
86
+ Freq_miner();
16
87
 
17
- // Note: Pass by value (safe copy)
18
- if (!Load_instance(data_file, minsup)) {
19
- throw std::runtime_error("Failed to load database from " + data_file);
20
- }
88
+ // 5) collect patterns & timing
89
+ auto patterns = GetCollected();
90
+ double wall_time = give_time(std::clock() - start_time);
21
91
 
22
- Freq_miner();
23
- return GetCollected();
24
- },
25
- py::arg("data_file"),
26
- py::arg("minsup") = 0.01,
27
- "Mine sequential patterns from the given data file with minimum support.");
92
+ // 6) return Python dict
93
+ py::dict out;
94
+ out["patterns"] = patterns;
95
+ out["time"] = wall_time;
96
+ return out;
97
+ },
98
+ py::arg("data"),
99
+ py::arg("minsup") = 0.01,
100
+ py::arg("time_limit") = 10 * 3600,
101
+ py::arg("preproc") = false,
102
+ py::arg("use_dic") = false,
103
+ py::arg("verbose") = false,
104
+ py::arg("out_file") = ""
105
+ );
28
106
  }
effspm/freq_miner.hpp CHANGED
@@ -1,7 +1,9 @@
1
1
  #pragma once
2
-
2
+ #include <vector>
3
3
  #include "load_inst.hpp"
4
-
4
+ #include <cstdlib>
5
+ #include <cmath>
6
+ using namespace std;
5
7
  void Freq_miner();
6
8
  void Out_patt(std::vector<int>& seq, unsigned int freq);
7
9
 
effspm/load_inst.hpp CHANGED
@@ -1,25 +1,30 @@
1
+ // effspm/load_inst.hpp
1
2
  #pragma once
2
3
 
3
- #include<vector>
4
- #include<string>
4
+ #include <vector>
5
+ #include <string>
5
6
  #include <fstream>
6
7
  #include <map>
7
- // Should work because "effspm" is in include_dirs
8
-
8
+ #include <ctime> // for clock_t
9
9
 
10
10
  using namespace std;
11
11
 
12
+ // ------------------------------------------------------------
13
+ // forward declare Pattern (defined in freq_miner.hpp)
14
+ struct Pattern;
15
+
16
+ // Main entrypoint: load your file on disk into 'items', build DFS, theta, etc.
12
17
  bool Load_instance(string &items_file, double thresh);
13
18
 
19
+ // storage & globals shared between the C++-CLI & Python bindings
14
20
  extern vector<vector<int>> items;
21
+ extern vector<Pattern> DFS; // now Pattern is known
22
+ extern vector<int> item_dic;
15
23
 
16
- extern string out_file;
17
-
18
- extern bool b_disp, b_write, use_dic, use_list, pre_pro;
19
-
20
- extern unsigned int M, L, time_limit;
21
-
22
- extern unsigned long long int N, theta;
24
+ extern string out_file;
25
+ extern bool b_disp, b_write, use_dic, use_list, pre_pro;
23
26
 
24
- extern clock_t start_time;
27
+ extern unsigned int M, L, time_limit;
28
+ extern unsigned long long N, E, theta; // E = total number of entries
25
29
 
30
+ extern clock_t start_time;
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: effspm
3
- Version: 0.1.5
3
+ Version: 0.1.7
4
4
  Summary: Prefix‑Projection sequential pattern mining
5
5
  Home-page: https://github.com/yeshu999/effspm
6
6
  Author: yeshu999
@@ -0,0 +1,14 @@
1
+ effspm/__init__.py,sha256=9edygUUVqPU4ubTsSGcch5NST8moZaYYvOgXPWv5L-w,71
2
+ effspm/_core.cp313-win32.pyd,sha256=AzZZKY_BW-4JnQib43HIhCrTZu8iZGk2pb-hq4HiBb8,159744
3
+ effspm/_core.cpp,sha256=JzUCIVmmDMVfRIjIsmXtVkSMkRMprE3wpOrx-jYhAQU,3781
4
+ effspm/freq_miner.cpp,sha256=qQDFPoPKY3ICaH2brm1CUKwNBhCy0-0dUWEoV_3FwME,4785
5
+ effspm/freq_miner.hpp,sha256=0Phi-H6Wto9mVRqmR_APFbmLUBDdZOEX8UU0KiHVJz4,789
6
+ effspm/load_inst.cpp,sha256=kTEucQ5YU7xPdRjcM9ixAPk49cLJ8H8YN9gJsTrm7mM,4769
7
+ effspm/load_inst.hpp,sha256=yQKGhYiPWoKC-t6dLPjpVi5qaOBkCUoQMwlWhYzDUj4,936
8
+ effspm/utility.cpp,sha256=OD5K0K0jQKgqVGJm91pSofImXOEVkDnqQvFh1qytvpA,1458
9
+ effspm/utility.hpp,sha256=hECSm-ORd20QJMundbOLkZvo9nj-I2pXd5AokagyGqQ,773
10
+ effspm-0.1.7.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
11
+ effspm-0.1.7.dist-info/METADATA,sha256=iwRl88P5FDFbOYeY6Y9Xd_Lk6DK_E018VnZJW2d8a0A,1349
12
+ effspm-0.1.7.dist-info/WHEEL,sha256=NYexIIGIZlBGVlPAbGaWli7q7OqAv5-KmxH-Z-6t4-Y,97
13
+ effspm-0.1.7.dist-info/top_level.txt,sha256=2O-AuI0nw0pDmJMo2jzM1wvV2rj48AmkjskkAnsuuQk,7
14
+ effspm-0.1.7.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (78.1.0)
2
+ Generator: setuptools (79.0.1)
3
3
  Root-Is-Purelib: false
4
4
  Tag: cp313-cp313-win32
5
5
 
@@ -1,14 +0,0 @@
1
- effspm/__init__.py,sha256=Nqm5qpeFBGhn0dGdEIhIc9H-u6vpzBdlP5KEAEb-LIM,49
2
- effspm/_core.cp313-win32.pyd,sha256=2WeJkA5iw023f1iL1HhQPJNWguVoejGFHdPhq4I5TzA,151552
3
- effspm/_core.cpp,sha256=3uqAf3NrZW6Ok-aPKK8lhWpdpwAu3M2dtPglZWMiJ3g,764
4
- effspm/freq_miner.cpp,sha256=qQDFPoPKY3ICaH2brm1CUKwNBhCy0-0dUWEoV_3FwME,4785
5
- effspm/freq_miner.hpp,sha256=Exp_4_7yIV0Y9U2I8kyImPoY8hWUFMIE--Zjf-G2TlI,712
6
- effspm/load_inst.cpp,sha256=kTEucQ5YU7xPdRjcM9ixAPk49cLJ8H8YN9gJsTrm7mM,4769
7
- effspm/load_inst.hpp,sha256=VxiVbYWe1I8tQvVZRrvwQ097xPxZM5CUn5CjyXM3c7U,465
8
- effspm/utility.cpp,sha256=OD5K0K0jQKgqVGJm91pSofImXOEVkDnqQvFh1qytvpA,1458
9
- effspm/utility.hpp,sha256=hECSm-ORd20QJMundbOLkZvo9nj-I2pXd5AokagyGqQ,773
10
- effspm-0.1.5.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
11
- effspm-0.1.5.dist-info/METADATA,sha256=fVoNUBxIuCDGMc3SzOTtKr1X1WeiA3y6e44EdqUBTns,1349
12
- effspm-0.1.5.dist-info/WHEEL,sha256=iuBtox58ZfmdZzG-BEHKFn_UiBPbBDtT4b67THCTeoc,97
13
- effspm-0.1.5.dist-info/top_level.txt,sha256=2O-AuI0nw0pDmJMo2jzM1wvV2rj48AmkjskkAnsuuQk,7
14
- effspm-0.1.5.dist-info/RECORD,,