effspm 0.1.5__cp310-cp310-win_amd64.whl → 0.2.6__cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. effspm/__init__.py +9 -2
  2. effspm/_core.cpp +91 -13
  3. effspm/_effspm.cp310-win_amd64.pyd +0 -0
  4. effspm/_effspm.cpp +609 -0
  5. effspm/btminer/src/build_mdd.cpp +63 -0
  6. effspm/btminer/src/build_mdd.hpp +40 -0
  7. effspm/btminer/src/freq_miner.cpp +179 -0
  8. effspm/btminer/src/freq_miner.hpp +39 -0
  9. effspm/btminer/src/load_inst.cpp +200 -0
  10. effspm/btminer/src/load_inst.hpp +25 -0
  11. effspm/btminer/src/utility.cpp +65 -0
  12. effspm/btminer/src/utility.hpp +40 -0
  13. effspm/freq_miner.hpp +7 -2
  14. effspm/htminer/src/build_mdd.cpp +192 -0
  15. effspm/htminer/src/build_mdd.hpp +64 -0
  16. effspm/htminer/src/freq_miner.cpp +350 -0
  17. effspm/htminer/src/freq_miner.hpp +60 -0
  18. effspm/htminer/src/load_inst.cpp +394 -0
  19. effspm/htminer/src/load_inst.hpp +23 -0
  20. effspm/htminer/src/utility.cpp +72 -0
  21. effspm/htminer/src/utility.hpp +77 -0
  22. effspm/largebm/src/build_mdd.cpp +137 -0
  23. effspm/largebm/src/build_mdd.hpp +47 -0
  24. effspm/largebm/src/freq_miner.cpp +349 -0
  25. effspm/largebm/src/freq_miner.hpp +48 -0
  26. effspm/largebm/src/load_inst.cpp +230 -0
  27. effspm/largebm/src/load_inst.hpp +45 -0
  28. effspm/largebm/src/utility.cpp +45 -0
  29. effspm/largebm/src/utility.hpp +18 -0
  30. effspm/largehm/src/build_mdd.cpp +174 -0
  31. effspm/largehm/src/build_mdd.hpp +93 -0
  32. effspm/largehm/src/freq_miner.cpp +445 -0
  33. effspm/largehm/src/freq_miner.hpp +77 -0
  34. effspm/largehm/src/load_inst.cpp +357 -0
  35. effspm/largehm/src/load_inst.hpp +64 -0
  36. effspm/largehm/src/utility.cpp +38 -0
  37. effspm/largehm/src/utility.hpp +29 -0
  38. effspm/largepp/src/freq_miner.cpp +170 -0
  39. effspm/largepp/src/freq_miner.hpp +43 -0
  40. effspm/largepp/src/load_inst.cpp +219 -0
  41. effspm/largepp/src/load_inst.hpp +28 -0
  42. effspm/largepp/src/utility.cpp +34 -0
  43. effspm/largepp/src/utility.hpp +21 -0
  44. effspm/load_inst.hpp +18 -12
  45. effspm-0.2.6.dist-info/METADATA +237 -0
  46. effspm-0.2.6.dist-info/RECORD +53 -0
  47. {effspm-0.1.5.dist-info → effspm-0.2.6.dist-info}/WHEEL +1 -1
  48. effspm/_core.cp310-win_amd64.pyd +0 -0
  49. effspm-0.1.5.dist-info/METADATA +0 -38
  50. effspm-0.1.5.dist-info/RECORD +0 -14
  51. {effspm-0.1.5.dist-info → effspm-0.2.6.dist-info}/licenses/LICENSE +0 -0
  52. {effspm-0.1.5.dist-info → effspm-0.2.6.dist-info}/top_level.txt +0 -0
effspm/__init__.py CHANGED
@@ -1,4 +1,11 @@
1
- from ._core import mine
2
1
 
2
+ from ._effspm import PrefixProjection, HTMiner, LargeHTMiner, BTMiner, LargeBTMiner, LargePrefixProjection
3
3
 
4
- __all__ = ['mine']
4
+ __all__ = [
5
+ "PrefixProjection",
6
+ "HTMiner",
7
+ "LargeHTMiner",
8
+ "BTMiner",
9
+ "LargeBTMiner",
10
+ "LargePrefixProjection",
11
+ ]
effspm/_core.cpp CHANGED
@@ -1,3 +1,7 @@
1
+ #include <ctime> // std::clock
2
+ #include <cmath> // std::ceil, std::abs
3
+ #include <algorithm> // std::max
4
+ #include <iostream> // optional echo
1
5
  #include <pybind11/pybind11.h>
2
6
  #include <pybind11/stl.h>
3
7
 
@@ -5,24 +9,98 @@
5
9
  #include "freq_miner.hpp"
6
10
  #include "utility.hpp"
7
11
 
8
-
9
12
  namespace py = pybind11;
10
13
 
11
14
  PYBIND11_MODULE(_core, m) {
12
15
  m.doc() = "Efficient Sequential Pattern Mining via Prefix-Projection";
13
16
 
14
- m.def("mine", [](std::string data_file, double minsup) {
15
- ClearCollected();
17
+ m.def("PrefixProjection",
18
+ [](py::object data,
19
+ double minsup,
20
+ unsigned int time_limit_arg,
21
+ bool preproc_arg,
22
+ bool use_dic_arg,
23
+ bool verbose_arg,
24
+ const std::string &out_file_arg)
25
+ {
26
+ // 1) configure C++ globals
27
+ time_limit = time_limit_arg;
28
+ pre_pro = preproc_arg;
29
+ use_dic = use_dic_arg;
30
+ use_list = false;
31
+ b_disp = verbose_arg;
32
+ b_write = !out_file_arg.empty();
33
+ out_file = out_file_arg;
34
+
35
+ // 2) clear collector & start timer
36
+ ClearCollected();
37
+ start_time = std::clock();
38
+
39
+ // 3) load either file or in‐memory sequences
40
+ if (py::isinstance<py::str>(data)) {
41
+ auto path = data.cast<std::string>();
42
+ if (!Load_instance(path, minsup))
43
+ throw std::runtime_error("Failed to load database from " + path);
44
+ }
45
+ else {
46
+ // convert Python List[List[int]] → C++ items
47
+ auto seqs = data.cast<std::vector<std::vector<int>>>();
48
+ items = std::move(seqs);
49
+ N = items.size();
50
+
51
+ // a) compute max item ID → L
52
+ int max_id = 0;
53
+ for (auto &seq : items)
54
+ for (int x : seq)
55
+ max_id = std::max(max_id, std::abs(x));
56
+ L = static_cast<unsigned int>(max_id);
57
+
58
+ // b) support threshold θ
59
+ if (minsup < 1.0)
60
+ theta = static_cast<unsigned long long>(std::ceil(minsup * N));
61
+ else
62
+ theta = static_cast<unsigned long long>(minsup);
63
+
64
+ // c) initialize DFS stack
65
+ DFS.clear();
66
+ DFS.reserve(L);
67
+ for (unsigned int i = 0; i < L; ++i)
68
+ DFS.emplace_back(-static_cast<int>(i) - 1);
69
+
70
+ // d) gather dataset stats: max length M, total entries E
71
+ M = 0;
72
+ E = 0;
73
+ for (auto &seq : items) {
74
+ M = std::max<unsigned int>(M, static_cast<unsigned int>(seq.size()));
75
+ E += seq.size();
76
+ }
77
+
78
+ if (b_disp) {
79
+ std::cout << "\nIn-memory dataset: "
80
+ << N << " sequences, max len " << M
81
+ << ", " << E << " entries, " << L << " items\n";
82
+ }
83
+ }
84
+
85
+ // 4) run the C++ miner
86
+ Freq_miner();
16
87
 
17
- // Note: Pass by value (safe copy)
18
- if (!Load_instance(data_file, minsup)) {
19
- throw std::runtime_error("Failed to load database from " + data_file);
20
- }
88
+ // 5) collect patterns & timing
89
+ auto patterns = GetCollected();
90
+ double wall_time = give_time(std::clock() - start_time);
21
91
 
22
- Freq_miner();
23
- return GetCollected();
24
- },
25
- py::arg("data_file"),
26
- py::arg("minsup") = 0.01,
27
- "Mine sequential patterns from the given data file with minimum support.");
92
+ // 6) return Python dict
93
+ py::dict out;
94
+ out["patterns"] = patterns;
95
+ out["time"] = wall_time;
96
+ return out;
97
+ },
98
+ py::arg("data"),
99
+ py::arg("minsup") = 0.01,
100
+ py::arg("time_limit") = 10 * 3600,
101
+ py::arg("preproc") = false,
102
+ py::arg("use_dic") = false,
103
+ py::arg("verbose") = false,
104
+ py::arg("out_file") = ""
105
+ );
28
106
  }
Binary file