effspm 0.2.7__cp39-cp39-win_amd64.whl → 0.3.3__cp39-cp39-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. effspm/_effspm.cp39-win_amd64.pyd +0 -0
  2. effspm/_effspm.cpp +961 -210
  3. effspm/btminer/src/build_mdd.cpp +42 -17
  4. effspm/btminer/src/build_mdd.hpp +13 -19
  5. effspm/btminer/src/freq_miner.cpp +134 -49
  6. effspm/btminer/src/freq_miner.hpp +16 -0
  7. effspm/btminer/src/load_inst.cpp +211 -126
  8. effspm/btminer/src/load_inst.hpp +22 -4
  9. effspm/btminer/src/main.cpp +83 -0
  10. effspm/btminer/src/utility.cpp +26 -41
  11. effspm/btminer/src/utility.hpp +6 -30
  12. effspm/freq_miner.hpp +2 -1
  13. effspm/htminer/src/build_mdd.cpp +46 -124
  14. effspm/htminer/src/build_mdd.hpp +56 -49
  15. effspm/htminer/src/freq_miner.cpp +341 -307
  16. effspm/htminer/src/freq_miner.hpp +39 -40
  17. effspm/htminer/src/load_inst.cpp +287 -336
  18. effspm/htminer/src/load_inst.hpp +23 -6
  19. effspm/htminer/src/main.cpp +97 -0
  20. effspm/htminer/src/utility.cpp +38 -57
  21. effspm/htminer/src/utility.hpp +9 -64
  22. effspm/largebm/src/build_mdd.cpp +69 -110
  23. effspm/largebm/src/build_mdd.hpp +22 -37
  24. effspm/largebm/src/freq_miner.cpp +241 -291
  25. effspm/largebm/src/freq_miner.hpp +25 -36
  26. effspm/largebm/src/load_inst.cpp +20 -26
  27. effspm/largebm/src/load_inst.hpp +24 -34
  28. effspm/largebm/src/main.cpp +95 -0
  29. effspm/largebm/src/utility.cpp +11 -21
  30. effspm/largebm/src/utility.hpp +7 -10
  31. effspm/largehm/src/build_mdd.cpp +75 -110
  32. effspm/largehm/src/build_mdd.hpp +53 -73
  33. effspm/largehm/src/freq_miner.cpp +134 -191
  34. effspm/largehm/src/freq_miner.hpp +37 -60
  35. effspm/largehm/src/load_inst.cpp +137 -174
  36. effspm/largehm/src/load_inst.hpp +13 -50
  37. effspm/largehm/src/main.cpp +95 -0
  38. effspm/largehm/src/utility.cpp +46 -28
  39. effspm/largehm/src/utility.hpp +18 -16
  40. effspm/largepp/src/freq_miner.cpp +184 -156
  41. effspm/largepp/src/freq_miner.hpp +11 -36
  42. effspm/largepp/src/load_inst.cpp +32 -12
  43. effspm/largepp/src/load_inst.hpp +15 -9
  44. effspm/largepp/src/main.cpp +108 -0
  45. effspm/largepp/src/pattern.hpp +31 -0
  46. effspm/load_inst.cpp +8 -8
  47. effspm/load_inst.hpp +1 -1
  48. effspm/main.cpp +103 -0
  49. {effspm-0.2.7.dist-info → effspm-0.3.3.dist-info}/METADATA +1 -1
  50. effspm-0.3.3.dist-info/RECORD +60 -0
  51. effspm-0.2.7.dist-info/RECORD +0 -53
  52. {effspm-0.2.7.dist-info → effspm-0.3.3.dist-info}/WHEEL +0 -0
  53. {effspm-0.2.7.dist-info → effspm-0.3.3.dist-info}/licenses/LICENSE +0 -0
  54. {effspm-0.2.7.dist-info → effspm-0.3.3.dist-info}/top_level.txt +0 -0
@@ -1,48 +1,37 @@
1
1
  #pragma once
2
-
3
- #include "load_inst.hpp"
4
- #include "build_mdd.hpp"
2
+ #include <vector>
5
3
 
6
4
  namespace largebm {
7
-
8
- void Freq_miner();
9
- // recursive helper for the list‐based mode
10
- void Freq_miner_list(const std::vector<std::vector<int>>& db,
11
- std::vector<int>& prefix,
12
- unsigned long long theta,
13
- std::vector<std::vector<int>>& out);
5
+
14
6
  class Pattern {
15
7
  public:
16
-
17
- vector<int> seq;
18
- vector<unsigned long long int> str_pnt;
19
- vector<int> list;
20
-
21
- unsigned long long int freq;
22
-
23
- Pattern(vector<int>& _seq, int item) {
24
- seq.swap(_seq);
25
- seq.push_back(item);
26
- freq = 0;
27
- }
28
-
29
- Pattern(int item) {
30
- seq.push_back(item);
31
- freq = 0;
32
- }
33
-
34
- Pattern() {
35
- freq = 0;
36
- }
37
-
38
-
8
+ std::vector<int> seq;
9
+ std::vector<unsigned long long> str_pnt;
10
+ std::vector<int> list;
11
+ unsigned long long freq = 0;
12
+
13
+ Pattern() = default;
14
+ Pattern(std::vector<int>& _seq, int item) {
15
+ seq.swap(_seq);
16
+ seq.push_back(item);
17
+ freq = 0;
18
+ }
19
+ Pattern(int item) {
20
+ seq.push_back(item);
21
+ freq = 0;
22
+ }
39
23
  };
40
24
 
25
+ void Freq_miner();
26
+ void Freq_miner_list(const std::vector<std::vector<int>>& db,
27
+ std::vector<int>& prefix,
28
+ unsigned long long theta,
29
+ std::vector<std::vector<int>>& out);
30
+
41
31
  extern unsigned long long int num_patt;
42
32
  extern std::vector<bool> ilist;
43
33
  extern std::vector<bool> slist;
44
34
  extern std::vector<int> DFS_numfound;
45
- extern Pattern _patt;
46
-
35
+ extern Pattern _patt;
47
36
 
48
- }
37
+ } // namespace largebm
@@ -1,13 +1,8 @@
1
-
2
- #include <sstream>
3
1
  #include <algorithm>
4
2
  #include <cmath>
5
- #include <ctime>
6
- #include <iostream> // for std::cout, std::endl
7
3
  #include <fstream>
8
- #include <vector>
9
- #include <string>
10
-
4
+ #include <iostream>
5
+ #include <sstream>
11
6
  #include "load_inst.hpp"
12
7
  #include "build_mdd.hpp"
13
8
  #include "utility.hpp"
@@ -15,7 +10,7 @@
15
10
 
16
11
  namespace largebm {
17
12
 
18
- // ── global definitions ────────────────────────────────────────────
13
+ // ── single definitions of globals ─────────────────────────────────
19
14
  bool use_list = false;
20
15
  bool b_disp = false;
21
16
  bool b_write = false;
@@ -51,11 +46,10 @@ static void Load_items_list(const std::string& fname) {
51
46
  if (item_dic[a - 1] == -1) continue;
52
47
  seq.push_back(x);
53
48
  }
54
- if (!seq.empty()) items.push_back(std::move(seq));
49
+ if (!seq.empty()) items.push_back(seq);
55
50
  }
56
51
  }
57
52
 
58
- // ─────────────── main loader ─────────────────────────────────────
59
53
  bool Load_instance(const std::string& items_file, double minsup) {
60
54
  // reset state
61
55
  N = L = num_nodes = theta = M = E = 0;
@@ -90,9 +84,6 @@ bool Load_instance(const std::string& items_file, double minsup) {
90
84
  // MDD build mode
91
85
  if (pre_pro) {
92
86
  if (!Preprocess(items_file, minsup)) return false;
93
- std::cout << "\nPreprocess done in "
94
- << give_time(std::clock() - kk)
95
- << " seconds\n\n";
96
87
  DFS.clear();
97
88
  DFS.reserve(L);
98
89
  for (unsigned int i = 0; i < L; ++i)
@@ -100,23 +91,29 @@ bool Load_instance(const std::string& items_file, double minsup) {
100
91
  kk = std::clock();
101
92
  Load_items_pre(items_file);
102
93
  } else {
103
- if (!Preprocess(items_file, 0.0)) return false;
94
+ if (!Preprocess(items_file, minsup)) return false;
104
95
  kk = std::clock();
105
96
  Load_items(items_file);
106
97
  }
107
98
 
108
- std::cout << "\nMDD Database built in "
109
- << give_time(std::clock() - kk)
110
- << " seconds\n\n";
111
- std::cout << "Found " << N
112
- << " sequences, with max line len " << M
113
- << ", and " << L << " items, and " << E << " entries\n";
114
- std::cout << "Total MDD nodes: " << Tree.size() << std::endl;
99
+ // ensure DFS size
100
+ if (DFS.size() < L) {
101
+ DFS.reserve(L);
102
+ while (DFS.size() < L) {
103
+ DFS.emplace_back(-int(DFS.size()) - 1);
104
+ }
105
+ }
106
+
107
+ // SAFETY — seed any zeroed singletons from their str_pnt list
108
+ for (unsigned int i = 0; i < L && i < DFS.size(); ++i) {
109
+ if (DFS[i].freq == 0 && !DFS[i].str_pnt.empty()) {
110
+ DFS[i].freq = static_cast<unsigned long long>(DFS[i].str_pnt.size());
111
+ }
112
+ }
115
113
 
116
114
  return true;
117
115
  }
118
116
 
119
- // ────────────── Preprocess (list mode) ───────────────────────────
120
117
  bool Preprocess(const std::string& inst, double thresh) {
121
118
  std::ifstream file(inst);
122
119
  if (!file.good()) return false;
@@ -158,7 +155,6 @@ bool Preprocess(const std::string& inst, double thresh) {
158
155
  return true;
159
156
  }
160
157
 
161
- // Load_items_pre: MDD insert from file
162
158
  void Load_items_pre(const std::string& inst_name) {
163
159
  std::ifstream file(inst_name);
164
160
  if (!file.good()) return;
@@ -182,7 +178,7 @@ void Load_items_pre(const std::string& inst_name) {
182
178
  continue;
183
179
  }
184
180
  if (ditem > 0) { ditem = item_dic[ditem - 1]; itmset_exists = true; }
185
- else { ditem = -item_dic[-ditem - 1]; }
181
+ else { ditem = -item_dic[-ditem - 1]; }
186
182
  if (sgn) { if (ditem > 0) ditem = -ditem; sgn = false; }
187
183
  temp_vec.push_back(ditem);
188
184
  }
@@ -193,7 +189,6 @@ void Load_items_pre(const std::string& inst_name) {
193
189
  }
194
190
  }
195
191
 
196
- // Load_items: full MDD build
197
192
  bool Load_items(const std::string& inst_name) {
198
193
  std::ifstream file(inst_name);
199
194
  if (!file.good()) return false;
@@ -227,4 +222,3 @@ void ClearCollected() { collected.clear(); }
227
222
  const std::vector<std::vector<int>>& GetCollected() { return collected; }
228
223
 
229
224
  } // namespace largebm
230
-
@@ -1,45 +1,35 @@
1
1
  #pragma once
2
-
3
2
  #include <vector>
4
3
  #include <string>
5
- #include <fstream>
6
- #include <cmath>
7
4
  #include <ctime>
8
5
 
9
-
10
6
  namespace largebm {
11
- using namespace std;
12
7
 
13
- // forward-declare Pattern (defined in freq_miner.hpp)
14
- class Pattern;
15
-
16
- struct Arc;
17
- extern std::vector<Arc> Tree; // <-- add this
18
- void Build_MDD(const std::vector<int>& seq);
19
- // ─── Entry points ─────────────────────────────────────────────────────────
20
- bool Load_instance(const string& items_file, double thresh);
21
- bool Preprocess(const string& fname, double thresh);
22
- void Load_items_pre(const string& fname);
23
- bool Load_items(const string& fname);
24
- extern std::vector<int> inv_item_dic;
25
- // Called by the Python‐wrapper when passing a Python list of lists
26
-
8
+ class Pattern; // forward
9
+ // [2025-10-25 NEW]: match the real definition (class, not struct) to avoid ABI warnings
10
+ class Arc; // was: struct Arc;
11
+
12
+ // Config & state (single definitions in load_inst.cpp)
13
+ extern std::string out_file, folder;
14
+ extern bool use_list;
15
+ extern bool b_disp, b_write, use_dic, just_build, pre_pro, itmset_exists;
16
+ extern unsigned int M, L, time_limit;
17
+ extern unsigned long long N, num_nodes, theta, E;
18
+ extern std::clock_t start_time;
19
+
20
+ extern std::vector<std::vector<int>> items;
21
+ extern std::vector<int> item_dic;
22
+ extern std::vector<int> inv_item_dic;
23
+ extern std::vector<Pattern> DFS;
24
+ extern std::vector<std::vector<int>> collected;
27
25
 
28
- // ─── Config globals (must match btminer types exactly) ────────────────────
29
- extern string out_file, folder;
30
- extern bool use_list;
31
- extern bool b_disp, b_write, use_dic, just_build, pre_pro, itmset_exists;
32
- extern unsigned int M, L, time_limit;
33
- extern unsigned long long N, num_nodes, theta, E;
34
- extern clock_t start_time;
35
- // extern std::vector<Arc> Tree;
36
- // ─── Data for list-based (LargeBTMiner) mode ───────────────────────────────
37
- extern std::vector<std::vector<int>> items;
38
- extern std::vector<int> item_dic;
39
- extern std::vector<Pattern> DFS; // Pattern is now declared
40
- extern std::vector<std::vector<int>> collected;
41
- void ClearCollected();
42
- const std::vector<std::vector<int>>& GetCollected();
26
+ // Loader API
27
+ bool Load_instance(const std::string& items_file, double thresh);
28
+ bool Preprocess(const std::string& fname, double thresh);
29
+ void Load_items_pre(const std::string& fname);
30
+ bool Load_items(const std::string& fname);
43
31
 
32
+ void ClearCollected();
33
+ const std::vector<std::vector<int>>& GetCollected();
44
34
 
45
35
  } // namespace largebm
@@ -0,0 +1,95 @@
1
+ #include <iostream>
2
+ #include <time.h>
3
+ #include <string.h>
4
+ #include <string>
5
+ #include "load_inst.hpp"
6
+ #include "build_mdd.hpp"
7
+ #include "utility.hpp"
8
+ #include "freq_miner.hpp"
9
+
10
+ namespace largebm{
11
+ using namespace std;
12
+
13
+ string out_file;
14
+
15
+ bool b_disp = 0, b_write = 0, just_build = 0, pre_pro = 1;
16
+
17
+ int time_limit = 30 * 3600;
18
+
19
+ clock_t start_time;
20
+
21
+ string folder;
22
+
23
+ int main(int argc, char* argv[]) {
24
+
25
+ string VV, attr;
26
+
27
+ double thresh = 0;
28
+ for (int i = 1; i<argc; i++) {
29
+ if (argv[i][0] != '-' || isdigit(argv[i][1]))
30
+ continue;
31
+ else if (strcmp(argv[i], "-thr") == 0)
32
+ thresh = stod(argv[i + 1]);
33
+ else if (strcmp(argv[i], "-file") == 0)
34
+ VV = argv[i + 1];
35
+ else if (strcmp(argv[i], "-time") == 0)
36
+ time_limit = stoi(argv[i + 1]);
37
+ else if (strcmp(argv[i], "-jbuild") == 0)
38
+ just_build = 1;
39
+ else if (strcmp(argv[i], "-folder") == 0)
40
+ folder = argv[i + 1];
41
+ else if (strcmp(argv[i], "-npre") == 0)
42
+ pre_pro = 0;
43
+ else if (strcmp(argv[i], "-out") == 0) {
44
+ if (i + 1 == argc || argv[i + 1][0] == '-')
45
+ b_disp = 1;
46
+ else if (argv[i + 1][0] == '+') {
47
+ b_disp = 1;
48
+ b_write = 1;
49
+ if (strlen(argv[i + 1]) > 1) {
50
+ out_file = argv[i + 1];
51
+ out_file = out_file.substr(1, out_file.size() - 1);
52
+ }
53
+ else
54
+ out_file = VV;
55
+ }
56
+ else {
57
+ b_write = 1;
58
+ out_file = argv[i + 1];
59
+ }
60
+ }
61
+
62
+ else
63
+ cout << "Command " << argv[i] << " not recognized and skipped.\n";
64
+ }
65
+
66
+
67
+
68
+ cout << "\n********************** " << VV << "**********************\n";
69
+
70
+ string item_file = folder + VV + ".txt";
71
+
72
+ cout << "loading instances...\n";
73
+
74
+ start_time = clock();
75
+
76
+ if (!Load_instance(item_file, thresh)) {
77
+ cout << "Files invalid, exiting.\n";
78
+ cin.get();
79
+ return 0;
80
+ }
81
+
82
+ //kk = clock();
83
+
84
+ if (!just_build && give_time(clock() - start_time) < time_limit) {
85
+ Freq_miner();
86
+ if (give_time(clock() - start_time) >= time_limit)
87
+ cout << "TIME LIMIT REACHED\n";
88
+ cout << "Mining Complete\n\nFound a total of " << num_patt << " patterns\n";
89
+ cout << "\nTotal CPU time " << give_time(clock() - start_time) << " seconds\n\n";
90
+ }
91
+
92
+
93
+ return 0;
94
+ }
95
+ }
@@ -1,45 +1,35 @@
1
1
  #include "utility.hpp"
2
2
  #include "build_mdd.hpp"
3
- #include "load_inst.hpp"
4
-
5
3
  #include <vector>
6
- #include <ctime>
7
- #include <algorithm>
8
4
 
9
5
  namespace largebm {
10
6
 
7
+ double give_time(std::clock_t ticks) {
8
+ return static_cast<double>(ticks) / CLOCKS_PER_SEC;
9
+ }
10
+
11
11
  bool check_parent(unsigned long long cur_arc,
12
12
  unsigned long long str_pnt,
13
13
  unsigned long long start,
14
- std::vector<unsigned long long>& strpnt_vec)
15
- {
14
+ std::vector<unsigned long long>& strpnt_vec) {
16
15
  std::vector<unsigned long long> ancestors;
17
-
18
16
  unsigned long long cur_anct = Tree[cur_arc].anct;
19
17
 
20
18
  while (Tree[cur_anct].itmset > Tree[str_pnt].itmset) {
21
- if (Tree[cur_anct].item > 0)
22
- ancestors.push_back(cur_anct);
19
+ if (Tree[cur_anct].item > 0) ancestors.push_back(cur_anct);
23
20
  cur_anct = Tree[cur_anct].anct;
24
21
  }
25
22
 
26
23
  if (Tree[cur_anct].itmset == Tree[str_pnt].itmset)
27
24
  return true;
28
- else {
29
- for (auto it = ancestors.rbegin(); it != ancestors.rend(); ++it) {
30
- for (unsigned long long i = start; i < strpnt_vec.size(); ++i) {
31
- if (strpnt_vec[i] == *it)
32
- return true;
33
- }
25
+
26
+ for (std::vector<unsigned long long>::reverse_iterator it = ancestors.rbegin();
27
+ it != ancestors.rend(); ++it) {
28
+ for (unsigned long long i = start; i < strpnt_vec.size(); ++i) {
29
+ if (strpnt_vec[i] == *it) return true;
34
30
  }
35
31
  }
36
-
37
32
  return false;
38
33
  }
39
34
 
40
- // return elapsed time in seconds
41
- double give_time(std::clock_t ticks) {
42
- return static_cast<double>(ticks) / CLOCKS_PER_SEC;
43
- }
44
-
45
35
  } // namespace largebm
@@ -1,18 +1,15 @@
1
1
  #pragma once
2
-
3
2
  #include <vector>
4
- #include <time.h>
5
- #include <string>
6
- #include "build_mdd.hpp"
7
3
  #include <ctime>
8
4
 
9
5
  namespace largebm {
10
- using namespace std;
11
-
12
- double give_time(std::clock_t kk);
13
-
14
6
 
7
+ double give_time(std::clock_t ticks);
15
8
 
16
- bool check_parent(unsigned long long int cur_arc, unsigned long long int str_pnt, unsigned long long int start, vector<unsigned long long int>& strpnt_vec);
9
+ // Check if `str_pnt` is an ancestor of `cur_arc` respecting itemset boundaries
10
+ bool check_parent(unsigned long long cur_arc,
11
+ unsigned long long str_pnt,
12
+ unsigned long long start,
13
+ std::vector<unsigned long long>& strpnt_vec);
17
14
 
18
- }
15
+ } // namespace largebm