effspm 0.3.0__cp312-cp312-macosx_11_0_arm64.whl → 0.3.3__cp312-cp312-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Binary file
@@ -31,7 +31,7 @@ map<int, string> item_map_rev;
31
31
 
32
32
  std::vector<int> freq;
33
33
  std::vector<int> item_dic;
34
-
34
+ std::vector<std::vector<int>> items;
35
35
  // ✅ REAL DEFINITION lives here:
36
36
  std::vector<Pattern> DFS;
37
37
 
@@ -67,8 +67,8 @@ bool Load_instance(string &items_file, double thresh) {
67
67
  if (pre_pro) {
68
68
  if (!Preprocess(items_file, thresh))
69
69
  return false;
70
-
71
- cout << "\nPreprocess done in " << give_time(clock() - kk) << " seconds\n\n";
70
+ if (b_disp)
71
+ cout << "\nPreprocess done in " << give_time(clock() - kk) << " seconds\n\n";
72
72
 
73
73
  // build empty DFS of size L
74
74
  DFS.clear();
@@ -88,12 +88,13 @@ bool Load_instance(string &items_file, double thresh) {
88
88
  else
89
89
  theta = static_cast<int>(thresh);
90
90
  }
91
-
92
- cout << "\nMDD Database built in " << give_time(clock() - kk) << " seconds\n\n";
93
- cout << "Found " << N * N_mult
94
- << " sequence, with max line len " << M
95
- << ", and " << L << " items, and " << E << " enteries\n";
96
- cout << "Total MDD nodes: " << Tree.size() << endl;
91
+ if (b_disp)
92
+ cout << "\nMDD Database built in " << give_time(clock() - kk) << " seconds\n\n";
93
+ if (b_disp)
94
+ cout << "Found " << N * N_mult
95
+ << " sequence, with max line len " << M
96
+ << ", and " << L << " items, and " << E << " enteries\n";
97
+ //cout << "Total MDD nodes: " << Tree.size() << endl;
97
98
 
98
99
  return true;
99
100
  }
@@ -102,6 +103,15 @@ bool Load_instance(string &items_file, double thresh) {
102
103
  // preprocessing pass
103
104
  // ---------------------------------------------------------------------
104
105
  bool Preprocess(string &inst, double thresh) {
106
+ N = 0;
107
+ L = 0;
108
+ freq.clear();
109
+ item_dic.clear();
110
+ item_map.clear();
111
+ item_map_rev.clear();
112
+ // (E is usually for entries during Build_MDD, so we can leave it
113
+ // for the load phase; it’s already reset in the binding)
114
+
105
115
  ifstream file(inst);
106
116
 
107
117
  if (file.good()) {
@@ -147,8 +157,8 @@ bool Preprocess(string &inst, double thresh) {
147
157
  if (freq[i] >= theta)
148
158
  item_dic[i] = ++real_L;
149
159
  }
150
-
151
- cout << "Original number of items: " << L
160
+ if (b_disp)
161
+ cout << "Original number of items: " << L
152
162
  << " Reduced to: " << real_L << endl;
153
163
 
154
164
  L = real_L;
@@ -0,0 +1,83 @@
1
+ #include <iostream>
2
+ #include <string.h>
3
+ #include "load_inst.hpp"
4
+ #include "freq_miner.hpp"
5
+ #include "utility.hpp"
6
+ #include "build_mdd.hpp"
7
+
8
+ namespace btminer {
9
+ // everything is already declared
10
+ }
11
+
12
+ int main(int argc, char* argv[]) {
13
+ using namespace btminer;
14
+
15
+ std::string VV, attr;
16
+
17
+ double thresh = 0;
18
+ for (int i = 1; i<argc; i++) {
19
+ if (argv[i][0] != '-' || isdigit(argv[i][1]))
20
+ continue;
21
+ else if (strcmp(argv[i], "-thr") == 0)
22
+ thresh = std::stod(argv[i + 1]);
23
+ else if (strcmp(argv[i], "-file") == 0)
24
+ VV = argv[i + 1];
25
+ else if (strcmp(argv[i], "-N_mult") == 0)
26
+ N_mult = std::stoi(argv[i + 1]);
27
+ else if (strcmp(argv[i], "-M_mult") == 0)
28
+ M_mult = std::stoi(argv[i + 1]);
29
+ else if (strcmp(argv[i], "-time") == 0)
30
+ time_limit = std::stoi(argv[i + 1]);
31
+ else if (strcmp(argv[i], "-jbuild") == 0)
32
+ just_build = 1;
33
+ else if (strcmp(argv[i], "-folder") == 0)
34
+ folder = argv[i + 1];
35
+ else if (strcmp(argv[i], "-npre") == 0)
36
+ pre_pro = 0;
37
+ else if (strcmp(argv[i], "-dic") == 0)
38
+ use_dic = 1;
39
+ else if (strcmp(argv[i], "-out") == 0) {
40
+ if (i + 1 == argc || argv[i + 1][0] == '-')
41
+ b_disp = 1;
42
+ else if (argv[i + 1][0] == '+') {
43
+ b_disp = 1;
44
+ b_write = 1;
45
+ if (strlen(argv[i + 1]) > 1) {
46
+ out_file = argv[i + 1];
47
+ out_file = out_file.substr(1, out_file.size() - 1);
48
+ }
49
+ else
50
+ out_file = VV;
51
+ }
52
+ else {
53
+ b_write = 1;
54
+ out_file = argv[i + 1];
55
+ }
56
+ }
57
+ else
58
+ std::cout << "Command " << argv[i] << " not recognized and skipped.\n";
59
+ }
60
+
61
+ std::cout << "\n********************** " << VV << " N_mult: " << N_mult << " M_mult: " << M_mult << "**********************\n";
62
+
63
+ std::string item_file = folder + VV + ".txt";
64
+
65
+ std::cout << "loading instances...\n";
66
+
67
+ start_time = clock();
68
+
69
+ if (!Load_instance(item_file, thresh)) {
70
+ std::cout << "Files invalid, exiting.\n";
71
+ return 0;
72
+ }
73
+
74
+ if (!just_build && give_time(clock() - start_time) < time_limit) {
75
+ Freq_miner();
76
+ if (give_time(clock() - start_time) >= time_limit)
77
+ std::cout << "TIME LIMIT REACHED\n";
78
+ std::cout << "Mining Complete\n\nFound a total of " << num_patt << " patterns\n";
79
+ std::cout << "\nTotal CPU time " << give_time(clock() - start_time) << " seconds\n\n";
80
+ }
81
+
82
+ return 0;
83
+ }
@@ -7,43 +7,32 @@
7
7
 
8
8
  namespace htminer {
9
9
 
10
- // Forward declarations (unchanged)
11
- int Add_arc(int item, unsigned int last_arc, int& itmset, std::vector<unsigned int>& ancest_map);
12
- void Add_vec(std::vector<int>& items_lim, std::vector<unsigned int>& ancest_map, unsigned int last_arc, int itmset);
10
+ using std::vector;
13
11
 
14
- // Global trees (unchanged)
15
- std::vector<Arc> Tree;
16
- std::vector<VArc> VTree;
17
- std::vector<CArc> CTree;
12
+ int Add_arc(int item, unsigned int last_arc, int& itmset, vector<unsigned int>& ancest_map);
13
+ void Add_vec(vector<int>& items_lim, vector<unsigned int>& ancest_map, unsigned int last_arc, int itmset);
18
14
 
19
- void Build_MDD(std::vector<int>& items, std::vector<int>& items_lim) {
20
- // Prepare ancestor map of size L
21
- std::vector<unsigned int> ancest_map(L, 0);
15
+ vector<Arc> Tree;
16
+ vector<VArc> VTree;
17
+ vector<CArc> CTree;
22
18
 
23
- unsigned int last_arc = 0;
24
- int itmset = 0;
25
-
26
- // 1) normal items
27
- for (size_t idx = 0; idx < items.size(); ++idx) {
28
- int curr_item = items[idx];
19
+ void Build_MDD(vector<int>& items, vector<int>& items_lim) {
29
20
 
30
- ++E; // count this entry, just like in btminer
21
+ vector<unsigned int> ancest_map(L, 0);
31
22
 
32
- last_arc = Add_arc(curr_item, last_arc, itmset, ancest_map);
33
- }
23
+ unsigned int last_arc = 0;
24
+ int itmset = 0;
25
+ for (vector<int>::iterator it = items.begin(); it != items.end(); ++it)
26
+ last_arc = Add_arc(*it, last_arc, itmset, ancest_map);
34
27
 
35
- // 2) tail / limited items
36
- if (!items_lim.empty()) {
28
+ if (!items_lim.empty())
37
29
  Add_vec(items_lim, ancest_map, last_arc, itmset);
38
- }
39
30
  }
40
31
 
41
- int Add_arc(int item,
42
- unsigned int last_arc,
43
- int& itmset,
44
- std::vector<unsigned int>& ancest_map)
45
- {
32
+ int Add_arc(int item, unsigned int last_arc, int& itmset, vector<unsigned int>& ancest_map) {
33
+
46
34
  unsigned int anct = ancest_map[std::abs(item) - 1];
35
+
47
36
  if (item < 0)
48
37
  ++itmset;
49
38
 
@@ -51,9 +40,8 @@ int Add_arc(int item,
51
40
 
52
41
  if (last_sibl == 0) {
53
42
  Tree.emplace_back(item, itmset, anct);
54
- last_sibl = static_cast<unsigned int>(Tree.size() - 1);
43
+ last_sibl = (unsigned int)Tree.size() - 1;
55
44
  Tree[last_arc].chld = last_sibl;
56
-
57
45
  if (anct == 0)
58
46
  DFS[std::abs(item) - 1].str_pnt.push_back(last_sibl);
59
47
  }
@@ -61,8 +49,8 @@ int Add_arc(int item,
61
49
  while (Tree[last_sibl].item != item) {
62
50
  if (Tree[last_sibl].sibl == 0) {
63
51
  Tree.emplace_back(item, itmset, anct);
64
- Tree[last_sibl].sibl = static_cast<unsigned int>(Tree.size() - 1);
65
- last_sibl = static_cast<unsigned int>(Tree.size() - 1);
52
+ Tree[last_sibl].sibl = (unsigned int)Tree.size() - 1;
53
+ last_sibl = (unsigned int)Tree.size() - 1;
66
54
  if (anct == 0)
67
55
  DFS[std::abs(item) - 1].str_pnt.push_back(last_sibl);
68
56
  break;
@@ -78,61 +66,48 @@ int Add_arc(int item,
78
66
 
79
67
  ancest_map[std::abs(item) - 1] = last_sibl;
80
68
 
81
- return static_cast<int>(last_sibl);
69
+ return (int)last_sibl;
82
70
  }
83
71
 
84
- void Add_vec(std::vector<int>& items_lim,
85
- std::vector<unsigned int>& ancest,
86
- unsigned int last_arc,
87
- int itmset)
88
- {
89
- items_lim.shrink_to_fit();
72
+ void Add_vec(vector<int>& items_lim, vector<unsigned int>& ancest, unsigned int last_arc, int itmset) {
90
73
 
91
- std::vector<bool> counted(L, false);
74
+ items_lim.shrink_to_fit();
75
+ vector<bool> counted(L, 0);
92
76
 
93
77
  if (Tree[last_arc].itmset > 0) {
94
- ancest.push_back(0);
78
+ ancest.push_back(0); // last element of ancest is CArc child
95
79
  ancest.shrink_to_fit();
96
-
97
- for (size_t i = 0; i < items_lim.size(); ++i) {
80
+ for (int i = 0; i < (int)items_lim.size(); ++i) {
98
81
  int cur_itm = std::abs(items_lim[i]);
99
-
100
- ++E; // ✅ count this limited-entry too
101
-
102
82
  if (ancest[cur_itm - 1] == 0 && !counted[cur_itm - 1]) {
103
- if (i + 1 < static_cast<int>(items_lim.size())) {
104
- VDFS[cur_itm - 1].str_pnt.push_back(-static_cast<int>(i) - 1);
105
- VDFS[cur_itm - 1].seq_ID.push_back(static_cast<unsigned int>(CTree.size()));
83
+ if (i + 1 < (int)items_lim.size()) {
84
+ VDFS[cur_itm - 1].str_pnt.push_back(-i - 1); // CTree positions: negative pointers
85
+ VDFS[cur_itm - 1].seq_ID.push_back((unsigned int)CTree.size());
106
86
  }
107
87
  ++DFS[cur_itm - 1].freq;
108
- counted[cur_itm - 1] = true;
88
+ counted[cur_itm - 1] = 1;
109
89
  }
110
90
  }
111
-
112
91
  CTree.emplace_back(ancest, items_lim);
113
- Tree[last_arc].chld = static_cast<unsigned int>(CTree.size() - 1);
114
- Tree[last_arc].itmset = -itmset;
92
+ Tree[last_arc].chld = (unsigned int)CTree.size() - 1;
93
+ Tree[last_arc].itmset = -itmset; // Tree→CTree edge is marked by negative itmset
115
94
  }
116
95
  else {
117
- std::vector<unsigned int>& ancest_ct = CTree[Tree[last_arc].chld].ancest;
118
-
119
- for (size_t i = 0; i < items_lim.size(); ++i) {
96
+ vector<unsigned int>& ancest_ref = CTree[Tree[last_arc].chld].ancest;
97
+ for (int i = 0; i < (int)items_lim.size(); ++i) {
120
98
  int cur_itm = std::abs(items_lim[i]);
121
-
122
- ++E; // also count in this branch
123
-
124
- if (!counted[cur_itm - 1] && ancest_ct[cur_itm - 1] == 0) {
125
- if (i + 1 < static_cast<int>(items_lim.size())) {
126
- VDFS[cur_itm - 1].str_pnt.push_back(static_cast<unsigned int>(i) + 1);
127
- VDFS[cur_itm - 1].seq_ID.push_back(static_cast<unsigned int>(VTree.size()));
99
+ if (!counted[cur_itm - 1] && ancest_ref[cur_itm - 1] == 0) {
100
+ if (i + 1 < (int)items_lim.size()) {
101
+ VDFS[cur_itm - 1].str_pnt.push_back(i + 1);
102
+ VDFS[cur_itm - 1].seq_ID.push_back((unsigned int)VTree.size());
128
103
  }
129
104
  ++DFS[cur_itm - 1].freq;
130
- counted[cur_itm - 1] = true;
105
+ counted[cur_itm - 1] = 1;
131
106
  }
132
107
  }
133
-
134
- VTree.emplace_back(items_lim, ancest_ct.back());
135
- CTree[Tree[last_arc].chld].ancest.back() = static_cast<unsigned int>(VTree.size());
108
+ VTree.emplace_back(items_lim, CTree[Tree[last_arc].chld].ancest.back());
109
+ CTree[Tree[last_arc].chld].ancest.back() = (unsigned int)VTree.size();
110
+ // VTree siblings and CTree children are +1 of actual index to mark end
136
111
  }
137
112
  }
138
113
 
@@ -5,60 +5,67 @@
5
5
  #include "load_inst.hpp"
6
6
 
7
7
  namespace htminer {
8
- void Build_MDD(std::vector<int>& items, std::vector<int>& items_lim);
9
8
 
10
- class Arc {
11
- public:
12
- unsigned int chld;
13
- unsigned int sibl;
14
- unsigned int freq;
15
- unsigned int anct;
16
- int itmset;
17
- int item;
9
+ using std::vector;
18
10
 
19
- Arc(unsigned int _itm, int _itmset, unsigned int _anc) {
20
- chld = 0;
21
- sibl = 0;
22
- freq = 0;
23
- itmset = _itmset;
24
- anct = _anc;
25
- item = _itm;
26
- }
11
+ void Build_MDD(vector<int>& items, vector<int>& items_lim);
27
12
 
28
- Arc() {
29
- chld = 0;
30
- sibl = 0;
31
- freq = 0;
32
- }
33
- };
13
+ class Arc {
14
+ public:
15
+ unsigned int chld;
16
+ unsigned int sibl;
17
+ unsigned int freq;
18
+ unsigned int anct;
19
+ int itmset;
20
+ int item;
34
21
 
35
- class VArc {
36
- public:
37
- unsigned int sibl;
38
- std::vector<int> seq;
22
+ Arc(unsigned int _itm, int _itmset, unsigned int _anc) {
23
+ chld = 0;
24
+ sibl = 0;
25
+ freq = 0;
26
+ itmset = _itmset;
27
+ anct = _anc;
28
+ item = _itm;
29
+ }
39
30
 
40
- VArc(std::vector<int>& items, unsigned int _sib) {
41
- sibl = _sib;
42
- seq.swap(items);
43
- }
44
-
45
- VArc() {
46
- sibl = 0;
47
- }
48
- };
31
+ Arc() {
32
+ chld = 0;
33
+ sibl = 0;
34
+ freq = 0;
35
+ itmset = 0;
36
+ anct = 0;
37
+ item = 0;
38
+ }
39
+ };
49
40
 
50
- class CArc {
51
- public:
52
- std::vector<int> seq;
53
- std::vector<unsigned int> ancest;
41
+ class VArc {
42
+ public:
43
+ unsigned int sibl;
44
+ vector<int> seq;
54
45
 
55
- CArc(std::vector<unsigned int>& _anc, std::vector<int>& items) {
56
- ancest.swap(_anc);
57
- seq.swap(items);
58
- }
59
- };
46
+ VArc(vector<int>& items, unsigned int _sib) {
47
+ sibl = _sib;
48
+ seq.swap(items);
49
+ }
60
50
 
61
- extern std::vector<Arc> Tree;
62
- extern std::vector<VArc> VTree;
63
- extern std::vector<CArc> CTree;
64
- }
51
+ VArc() {
52
+ sibl = 0;
53
+ }
54
+ };
55
+
56
+ class CArc {
57
+ public:
58
+ vector<int> seq;
59
+ vector<unsigned int> ancest;
60
+
61
+ CArc(vector<unsigned int>& _anc, vector<int>& items) {
62
+ ancest.swap(_anc);
63
+ seq.swap(items);
64
+ }
65
+ };
66
+
67
+ extern vector<Arc> Tree;
68
+ extern vector<VArc> VTree;
69
+ extern vector<CArc> CTree;
70
+
71
+ } // namespace htminer