effspm 0.2.6__cp310-cp310-win_amd64.whl → 0.3.0__cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,181 +12,128 @@ int Add_arc(int item, unsigned int last_arc, int& itmset, std::vector<unsigned i
12
12
  void Add_vec(std::vector<int>& items_lim, std::vector<unsigned int>& ancest_map, unsigned int last_arc, int itmset);
13
13
 
14
14
  // Global trees (unchanged)
15
- std::vector<Arc> Tree;
15
+ std::vector<Arc> Tree;
16
16
  std::vector<VArc> VTree;
17
17
  std::vector<CArc> CTree;
18
18
 
19
19
  void Build_MDD(std::vector<int>& items, std::vector<int>& items_lim) {
20
- // DEBUG: entry into Build_MDD
21
- // std::cerr << "[HTMiner::Build_MDD] called with items.size()=" << items.size()
22
- // << " items_lim.size()=" << items_lim.size() << std::endl;
23
-
24
- // // Prepare ancestor map of size L
25
- std::vector<unsigned int> ancest_map(L, 0);
20
+ // Prepare ancestor map of size L
21
+ std::vector<unsigned int> ancest_map(L, 0);
26
22
 
27
23
  unsigned int last_arc = 0;
28
- int itmset = 0;
24
+ int itmset = 0;
29
25
 
30
- // Iterate over items
26
+ // 1) normal items
31
27
  for (size_t idx = 0; idx < items.size(); ++idx) {
32
28
  int curr_item = items[idx];
33
- // std::cerr << "[HTMiner::Build_MDD] processing items[" << idx
34
- // << "]=" << curr_item << " last_arc=" << last_arc
35
- // << " itmset=" << itmset << std::endl;
36
29
 
37
- last_arc = Add_arc(curr_item, last_arc, itmset, ancest_map);
30
+ ++E; // count this entry, just like in btminer
38
31
 
39
- // std::cerr << "[HTMiner::Build_MDD] returned from Add_arc, new last_arc="
40
- // << last_arc << " itmset=" << itmset << std::endl;
32
+ last_arc = Add_arc(curr_item, last_arc, itmset, ancest_map);
41
33
  }
42
34
 
43
- // If there are limited items, handle them
35
+ // 2) tail / limited items
44
36
  if (!items_lim.empty()) {
45
- // std::cerr << "[HTMiner::Build_MDD] items_lim is not empty; size="
46
- // << items_lim.size() << std::endl;
47
37
  Add_vec(items_lim, ancest_map, last_arc, itmset);
48
- // std::cerr << "[HTMiner::Build_MDD] returned from Add_vec" << std::endl;
49
- } else {
50
- // std::cerr << "[HTMiner::Build_MDD] items_lim is empty; skipping Add_vec" << std::endl;
51
38
  }
52
-
53
- // DEBUG: exit Build_MDD
54
- // std::cerr << "[HTMiner::Build_MDD] exiting; Tree.size()=" << Tree.size()
55
- // << " CTree.size()=" << CTree.size()
56
- // << " VTree.size()=" << VTree.size() << std::endl;
57
- //
58
39
  }
59
40
 
60
- int Add_arc(int item, unsigned int last_arc, int& itmset, std::vector<unsigned int>& ancest_map) {
41
+ int Add_arc(int item,
42
+ unsigned int last_arc,
43
+ int& itmset,
44
+ std::vector<unsigned int>& ancest_map)
45
+ {
61
46
  unsigned int anct = ancest_map[std::abs(item) - 1];
62
- if (item < 0) {
47
+ if (item < 0)
63
48
  ++itmset;
64
- // std::cerr << "[HTMiner::Add_arc] negative item detected; itmset incremented to "
65
- // << itmset << std::endl;
66
- }
67
49
 
68
50
  unsigned int last_sibl = Tree[last_arc].chld;
69
- // std::cerr << "[HTMiner::Add_arc] starting with last_sibl=" << last_sibl
70
- // << " anct=" << anct << std::endl;
71
51
 
72
52
  if (last_sibl == 0) {
73
53
  Tree.emplace_back(item, itmset, anct);
74
54
  last_sibl = static_cast<unsigned int>(Tree.size() - 1);
75
55
  Tree[last_arc].chld = last_sibl;
76
- // std::cerr << "[HTMiner::Add_arc] created new arc at index=" << last_sibl
77
- // << " setting Tree[" << last_arc << "].chld=" << last_sibl << std::endl;
78
- if (anct == 0) {
56
+
57
+ if (anct == 0)
79
58
  DFS[std::abs(item) - 1].str_pnt.push_back(last_sibl);
80
- // std::cerr << "[HTMiner::Add_arc] appended to DFS[" << (std::abs(item) - 1)
81
- // << "].str_pnt -> " << last_sibl << std::endl;
82
- }
83
59
  }
84
60
  else {
85
- // std::cerr << "[HTMiner::Add_arc] traversing siblings starting at " << last_sibl << std::endl;
86
61
  while (Tree[last_sibl].item != item) {
87
62
  if (Tree[last_sibl].sibl == 0) {
88
63
  Tree.emplace_back(item, itmset, anct);
89
64
  Tree[last_sibl].sibl = static_cast<unsigned int>(Tree.size() - 1);
90
- last_sibl = static_cast<unsigned int>(Tree.size() - 1);
91
- // std::cerr << "[HTMiner::Add_arc] created sibling arc at index=" << last_sibl
92
- // << " setting Tree[" << (last_sibl - 1) << "].sibl=" << last_sibl << std::endl;
93
- if (anct == 0) {
65
+ last_sibl = static_cast<unsigned int>(Tree.size() - 1);
66
+ if (anct == 0)
94
67
  DFS[std::abs(item) - 1].str_pnt.push_back(last_sibl);
95
- // std::cerr << "[HTMiner::Add_arc] appended to DFS[" << (std::abs(item) - 1)
96
- // << "].str_pnt -> " << last_sibl << std::endl;
97
- }
98
68
  break;
99
69
  }
100
70
  last_sibl = Tree[last_sibl].sibl;
101
- // std::cerr << "[HTMiner::Add_arc] moving to next sibling: " << last_sibl << std::endl;
102
71
  }
103
72
  }
104
73
 
105
- if (anct == 0) {
74
+ if (anct == 0)
106
75
  ++DFS[std::abs(item) - 1].freq;
107
- // std::cerr << "[HTMiner::Add_arc] incremented DFS[" << (std::abs(item) - 1)
108
- // << "].freq -> " << DFS[std::abs(item) - 1].freq << std::endl;
109
- }
110
76
 
111
77
  ++Tree[last_sibl].freq;
112
- // std::cerr << "[HTMiner::Add_arc] incremented Tree[" << last_sibl << "].freq -> "
113
- // << Tree[last_sibl].freq << std::endl;
114
78
 
115
79
  ancest_map[std::abs(item) - 1] = last_sibl;
116
- // std::cerr << "[HTMiner::Add_arc] updated ancest_map[" << (std::abs(item) - 1)
117
- // << "] -> " << last_sibl << std::endl;
118
80
 
119
81
  return static_cast<int>(last_sibl);
120
82
  }
121
83
 
122
- void Add_vec(std::vector<int>& items_lim, std::vector<unsigned int>& ancest, unsigned int last_arc, int itmset) {
84
+ void Add_vec(std::vector<int>& items_lim,
85
+ std::vector<unsigned int>& ancest,
86
+ unsigned int last_arc,
87
+ int itmset)
88
+ {
123
89
  items_lim.shrink_to_fit();
124
- // std::cerr << "[HTMiner::Add_vec] called with items_lim.size()=" << items_lim.size()
125
- // << " last_arc=" << last_arc << " itmset=" << itmset << std::endl;
126
90
 
127
91
  std::vector<bool> counted(L, false);
128
92
 
129
93
  if (Tree[last_arc].itmset > 0) {
130
94
  ancest.push_back(0);
131
95
  ancest.shrink_to_fit();
132
- // std::cerr << "[HTMiner::Add_vec] Tree[" << last_arc << "].itmset > 0; pushing 0 to ancest" << std::endl;
133
96
 
134
97
  for (size_t i = 0; i < items_lim.size(); ++i) {
135
98
  int cur_itm = std::abs(items_lim[i]);
99
+
100
+ ++E; // ✅ count this limited-entry too
101
+
136
102
  if (ancest[cur_itm - 1] == 0 && !counted[cur_itm - 1]) {
137
103
  if (i + 1 < static_cast<int>(items_lim.size())) {
138
104
  VDFS[cur_itm - 1].str_pnt.push_back(-static_cast<int>(i) - 1);
139
105
  VDFS[cur_itm - 1].seq_ID.push_back(static_cast<unsigned int>(CTree.size()));
140
- // std::cerr << "[HTMiner::Add_vec] appended negative str_pnt to VDFS["
141
- // << (cur_itm - 1) << "] -> " << (-static_cast<int>(i) - 1) << std::endl;
142
106
  }
143
107
  ++DFS[cur_itm - 1].freq;
144
108
  counted[cur_itm - 1] = true;
145
- // std::cerr << "[HTMiner::Add_vec] incremented DFS[" << (cur_itm - 1)
146
- // << "].freq -> " << DFS[cur_itm - 1].freq << std::endl;
147
109
  }
148
110
  }
149
111
 
150
112
  CTree.emplace_back(ancest, items_lim);
151
- //std::cerr << "[HTMiner::Add_vec] added new CTree node; CTree.size()=" << CTree.size() << std::endl;
152
-
153
- Tree[last_arc].chld = static_cast<unsigned int>(CTree.size() - 1);
113
+ Tree[last_arc].chld = static_cast<unsigned int>(CTree.size() - 1);
154
114
  Tree[last_arc].itmset = -itmset;
155
- // std::cerr << "[HTMiner::Add_vec] updated Tree[" << last_arc
156
- // << "].chld=" << Tree[last_arc].chld
157
- // << " Tree[" << last_arc << "].itmset=" << Tree[last_arc].itmset << std::endl;
158
- //
159
- }
115
+ }
160
116
  else {
161
117
  std::vector<unsigned int>& ancest_ct = CTree[Tree[last_arc].chld].ancest;
162
- // std::cerr << "[HTMiner::Add_vec] Tree[" << last_arc << "].itmset <= 0; using existing CTree node "
163
- // << Tree[last_arc].chld << std::endl;
164
118
 
165
119
  for (size_t i = 0; i < items_lim.size(); ++i) {
166
120
  int cur_itm = std::abs(items_lim[i]);
121
+
122
+ ++E; // ✅ also count in this branch
123
+
167
124
  if (!counted[cur_itm - 1] && ancest_ct[cur_itm - 1] == 0) {
168
125
  if (i + 1 < static_cast<int>(items_lim.size())) {
169
126
  VDFS[cur_itm - 1].str_pnt.push_back(static_cast<unsigned int>(i) + 1);
170
127
  VDFS[cur_itm - 1].seq_ID.push_back(static_cast<unsigned int>(VTree.size()));
171
- // std::cerr << "[HTMiner::Add_vec] appended positive str_pnt to VDFS["
172
- // << (cur_itm - 1) << "] -> " << (static_cast<unsigned int>(i) + 1) << std::endl;
173
128
  }
174
129
  ++DFS[cur_itm - 1].freq;
175
130
  counted[cur_itm - 1] = true;
176
- // std::cerr << "[HTMiner::Add_vec] incremented DFS[" << (cur_itm - 1)
177
- // << "].freq -> " << DFS[cur_itm - 1].freq << std::endl;
178
131
  }
179
132
  }
180
133
 
181
134
  VTree.emplace_back(items_lim, ancest_ct.back());
182
- // std::cerr << "[HTMiner::Add_vec] added new VTree node; VTree.size()=" << VTree.size() << std::endl;
183
-
184
135
  CTree[Tree[last_arc].chld].ancest.back() = static_cast<unsigned int>(VTree.size());
185
- // std::cerr << "[HTMiner::Add_vec] updated CTree[" << Tree[last_arc].chld
186
- // << "].ancest.back()=" << CTree[Tree[last_arc].chld].ancest.back() << std::endl;
187
136
  }
188
-
189
- //std::cerr << "[HTMiner::Add_vec] exiting" << std::endl;
190
137
  }
191
138
 
192
- } // namespace htminer
139
+ } // namespace htminer
@@ -1,137 +1,96 @@
1
- // File: effspm/largebm/src/load_inst.cpp
2
-
3
- #include <vector>
4
- #include <iostream>
5
1
  #include <unordered_map>
6
- #include "load_inst.hpp"
2
+ #include <cstdlib>
7
3
  #include "build_mdd.hpp"
8
4
  #include "freq_miner.hpp"
9
- #include "utility.hpp"
5
+ #include "load_inst.hpp"
10
6
 
11
7
  namespace largebm {
12
8
 
13
- // Forward declaration for Add_arc
14
- int Add_arc(int item, unsigned long long int last_arc, int& itmset,
15
- std::unordered_map<int, unsigned long long int>& ancest_map);
16
-
17
- // Global MDD tree and other globals (declared in headers)
18
- std::vector<Arc> Tree;
9
+ std::vector<Arc> Tree;
19
10
 
20
- void Build_MDD(std::vector<int>& items) {
21
- std::unordered_map<int, unsigned long long int> ancest_map;
22
- unsigned long long int last_arc = 0;
23
- int itmset = 0;
11
+ static int Add_arc(int item,
12
+ unsigned long long last_arc,
13
+ int& itmset,
14
+ std::unordered_map<int, unsigned long long>& ancest_map);
24
15
 
25
- for (auto it = items.begin(); it != items.end(); ++it) {
26
- last_arc = Add_arc(*it, last_arc, itmset, ancest_map);
27
- }
16
+ void Build_MDD(const std::vector<int>& items) {
17
+ std::unordered_map<int, unsigned long long> ancest_map;
18
+ unsigned long long last_arc = 0;
19
+ int itmset = 0;
20
+ for (int v : items) {
21
+ last_arc = Add_arc(v, last_arc, itmset, ancest_map);
28
22
  }
23
+ }
29
24
 
25
+ static int Add_arc(int item,
26
+ unsigned long long last_arc,
27
+ int& itmset,
28
+ std::unordered_map<int, unsigned long long>& ancest_map) {
29
+ ++E;
30
30
 
31
- int Add_arc(int item, unsigned long long int last_arc, int& itmset,
32
- std::unordered_map<int, unsigned long long int>& ancest_map) {
33
-
34
- unsigned idx = std::abs(item) - 1;
31
+ unsigned idx = static_cast<unsigned>(std::abs(item) - 1);
35
32
 
36
- // ─── DEBUG ────────────────────────────────────────────────
37
- // std::cout << "[Add_arc] item=" << item
38
- // << " idx=" << idx
39
- // << " last_arc=" << last_arc
40
- // << " Tree.size=" << Tree.size()
41
- // << " DFS.size=" << DFS.size()
42
- // << std::endl;
43
-
44
- // Ensure DFS can hold this index
45
- if (idx >= DFS.size()) {
46
- // std::cout << "[Add_arc] • resizing DFS to " << (idx + 1) << std::endl;
47
- DFS.reserve(idx + 1);
48
- while (DFS.size() <= idx) {
49
- DFS.emplace_back(-static_cast<int>(DFS.size()) - 1); // Pattern(-id)
50
- }
33
+ if (idx >= DFS.size()) {
34
+ DFS.reserve(idx + 1);
35
+ while (DFS.size() <= idx) {
36
+ DFS.emplace_back(-static_cast<int>(DFS.size()) - 1);
51
37
  }
38
+ }
52
39
 
53
- unsigned long long int anct;
54
- auto p = ancest_map.find(std::abs(item));
55
- if (p == ancest_map.end()) {
56
- anct = 0;
57
- } else {
58
- anct = p->second;
59
- }
40
+ unsigned long long anct = 0;
41
+ {
42
+ std::unordered_map<int, unsigned long long>::const_iterator p =
43
+ ancest_map.find(std::abs(item));
44
+ if (p != ancest_map.end()) anct = p->second;
45
+ }
60
46
 
61
- if (item < 0) {
62
- ++itmset;
63
- }
47
+ if (item < 0) {
48
+ ++itmset;
49
+ }
64
50
 
65
- // Before accessing Tree[last_arc].chld, check bounds
66
- if (last_arc >= Tree.size()) {
67
- // std::cout << "[Add_arc] !!! last_arc OOB last_arc="
68
- // << last_arc << " Tree.size=" << Tree.size()
69
- // << std::endl;
70
- // We still proceed so we can see crash context:
71
- }
51
+ unsigned long long last_sibl = 0;
52
+ if (last_arc < Tree.size()) {
53
+ last_sibl = Tree[last_arc].chld;
54
+ }
72
55
 
73
- unsigned long long int last_sibl = 0;
56
+ if (last_sibl == 0) {
57
+ Tree.emplace_back(item, itmset, anct);
58
+ last_sibl = Tree.size() - 1;
74
59
  if (last_arc < Tree.size()) {
75
- last_sibl = Tree[last_arc].chld;
60
+ Tree[last_arc].chld = last_sibl;
76
61
  }
77
-
78
- if (last_sibl == 0) {
79
- // Insert new node as first child
80
- Tree.emplace_back(item, itmset, anct);
81
- last_sibl = Tree.size() - 1;
82
-
83
- if (last_arc < Tree.size()) {
84
- Tree[last_arc].chld = last_sibl;
85
- }
86
- if (anct == 0) {
87
- // Debug before DFS access
88
- // std::cout << "[Add_arc] • DFS access at index=" << (std::abs(item) - 1)
89
- // << " DFS.size=" << DFS.size() << std::endl;
90
- DFS[std::abs(item) - 1].str_pnt.push_back(last_sibl);
62
+ if (anct == 0) {
63
+ DFS[std::abs(item) - 1].str_pnt.push_back(last_sibl);
64
+ }
65
+ } else {
66
+ while (true) {
67
+ if (last_sibl >= Tree.size()) break;
68
+ if (Tree[last_sibl].item == item) {
69
+ break;
91
70
  }
92
-
93
- } else {
94
-
95
- // Walk siblings until find matching item or end
96
- while (true) {
97
- if (last_sibl >= Tree.size()) {
98
- // std::cout << "[Add_arc] !!! last_sibl OOB last_sibl="
99
- // << last_sibl << " Tree.size=" << Tree.size()
100
- // << std::endl;
101
- break;
102
- }
103
- if (Tree[last_sibl].item == item) {
104
- break;
105
- }
106
- if (Tree[last_sibl].sibl == 0) {
107
- Tree.emplace_back(item, itmset, anct);
108
- Tree[last_sibl].sibl = Tree.size() - 1;
109
- last_sibl = Tree.size() - 1;
110
- if (anct == 0) {
111
- // std::cout << "[Add_arc] • DFS access at index=" << (std::abs(item) - 1)
112
- // << " DFS.size=" << DFS.size() << std::endl;
113
- DFS[std::abs(item) - 1].str_pnt.push_back(last_sibl);
114
- }
115
- break;
71
+ if (Tree[last_sibl].sibl == 0) {
72
+ Tree.emplace_back(item, itmset, anct);
73
+ Tree[last_sibl].sibl = Tree.size() - 1;
74
+ last_sibl = Tree.size() - 1;
75
+ if (anct == 0) {
76
+ DFS[std::abs(item) - 1].str_pnt.push_back(last_sibl);
116
77
  }
117
- last_sibl = Tree[last_sibl].sibl;
78
+ break;
118
79
  }
80
+ last_sibl = Tree[last_sibl].sibl;
119
81
  }
82
+ }
120
83
 
121
- if (anct == 0) {
122
- // std::cout << "[Add_arc] • increment DFS.freq at index=" << (std::abs(item) - 1)
123
- // << " DFS.size=" << DFS.size() << std::endl;
124
- DFS[std::abs(item) - 1].freq++;
125
- }
126
-
127
- if (last_sibl < Tree.size()) {
128
- // std::cout << "[Add_arc] • increment Tree.freq at node=" << last_sibl
129
- // << " Tree.size=" << Tree.size() << std::endl;
130
- Tree[last_sibl].freq++;
131
- }
84
+ if (anct == 0) {
85
+ DFS[std::abs(item) - 1].freq++;
86
+ }
132
87
 
133
- ancest_map[std::abs(item)] = last_sibl;
134
- return last_sibl;
88
+ if (last_sibl < Tree.size()) {
89
+ Tree[last_sibl].freq++;
135
90
  }
136
91
 
137
- } // namespace largebm
92
+ ancest_map[std::abs(item)] = last_sibl;
93
+ return static_cast<int>(last_sibl);
94
+ }
95
+
96
+ } // namespace largebm
@@ -1,47 +1,32 @@
1
1
  #pragma once
2
-
3
- #include<vector>
4
- #include <cmath>
5
- #include "load_inst.hpp"
2
+ #include <vector>
3
+ #include <cstdint>
6
4
 
7
5
  namespace largebm {
8
- void Build_MDD(std::vector<int>& items);
9
6
 
10
7
  class Arc {
11
8
  public:
9
+ unsigned long long chld = 0;
10
+ unsigned long long sibl = 0;
11
+ unsigned long long freq = 0;
12
+ unsigned long long anct = 0;
13
+ int itmset = 0;
14
+ int item = 0;
15
+
16
+ Arc() = default;
17
+ Arc(int _itm, int _itmset, unsigned long long _anc)
18
+ : chld(0), sibl(0), freq(0), anct(_anc), itmset(_itmset), item(_itm) {}
19
+ Arc(int _itm, unsigned long long _anc)
20
+ : chld(0), sibl(0), freq(0), anct(_anc), item(_itm) {}
21
+ };
12
22
 
13
- unsigned long long int chld;
14
- unsigned long long int sibl;
15
- unsigned long long int freq;
16
- unsigned long long int anct;
17
- int itmset;
18
- int item;
19
-
20
- Arc(int _itm, int _itmset, unsigned long long int _anc) {
21
- itmset = _itmset;
22
- anct = _anc;
23
- item = _itm;
24
- freq = 0;
25
- chld = 0;
26
- sibl = 0;
27
- }
28
-
29
- Arc(int _itm, int _anc) {
30
- item = _itm;
31
- anct = _anc;
32
- freq = 0;
33
- chld = 0;
34
- sibl = 0;
35
- }
36
-
37
- Arc() {
38
- freq = 0;
39
- chld = 0;
40
- sibl = 0;
41
- }
23
+ // Single global MDD, defined in build_mdd.cpp
24
+ extern std::vector<Arc> Tree;
42
25
 
26
+ // [2025-10-25 NEW]: const-correct signature used everywhere
27
+ void Build_MDD(const std::vector<int>& items);
43
28
 
44
- };
29
+ // [2025-10-25 NEW]: debug helper to read how many anct==0 ticks we did
30
+ unsigned long long _effspm_dbg_anct0_ticks();
45
31
 
46
- extern std::vector<Arc> Tree;
47
- }
32
+ } // namespace largebm