effspm 0.2.7__cp311-cp311-macosx_11_0_arm64.whl → 0.3.0__cp311-cp311-macosx_11_0_arm64.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the package versions as they appear in their public registry.
@@ -1,3 +1,5 @@
1
+ // ─── effspm/largehm/src/load_inst.cpp ────────────────────────────────────────
2
+
1
3
  #include <iostream>
2
4
  #include <sstream>
3
5
  #include <algorithm>
@@ -16,17 +18,17 @@ using namespace std;
16
18
  string out_file;
17
19
  string folder;
18
20
 
19
- bool b_disp = false;
20
- bool b_write = false;
21
- bool use_dic = false;
22
- bool use_list = false;
23
- bool just_build = false;
24
- bool pre_pro = false;
21
+ bool b_disp = false;
22
+ bool b_write = false;
23
+ bool use_dic = false;
24
+ bool use_list = false;
25
+ bool just_build = false;
26
+ bool pre_pro = false;
25
27
  bool itmset_exists = false;
26
28
 
27
- unsigned int M = 0;
28
- unsigned int L = 0;
29
- unsigned int mlim = 0;
29
+ unsigned int M = 0;
30
+ unsigned int L = 0;
31
+ unsigned int mlim = 0;
30
32
  unsigned int time_limit = 0;
31
33
 
32
34
  unsigned long long int N = 0;
@@ -37,13 +39,16 @@ clock_t start_time = 0;
37
39
 
38
40
  vector<vector<int>> items;
39
41
 
40
- vector<int> item_dic;
42
+ vector<int> item_dic;
41
43
  vector<Pattern> DFS;
42
44
  vector<VPattern> VDFS;
43
45
 
44
46
 
47
+ // ─────────────────────────────────────────────────────────────────────────────
48
+ // Load_instance
49
+ // ─────────────────────────────────────────────────────────────────────────────
45
50
  bool Load_instance(string& items_file, double thresh) {
46
- // ─── 1) CLEAR ANY leftover state from a previous run ───
51
+ // 1) CLEAR leftover state
47
52
  Tree.clear();
48
53
  VTree.clear();
49
54
  CTree.clear();
@@ -58,62 +63,68 @@ bool Load_instance(string& items_file, double thresh) {
58
63
  E = 0;
59
64
  theta = 0;
60
65
  itmset_exists = false;
61
- // ────────────────────────────────────────────────────
62
66
 
63
67
  clock_t kk = clock();
64
68
 
65
- // Insert fresh dummy root node:
66
-
67
-
68
-
69
-
69
+ // root
70
70
  Tree.emplace_back(0, 0, 0);
71
71
 
72
72
  if (!pre_pro) {
73
73
  if (!Load_items(items_file))
74
74
  return false;
75
+
75
76
  DFS.reserve(L);
76
- while (DFS.size() < L) {
77
+ while (DFS.size() < L)
77
78
  DFS.emplace_back(-static_cast<int>(DFS.size()) - 1);
78
- }
79
+
79
80
  VDFS.reserve(L);
80
- while (VDFS.size() < L) {
81
+ while (VDFS.size() < L)
81
82
  VDFS.emplace_back(static_cast<int>(VDFS.size()));
82
- }
83
- if (thresh < 1.0) {
83
+
84
+ if (thresh < 1.0)
84
85
  theta = static_cast<unsigned long long>(ceil(thresh * N));
85
- } else {
86
+ else
86
87
  theta = static_cast<unsigned long long>(thresh);
87
- }
88
+
89
+ start_time = clock();
88
90
  }
89
91
  else {
90
92
  if (!Load_items(items_file))
91
93
  return false;
92
- if (thresh < 1.0) {
94
+
95
+ if (thresh < 1.0)
93
96
  theta = static_cast<unsigned long long>(ceil(thresh * N));
94
- } else {
97
+ else
95
98
  theta = static_cast<unsigned long long>(thresh);
96
- }
99
+
100
+ start_time = clock();
97
101
  }
98
102
 
99
- cout << "\nMDD Database built in " << give_time(clock() - kk) << " seconds\n\n";
100
- cout << "Found " << N << " sequence, with max line len " << M
101
- << ", and " << L << " items, and " << E << " enteries\n";
102
- // cout << "Total Trie nodes: " << Tree.size()
103
- // << " Total CTree nodes: " << CTree.size()
104
- // << " Total VTree nodes: " << VTree.size() << endl;
103
+ // 👇 only print when verbose/b_disp
104
+ if (b_disp) {
105
+ cout << "\nMDD Database built in " << give_time(clock() - kk) << " seconds\n\n";
106
+ cout << "Found " << N << " sequence, with max line len " << M
107
+ << ", and " << L << " items, and " << E << " enteries\n";
108
+ // cout << "Total Trie nodes: " << Tree.size()
109
+ // << " Total CTree nodes: " << CTree.size()
110
+ // << " Total VTree nodes: " << VTree.size() << endl;
111
+ }
105
112
 
106
113
  return true;
107
114
  }
108
115
 
109
116
 
117
+ // ─────────────────────────────────────────────────────────────────────────────
118
+ // Preprocess
119
+ // ─────────────────────────────────────────────────────────────────────────────
110
120
  bool Preprocess(string &inst, double thresh) {
111
121
  vector<unsigned long long int> MN(100, 0);
112
122
  vector<vector<bool>> ML(100, vector<bool>(1000000, false));
113
123
 
114
124
  ifstream file(inst);
115
125
  if (!file.good()) {
116
- cout << "!!!!!! No such file exists: " << inst << " !!!!!!\n";
126
+ if (b_disp)
127
+ cout << "!!!!!! No such file exists: " << inst << " !!!!!!\n";
117
128
  return false;
118
129
  }
119
130
 
@@ -124,7 +135,7 @@ bool Preprocess(string &inst, double thresh) {
124
135
  int ditem;
125
136
  while (getline(file, line) && give_time(clock() - start_time) < time_limit) {
126
137
  ++N;
127
- if (N % 10000000 == 0)
138
+ if (b_disp && N % 10000000 == 0)
128
139
  cout << "N: " << N << endl;
129
140
 
130
141
  istringstream word(line);
@@ -164,11 +175,10 @@ bool Preprocess(string &inst, double thresh) {
164
175
  M = size_m;
165
176
  }
166
177
 
167
- if (thresh < 1.0) {
178
+ if (thresh < 1.0)
168
179
  theta = static_cast<unsigned long long>(ceil(thresh * N));
169
- } else {
180
+ else
170
181
  theta = static_cast<unsigned long long>(thresh);
171
- }
172
182
 
173
183
  int real_L = 0;
174
184
  item_dic.assign(L, -1);
@@ -176,11 +186,12 @@ bool Preprocess(string &inst, double thresh) {
176
186
  for (int i = 0; i < (int)L; ++i) {
177
187
  if (freq[i] >= theta) {
178
188
  item_dic[i] = ++real_L;
179
- item_in[i] = true;
189
+ item_in[i] = true;
180
190
  }
181
191
  }
182
192
 
183
- cout << "Original number of items: " << L << " Reduced to: " << real_L << endl;
193
+ if (b_disp)
194
+ cout << "Original number of items: " << L << " Reduced to: " << real_L << endl;
184
195
 
185
196
  unsigned long long int LpM = 1;
186
197
  mlim = M;
@@ -195,12 +206,17 @@ bool Preprocess(string &inst, double thresh) {
195
206
  ++ml;
196
207
  }
197
208
  LpM *= ml * (1 + itmset_exists);
198
- cout << ml << " " << LpM << " " << MN[i] << endl;
209
+
210
+ if (b_disp)
211
+ cout << ml << " " << LpM << " " << MN[i] << endl;
212
+
199
213
  if (LpM * ulim > MN[i]) {
200
214
  orgmlim = i;
201
215
  while (i + ulim - 1 < (int)MN.size() && i + ulim - 1 < (int)M) {
202
- cout << (MN[i - 1] - MN[i + ulim - 1]) << " "
203
- << MN[i + ulim - 1] << endl;
216
+ if (b_disp)
217
+ cout << (MN[i - 1] - MN[i + ulim - 1]) << " "
218
+ << MN[i + ulim - 1] << endl;
219
+
204
220
  if ((MN[i - 1] - MN[i + ulim - 1]) < MN[i + ulim - 1]
205
221
  && MN[i + ulim - 1] < 600000000) {
206
222
  mlim = i - 1;
@@ -212,11 +228,12 @@ bool Preprocess(string &inst, double thresh) {
212
228
  }
213
229
  }
214
230
 
215
- cout << "M is: " << M << " Mlim is: " << mlim
216
- << " ulim is: " << ulim
217
- << " original mlim is: " << orgmlim
218
- << " guess is: "
219
- << round((log(N) - log(6)) / log(real_L)) << endl;
231
+ if (b_disp)
232
+ cout << "M is: " << M << " Mlim is: " << mlim
233
+ << " ulim is: " << ulim
234
+ << " original mlim is: " << orgmlim
235
+ << " guess is: "
236
+ << round((log(N) - log(6)) / log(real_L)) << endl;
220
237
 
221
238
  if (mlim < (int)M) {
222
239
  for (int i = 0; i < real_L; ++i)
@@ -230,10 +247,14 @@ bool Preprocess(string &inst, double thresh) {
230
247
  }
231
248
 
232
249
 
250
+ // ─────────────────────────────────────────────────────────────────────────────
251
+ // Load_items_pre
252
+ // ─────────────────────────────────────────────────────────────────────────────
233
253
  bool Load_items_pre(string &inst_name) {
234
254
  ifstream file(inst_name);
235
255
  if (!file.good()) {
236
- cout << "!!!!!! No such file exists: " << inst_name << " !!!!!!\n";
256
+ if (b_disp)
257
+ cout << "!!!!!! No such file exists: " << inst_name << " !!!!!!\n";
237
258
  return false;
238
259
  }
239
260
 
@@ -246,7 +267,6 @@ bool Load_items_pre(string &inst_name) {
246
267
  vector<int> temp_lim;
247
268
  bool sgn = false;
248
269
 
249
- // L is final from Preprocess
250
270
  while (word >> itm) {
251
271
  ditem = stoi(itm);
252
272
  if (item_dic[std::abs(ditem) - 1] == -1) {
@@ -274,18 +294,16 @@ bool Load_items_pre(string &inst_name) {
274
294
  continue;
275
295
 
276
296
  ++N;
277
- if (N % 10000000 == 0)
297
+ if (b_disp && N % 10000000 == 0)
278
298
  cout << N << endl;
279
299
 
280
300
  if (temp_vec.size() + temp_lim.size() > (size_t)M)
281
301
  M = static_cast<unsigned int>(temp_vec.size() + temp_lim.size());
282
302
 
283
- // ─── Ensure DFS/VDFS size before Build_MDD ───
284
303
  while (DFS.size() < L)
285
304
  DFS.emplace_back(-static_cast<int>(DFS.size()) - 1);
286
305
  while (VDFS.size() < L)
287
306
  VDFS.emplace_back(static_cast<int>(VDFS.size()));
288
- // ──────────────────────────────────────────────
289
307
 
290
308
  Build_MDD(temp_vec, temp_lim);
291
309
  }
@@ -294,12 +312,14 @@ bool Load_items_pre(string &inst_name) {
294
312
  }
295
313
 
296
314
 
315
+ // ─────────────────────────────────────────────────────────────────────────────
316
+ // Load_items (no preprocess)
317
+ // ─────────────────────────────────────────────────────────────────────────────
297
318
  bool Load_items(string &inst_name) {
298
- // std::cerr << "[SANITY] In Load_items: inst_name='"
299
- // << inst_name << "'" << std::endl;
300
319
  ifstream file(inst_name);
301
320
  if (!file.good()) {
302
- cout << "!!!!!! No such file exists: " << inst_name << " !!!!!!\n";
321
+ if (b_disp)
322
+ cout << "!!!!!! No such file exists: " << inst_name << " !!!!!!\n";
303
323
  return false;
304
324
  }
305
325
 
@@ -307,7 +327,7 @@ bool Load_items(string &inst_name) {
307
327
  int ditem;
308
328
  while (getline(file, line) && give_time(clock() - start_time) < time_limit) {
309
329
  ++N;
310
- if (N % 1000000 == 0)
330
+ if (b_disp && N % 1000000 == 0)
311
331
  cout << "Found " << N << " sequence, with max line len "
312
332
  << M << ", and " << L << " items, and " << E
313
333
  << " enteries\n";
@@ -325,7 +345,7 @@ bool Load_items(string &inst_name) {
325
345
 
326
346
  if (L < static_cast<unsigned int>(std::abs(ditem))) {
327
347
  L = static_cast<unsigned int>(std::abs(ditem));
328
- // Immediately grow DFS/VDFS to handle new L
348
+
329
349
  while (DFS.size() < L)
330
350
  DFS.emplace_back(-static_cast<int>(DFS.size()) - 1);
331
351
  while (VDFS.size() < L)
@@ -337,16 +357,14 @@ bool Load_items(string &inst_name) {
337
357
  else
338
358
  temp_lim.push_back(ditem);
339
359
  }
340
- E += static_cast<unsigned long long>(temp_vec.size() + temp_lim.size());
360
+ E += static_cast<unsigned long long>(temp_vec.size() + temp_lim.size());
341
361
  if (temp_vec.size() + temp_lim.size() > (size_t)M)
342
362
  M = static_cast<unsigned int>(temp_vec.size() + temp_lim.size());
343
363
 
344
- // ─── Ensure DFS/VDFS size before Build_MDD ───
345
364
  while (DFS.size() < L)
346
365
  DFS.emplace_back(-static_cast<int>(DFS.size()) - 1);
347
366
  while (VDFS.size() < L)
348
367
  VDFS.emplace_back(static_cast<int>(VDFS.size()));
349
- // ──────────────────────────────────────────────
350
368
 
351
369
  Build_MDD(temp_vec, temp_lim);
352
370
  }