effspm 0.2.7__cp312-cp312-win_amd64.whl → 0.3.3__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. effspm/_effspm.cp312-win_amd64.pyd +0 -0
  2. effspm/_effspm.cpp +961 -210
  3. effspm/btminer/src/build_mdd.cpp +42 -17
  4. effspm/btminer/src/build_mdd.hpp +13 -19
  5. effspm/btminer/src/freq_miner.cpp +134 -49
  6. effspm/btminer/src/freq_miner.hpp +16 -0
  7. effspm/btminer/src/load_inst.cpp +211 -126
  8. effspm/btminer/src/load_inst.hpp +22 -4
  9. effspm/btminer/src/main.cpp +83 -0
  10. effspm/btminer/src/utility.cpp +26 -41
  11. effspm/btminer/src/utility.hpp +6 -30
  12. effspm/freq_miner.hpp +2 -1
  13. effspm/htminer/src/build_mdd.cpp +46 -124
  14. effspm/htminer/src/build_mdd.hpp +56 -49
  15. effspm/htminer/src/freq_miner.cpp +341 -307
  16. effspm/htminer/src/freq_miner.hpp +39 -40
  17. effspm/htminer/src/load_inst.cpp +287 -336
  18. effspm/htminer/src/load_inst.hpp +23 -6
  19. effspm/htminer/src/main.cpp +97 -0
  20. effspm/htminer/src/utility.cpp +38 -57
  21. effspm/htminer/src/utility.hpp +9 -64
  22. effspm/largebm/src/build_mdd.cpp +69 -110
  23. effspm/largebm/src/build_mdd.hpp +22 -37
  24. effspm/largebm/src/freq_miner.cpp +241 -291
  25. effspm/largebm/src/freq_miner.hpp +25 -36
  26. effspm/largebm/src/load_inst.cpp +20 -26
  27. effspm/largebm/src/load_inst.hpp +24 -34
  28. effspm/largebm/src/main.cpp +95 -0
  29. effspm/largebm/src/utility.cpp +11 -21
  30. effspm/largebm/src/utility.hpp +7 -10
  31. effspm/largehm/src/build_mdd.cpp +75 -110
  32. effspm/largehm/src/build_mdd.hpp +53 -73
  33. effspm/largehm/src/freq_miner.cpp +134 -191
  34. effspm/largehm/src/freq_miner.hpp +37 -60
  35. effspm/largehm/src/load_inst.cpp +137 -174
  36. effspm/largehm/src/load_inst.hpp +13 -50
  37. effspm/largehm/src/main.cpp +95 -0
  38. effspm/largehm/src/utility.cpp +46 -28
  39. effspm/largehm/src/utility.hpp +18 -16
  40. effspm/largepp/src/freq_miner.cpp +184 -156
  41. effspm/largepp/src/freq_miner.hpp +11 -36
  42. effspm/largepp/src/load_inst.cpp +32 -12
  43. effspm/largepp/src/load_inst.hpp +15 -9
  44. effspm/largepp/src/main.cpp +108 -0
  45. effspm/largepp/src/pattern.hpp +31 -0
  46. effspm/load_inst.cpp +8 -8
  47. effspm/load_inst.hpp +1 -1
  48. effspm/main.cpp +103 -0
  49. {effspm-0.2.7.dist-info → effspm-0.3.3.dist-info}/METADATA +1 -1
  50. effspm-0.3.3.dist-info/RECORD +60 -0
  51. effspm-0.2.7.dist-info/RECORD +0 -53
  52. {effspm-0.2.7.dist-info → effspm-0.3.3.dist-info}/WHEEL +0 -0
  53. {effspm-0.2.7.dist-info → effspm-0.3.3.dist-info}/licenses/LICENSE +0 -0
  54. {effspm-0.2.7.dist-info → effspm-0.3.3.dist-info}/top_level.txt +0 -0
@@ -1,131 +1,106 @@
1
1
  #include <iostream>
2
2
  #include <sstream>
3
3
  #include <algorithm>
4
- #include <fstream>
5
- #include <cmath>
6
- #include <ctime>
7
-
4
+ #include <math.h>
8
5
  #include "load_inst.hpp"
9
6
  #include "utility.hpp"
10
7
  #include "build_mdd.hpp"
11
8
  #include "freq_miner.hpp"
12
9
 
13
10
  namespace largehm {
14
- using namespace std;
15
11
 
16
- string out_file;
17
- string folder;
12
+ using namespace std;
18
13
 
19
- bool b_disp = false;
20
- bool b_write = false;
21
- bool use_dic = false;
22
- bool use_list = false;
23
- bool just_build = false;
24
- bool pre_pro = false;
25
- bool itmset_exists = false;
14
+ unsigned int M = 0, L = 0, mlim;
15
+ unsigned long long int N = 0, theta, E = 0;
26
16
 
27
- unsigned int M = 0;
28
- unsigned int L = 0;
29
- unsigned int mlim = 0;
30
- unsigned int time_limit = 0;
17
+ bool itmset_exists = 0;
31
18
 
32
- unsigned long long int N = 0;
33
- unsigned long long int theta = 0;
34
- unsigned long long int E = 0;
19
+ vector<int> item_dic;
20
+ vector<Pattern> DFS;
21
+ vector<VPattern> VDFS;
35
22
 
36
- clock_t start_time = 0;
23
+ string out_file, folder;
37
24
 
38
- vector<vector<int>> items;
25
+ bool b_disp = 0;
26
+ bool b_write = 0;
27
+ bool use_dic = 0;
28
+ bool just_build = 0;
29
+ bool pre_pro = 1;
39
30
 
40
- vector<int> item_dic;
41
- vector<Pattern> DFS;
42
- vector<VPattern> VDFS;
31
+ unsigned int time_limit = 10 * 3600;
32
+ clock_t start_time;
43
33
 
34
+ void Load_items_pre(string &inst_name);
35
+ bool Load_items(string &inst_name);
36
+ bool Preprocess(string& inst, double thresh);
44
37
 
45
38
  bool Load_instance(string& items_file, double thresh) {
46
- // ─── 1) CLEAR ANY leftover state from a previous run ───
47
- Tree.clear();
48
- VTree.clear();
49
- CTree.clear();
50
- DFS.clear();
51
- VDFS.clear();
52
- item_dic.clear();
53
- items.clear();
54
-
55
- N = 0;
56
- M = 0;
57
- L = 0;
58
- E = 0;
59
- theta = 0;
60
- itmset_exists = false;
61
- // ────────────────────────────────────────────────────
62
39
 
63
40
  clock_t kk = clock();
41
+ Tree.emplace_back(0, 0, 0);
64
42
 
65
- // Insert fresh dummy root node:
66
-
67
-
68
-
43
+ if (pre_pro) {
44
+ if (!Preprocess(items_file, thresh))
45
+ return 0;
69
46
 
70
- Tree.emplace_back(0, 0, 0);
47
+ // KEEP THIS: Preprocess timing
48
+ if (b_disp)
49
+ cout << "\nPreprocess done in " << give_time(clock() - kk) << " seconds\n\n";
71
50
 
72
- if (!pre_pro) {
73
- if (!Load_items(items_file))
74
- return false;
75
51
  DFS.reserve(L);
76
- while (DFS.size() < L) {
77
- DFS.emplace_back(-static_cast<int>(DFS.size()) - 1);
78
- }
79
- VDFS.reserve(L);
80
- while (VDFS.size() < L) {
81
- VDFS.emplace_back(static_cast<int>(VDFS.size()));
82
- }
83
- if (thresh < 1.0) {
84
- theta = static_cast<unsigned long long>(ceil(thresh * N));
85
- } else {
86
- theta = static_cast<unsigned long long>(thresh);
87
- }
52
+ for (int i = 0; i < (int)L; ++i)
53
+ DFS.emplace_back(-i - 1);
54
+
55
+ kk = clock();
56
+ Load_items_pre(items_file);
57
+
88
58
  }
59
+ else if (!Load_items(items_file))
60
+ return 0;
89
61
  else {
90
- if (!Load_items(items_file))
91
- return false;
92
- if (thresh < 1.0) {
93
- theta = static_cast<unsigned long long>(ceil(thresh * N));
94
- } else {
95
- theta = static_cast<unsigned long long>(thresh);
96
- }
62
+ if (thresh < 1)
63
+ theta = ceil(thresh * N);
64
+ else
65
+ theta = thresh;
97
66
  }
98
67
 
99
- cout << "\nMDD Database built in " << give_time(clock() - kk) << " seconds\n\n";
100
- cout << "Found " << N << " sequence, with max line len " << M
101
- << ", and " << L << " items, and " << E << " enteries\n";
68
+ // KEEP THIS: MDD build timing
69
+ if (b_disp)
70
+ cout << "\nMDD Database built in " << give_time(clock() - kk) << " seconds\n\n";
71
+
72
+ // ✅ KEEP THIS: main summary line
73
+ if (b_disp)
74
+ cout << "Found " << N << " sequence, with max line len " << M
75
+ << ", and " << L << " items, and " << E << " enteries\n";
76
+
77
+ // ❌ COMMENT OUT: extra debug
102
78
  // cout << "Total Trie nodes: " << Tree.size()
103
79
  // << " Total CTree nodes: " << CTree.size()
104
80
  // << " Total VTree nodes: " << VTree.size() << endl;
105
81
 
106
- return true;
82
+ return 1;
107
83
  }
108
84
 
109
-
110
85
  bool Preprocess(string &inst, double thresh) {
111
- vector<unsigned long long int> MN(100, 0);
112
- vector<vector<bool>> ML(100, vector<bool>(1000000, false));
113
86
 
87
+ vector<unsigned long long int> MN(100, 0);
88
+ vector<vector<bool>> ML(100, vector<bool>(1000000, 0));
114
89
  ifstream file(inst);
90
+
91
+ vector<unsigned long long int> freq(1000000);
92
+ vector<unsigned long long int> counted(1000000, 0);
93
+
115
94
  if (!file.good()) {
116
- cout << "!!!!!! No such file exists: " << inst << " !!!!!!\n";
117
- return false;
95
+ // cout << "!!!!!! No such file exists: " << inst << " !!!!!!\n";
96
+ return 0;
118
97
  }
119
98
 
120
- vector<unsigned long long int> freq(1000000, 0ULL);
121
- vector<unsigned long long int> counted(1000000, 0ULL);
122
-
123
99
  string line;
124
100
  int ditem;
125
101
  while (getline(file, line) && give_time(clock() - start_time) < time_limit) {
126
102
  ++N;
127
- if (N % 10000000 == 0)
128
- cout << "N: " << N << endl;
103
+ // if (N % 10000000 == 0) cout << "N: " << N << endl;
129
104
 
130
105
  istringstream word(line);
131
106
  string itm;
@@ -135,107 +110,114 @@ bool Preprocess(string &inst, double thresh) {
135
110
  ditem = stoi(itm);
136
111
 
137
112
  if (ditem > 0)
138
- itmset_exists = true;
113
+ itmset_exists = 1;
139
114
  else
140
- ditem = -ditem;
115
+ ditem *= -1;
141
116
 
142
117
  if (size_m < (int)MN.size()) {
143
118
  ++MN[size_m - 1];
144
- if ((int)ML[size_m - 1].size() < ditem) {
145
- ML[size_m - 1].resize(ditem, false);
119
+ if (ML[size_m - 1].size() < (size_t)ditem) {
120
+ ML[size_m - 1].reserve(ditem);
121
+ while (ML[size_m - 1].size() < (size_t)ditem)
122
+ ML[size_m - 1].push_back(0);
146
123
  }
147
- ML[size_m - 1][ditem - 1] = true;
124
+ ML[size_m - 1][ditem - 1] = 1;
148
125
  }
149
126
 
150
- if (L < static_cast<unsigned int>(ditem)) {
151
- L = static_cast<unsigned int>(ditem);
152
- }
127
+ if (L < (unsigned int)ditem)
128
+ L = ditem;
153
129
 
154
- if ((int)freq.size() < ditem) {
155
- freq.resize(ditem, 0ULL);
156
- counted.resize(ditem, 0ULL);
130
+ if (freq.size() < L) {
131
+ freq.reserve(L);
132
+ counted.reserve(L);
133
+ while (freq.size() < L) {
134
+ freq.push_back(0);
135
+ counted.push_back(0);
136
+ }
157
137
  }
138
+
158
139
  if (counted[ditem - 1] != N) {
159
140
  ++freq[ditem - 1];
160
141
  counted[ditem - 1] = N;
161
142
  }
143
+
144
+ ++E; // count entries
162
145
  }
163
146
  if (size_m > (int)M)
164
147
  M = size_m;
165
148
  }
166
149
 
167
- if (thresh < 1.0) {
168
- theta = static_cast<unsigned long long>(ceil(thresh * N));
169
- } else {
170
- theta = static_cast<unsigned long long>(thresh);
171
- }
150
+ if (thresh < 1)
151
+ theta = ceil(thresh * N);
152
+ else
153
+ theta = thresh;
172
154
 
173
155
  int real_L = 0;
174
- item_dic.assign(L, -1);
175
- vector<bool> item_in(L, false);
156
+ item_dic = vector<int>(L, -1);
157
+ vector<bool> item_in(L, 0);
176
158
  for (int i = 0; i < (int)L; ++i) {
177
159
  if (freq[i] >= theta) {
178
160
  item_dic[i] = ++real_L;
179
- item_in[i] = true;
161
+ item_in[i] = 1;
180
162
  }
181
163
  }
182
164
 
183
- cout << "Original number of items: " << L << " Reduced to: " << real_L << endl;
165
+ // COMMENTED: extra stats
166
+ // cout << "Original number of items: " << L
167
+ // << " Reduced to: " << real_L << endl;
184
168
 
185
169
  unsigned long long int LpM = 1;
186
170
  mlim = M;
187
171
  int orgmlim = 0;
188
172
  int ulim = min(1 + real_L / 4, 10);
189
173
  unsigned long long int ml;
190
-
174
+ int coef = 1 + 1 * itmset_exists;
191
175
  for (int i = 0; i + ulim < (int)MN.size() && i + ulim < (int)M; ++i) {
192
176
  ml = 0;
193
177
  for (int j = 0; j < (int)L; ++j) {
194
178
  if (ML[i][j] && item_in[j])
195
179
  ++ml;
196
180
  }
197
- LpM *= ml * (1 + itmset_exists);
198
- cout << ml << " " << LpM << " " << MN[i] << endl;
181
+ LpM *= ml * coef;
182
+ // cout << ml << " " << LpM << " " << MN[i] << endl;
199
183
  if (LpM * ulim > MN[i]) {
200
184
  orgmlim = i;
201
185
  while (i + ulim - 1 < (int)MN.size() && i + ulim - 1 < (int)M) {
202
- cout << (MN[i - 1] - MN[i + ulim - 1]) << " "
203
- << MN[i + ulim - 1] << endl;
204
- if ((MN[i - 1] - MN[i + ulim - 1]) < MN[i + ulim - 1]
205
- && MN[i + ulim - 1] < 600000000) {
186
+ // cout << MN[i - 1] - MN[i + ulim - 1]
187
+ // << " " << MN[i + ulim - 1] << endl;
188
+ if (MN[i - 1] - MN[i + ulim - 1] < MN[i + ulim - 1] &&
189
+ MN[i + ulim - 1] < 600000000) {
206
190
  mlim = i - 1;
207
191
  break;
208
192
  }
209
- ++i;
193
+ i += 1;
210
194
  }
211
195
  break;
212
196
  }
213
197
  }
214
198
 
215
- cout << "M is: " << M << " Mlim is: " << mlim
216
- << " ulim is: " << ulim
217
- << " original mlim is: " << orgmlim
218
- << " guess is: "
219
- << round((log(N) - log(6)) / log(real_L)) << endl;
199
+ // cout << "M is: " << M << " Mlim is: " << mlim
200
+ // << " ulim is: " << ulim
201
+ // << " original mlim is: " << orgmlim
202
+ // << " guess is: " << round((log(N) - log(6)) / log(real_L)) << endl;
220
203
 
221
- if (mlim < (int)M) {
204
+ if (mlim < M) {
222
205
  for (int i = 0; i < real_L; ++i)
223
206
  VDFS.emplace_back(i);
224
207
  }
225
208
 
226
- L = static_cast<unsigned int>(real_L);
209
+ L = real_L;
227
210
  N = 0;
228
211
  M = 0;
229
- return true;
212
+
213
+ return 1;
230
214
  }
231
215
 
216
+ void Load_items_pre(string &inst_name) {
232
217
 
233
- bool Load_items_pre(string &inst_name) {
234
218
  ifstream file(inst_name);
235
- if (!file.good()) {
236
- cout << "!!!!!! No such file exists: " << inst_name << " !!!!!!\n";
237
- return false;
238
- }
219
+ if (!file.good())
220
+ return;
239
221
 
240
222
  string line;
241
223
  int ditem;
@@ -244,12 +226,11 @@ bool Load_items_pre(string &inst_name) {
244
226
  string itm;
245
227
  vector<int> temp_vec;
246
228
  vector<int> temp_lim;
247
- bool sgn = false;
248
-
249
- // L is final from Preprocess
229
+ bool sgn = 0;
250
230
  while (word >> itm) {
251
231
  ditem = stoi(itm);
252
- if (item_dic[std::abs(ditem) - 1] == -1) {
232
+
233
+ if (item_dic[abs(ditem) - 1] == -1) {
253
234
  if (!sgn)
254
235
  sgn = (ditem < 0);
255
236
  continue;
@@ -259,99 +240,81 @@ bool Load_items_pre(string &inst_name) {
259
240
  else
260
241
  ditem = -item_dic[-ditem - 1];
261
242
  }
243
+
262
244
  if (sgn) {
263
245
  if (ditem > 0)
264
246
  ditem = -ditem;
265
- sgn = false;
247
+ sgn = 0;
266
248
  }
267
- if (temp_vec.size() <= (size_t)mlim)
249
+
250
+ if (temp_vec.size() <= mlim)
268
251
  temp_vec.push_back(ditem);
269
252
  else
270
253
  temp_lim.push_back(ditem);
254
+
255
+ ++E;
271
256
  }
272
257
 
273
258
  if (temp_vec.empty())
274
259
  continue;
275
260
 
276
261
  ++N;
277
- if (N % 10000000 == 0)
278
- cout << N << endl;
262
+ // if (N % 10000000 == 0) cout << N << endl;
279
263
 
280
- if (temp_vec.size() + temp_lim.size() > (size_t)M)
281
- M = static_cast<unsigned int>(temp_vec.size() + temp_lim.size());
282
-
283
- // ─── Ensure DFS/VDFS size before Build_MDD ───
284
- while (DFS.size() < L)
285
- DFS.emplace_back(-static_cast<int>(DFS.size()) - 1);
286
- while (VDFS.size() < L)
287
- VDFS.emplace_back(static_cast<int>(VDFS.size()));
288
- // ──────────────────────────────────────────────
264
+ if (temp_vec.size() + temp_lim.size() > M)
265
+ M = temp_vec.size() + temp_lim.size();
289
266
 
290
267
  Build_MDD(temp_vec, temp_lim);
291
268
  }
292
-
293
- return true;
294
269
  }
295
270
 
296
-
297
271
  bool Load_items(string &inst_name) {
298
- // std::cerr << "[SANITY] In Load_items: inst_name='"
299
- // << inst_name << "'" << std::endl;
272
+
300
273
  ifstream file(inst_name);
301
274
  if (!file.good()) {
302
- cout << "!!!!!! No such file exists: " << inst_name << " !!!!!!\n";
303
- return false;
275
+ // cout << "!!!!!! No such file exists: " << inst_name << " !!!!!!\n";
276
+ return 0;
304
277
  }
305
278
 
306
279
  string line;
307
280
  int ditem;
308
281
  while (getline(file, line) && give_time(clock() - start_time) < time_limit) {
309
282
  ++N;
310
- if (N % 1000000 == 0)
311
- cout << "Found " << N << " sequence, with max line len "
312
- << M << ", and " << L << " items, and " << E
313
- << " enteries\n";
283
+ // if (N % 1000000 == 0)
284
+ // cout << "Found " << N << " sequence, with max line len " << M
285
+ // << ", and " << L << " items, and " << E << " enteries\n";
314
286
 
315
287
  istringstream word(line);
316
288
  string itm;
317
289
  vector<int> temp_vec;
318
290
  vector<int> temp_lim;
319
-
320
291
  while (word >> itm) {
321
292
  ditem = stoi(itm);
322
-
323
293
  if (ditem > 0)
324
- itmset_exists = true;
325
-
326
- if (L < static_cast<unsigned int>(std::abs(ditem))) {
327
- L = static_cast<unsigned int>(std::abs(ditem));
328
- // Immediately grow DFS/VDFS to handle new L
329
- while (DFS.size() < L)
330
- DFS.emplace_back(-static_cast<int>(DFS.size()) - 1);
331
- while (VDFS.size() < L)
332
- VDFS.emplace_back(static_cast<int>(VDFS.size()));
294
+ itmset_exists = 1;
295
+ if (L < (unsigned int)abs(ditem)) {
296
+ L = abs(ditem);
297
+ while (DFS.size() < L) {
298
+ DFS.reserve(L);
299
+ DFS.emplace_back(-DFS.size() - 1);
300
+ }
333
301
  }
334
302
 
335
- if (temp_vec.size() < (size_t)mlim)
303
+ if (temp_vec.size() < mlim)
336
304
  temp_vec.push_back(ditem);
337
305
  else
338
306
  temp_lim.push_back(ditem);
307
+
308
+ ++E;
339
309
  }
340
- E += static_cast<unsigned long long>(temp_vec.size() + temp_lim.size());
341
- if (temp_vec.size() + temp_lim.size() > (size_t)M)
342
- M = static_cast<unsigned int>(temp_vec.size() + temp_lim.size());
343
310
 
344
- // ─── Ensure DFS/VDFS size before Build_MDD ───
345
- while (DFS.size() < L)
346
- DFS.emplace_back(-static_cast<int>(DFS.size()) - 1);
347
- while (VDFS.size() < L)
348
- VDFS.emplace_back(static_cast<int>(VDFS.size()));
349
- // ──────────────────────────────────────────────
311
+ if (temp_vec.size() + temp_lim.size() > M)
312
+ M = temp_vec.size();
350
313
 
351
314
  Build_MDD(temp_vec, temp_lim);
352
315
  }
353
316
 
354
- return true;
317
+ return 1;
355
318
  }
356
319
 
357
320
  } // namespace largehm
@@ -1,64 +1,27 @@
1
- #ifndef LARGEHM_LOAD_INST_HPP
2
- #define LARGEHM_LOAD_INST_HPP
1
+ #pragma once
3
2
 
4
- #include <string>
5
3
  #include <vector>
4
+ #include <string>
6
5
  #include <fstream>
7
- #include <ctime> // for clock_t
8
-
9
- // We need Pattern and VPattern, so include freq_miner.hpp here:
10
- #include "freq_miner.hpp"
6
+ #include <map>
7
+ #include <unordered_set>
8
+ #include <unordered_map>
9
+ #include <time.h>
11
10
 
12
11
  namespace largehm {
13
12
 
14
- //
15
- // ─── Globals & Function Prototypes ───────────────────────────────────────────
16
- //
17
-
18
- // Output/folder:
19
- extern std::string out_file;
20
- extern std::string folder;
21
-
22
- // Flags:
23
- extern bool b_disp;
24
- extern bool b_write;
25
- extern bool use_dic;
26
- extern bool use_list;
27
- extern bool just_build;
28
- extern bool pre_pro;
29
- extern bool itmset_exists;
13
+ using namespace std;
30
14
 
31
- // Database statistics:
32
- extern unsigned int M;
33
- extern unsigned int L;
34
- extern unsigned int mlim;
35
- extern unsigned int time_limit;
15
+ bool Load_instance(string& items_file, double thresh);
36
16
 
37
- extern unsigned long long int N;
38
- extern unsigned long long int theta;
39
- extern unsigned long long int E;
17
+ extern string out_file, folder;
40
18
 
41
- // Timing:
42
- extern clock_t start_time;
43
-
44
- // In‐memory sequences (only if “in‐memory” mode):
45
- extern std::vector<std::vector<int>> items;
46
-
47
- // Preprocessing dictionary (maps original → compressed IDs):
48
- extern std::vector<int> item_dic;
19
+ extern bool b_disp, b_write, use_dic, just_build, pre_pro, itmset_exists;
49
20
 
50
- // DFS stacks used by the miner (Pattern / VPattern):
51
- extern std::vector<Pattern> DFS;
52
- extern std::vector<VPattern> VDFS;
21
+ extern unsigned int M, L, mlim, time_limit;
53
22
 
54
- // Internal loader functions:
55
- bool Load_items_pre(std::string &inst_name);
56
- bool Load_items(std::string &inst_name);
57
- bool Preprocess(std::string &inst, double thresh);
23
+ extern unsigned long long int N, theta, E;
58
24
 
59
- // Main entry‐point for loading & building the MDD:
60
- bool Load_instance(std::string &items_file, double thresh);
25
+ extern clock_t start_time;
61
26
 
62
27
  } // namespace largehm
63
-
64
- #endif // LARGEHM_LOAD_INST_HPP
@@ -0,0 +1,95 @@
1
+ #include <iostream>
2
+ #include <time.h>
3
+ #include <string.h>
4
+ #include <string>
5
+ #include "load_inst.hpp"
6
+ #include "build_mdd.hpp"
7
+ #include "utility.hpp"
8
+ #include "freq_miner.hpp"
9
+
10
+ using namespace std;
11
+
12
+ string out_file;
13
+
14
+ bool b_disp = 0, b_write = 0, use_dic = 0, just_build = 0, pre_pro = 1;
15
+
16
+ unsigned int time_limit = 10 * 3600;
17
+
18
+ clock_t start_time;
19
+
20
+ string folder;
21
+
22
+ int main(int argc, char* argv[]) {
23
+
24
+ string VV, attr;
25
+
26
+ double thresh = 0;
27
+ for (int i = 1; i<argc; i++) {
28
+ if (argv[i][0] != '-' || isdigit(argv[i][1]))
29
+ continue;
30
+ else if (strcmp(argv[i], "-thr") == 0)
31
+ thresh = stod(argv[i + 1]);
32
+ else if (strcmp(argv[i], "-file") == 0)
33
+ VV = argv[i + 1];
34
+ else if (strcmp(argv[i], "-time") == 0)
35
+ time_limit = stoi(argv[i + 1]);
36
+ else if (strcmp(argv[i], "-jbuild") == 0)
37
+ just_build = 1;
38
+ else if (strcmp(argv[i], "-folder") == 0)
39
+ folder = argv[i + 1];
40
+ else if (strcmp(argv[i], "-npre") == 0)
41
+ pre_pro = 0;
42
+ else if (strcmp(argv[i], "-dic") == 0)
43
+ use_dic = 1;
44
+ else if (strcmp(argv[i], "-out") == 0) {
45
+ if (i + 1 == argc || argv[i + 1][0] == '-')
46
+ b_disp = 1;
47
+ else if (argv[i + 1][0] == '+') {
48
+ b_disp = 1;
49
+ b_write = 1;
50
+ if (strlen(argv[i + 1]) > 1) {
51
+ out_file = argv[i + 1];
52
+ out_file = out_file.substr(1, out_file.size() - 1);
53
+ }
54
+ else
55
+ out_file = VV;
56
+ }
57
+ else {
58
+ b_write = 1;
59
+ out_file = argv[i + 1];
60
+ }
61
+ }
62
+
63
+ else
64
+ cout << "Command " << argv[i] << " not recognized and skipped.\n";
65
+ }
66
+
67
+
68
+
69
+ cout << "\n********************** " << VV << "**********************\n";
70
+
71
+ string item_file = folder + VV + ".txt";
72
+
73
+ cout << "loading instances...\n";
74
+
75
+ start_time = clock();
76
+
77
+ if (!Load_instance(item_file, thresh)) {
78
+ cout << "Files invalid, exiting.\n";
79
+ cin.get();
80
+ return 0;
81
+ }
82
+
83
+ //kk = clock();
84
+
85
+ if (!just_build && give_time(clock() - start_time) < time_limit) {
86
+ Freq_miner();
87
+ if (give_time(clock() - start_time) >= time_limit)
88
+ cout << "TIME LIMIT REACHED\n";
89
+ cout << "Mining Complete\n\nFound a total of " << num_patt << " patterns\n";
90
+ cout << "\nTotal CPU time " << give_time(clock() - start_time) << " seconds\n\n";
91
+ }
92
+
93
+
94
+ return 0;
95
+ }