effspm 0.2.8__cp39-cp39-macosx_11_0_arm64.whl → 0.3.2__cp39-cp39-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. effspm/_effspm.cpp +850 -210
  2. effspm/_effspm.cpython-39-darwin.so +0 -0
  3. effspm/btminer/src/build_mdd.cpp +42 -17
  4. effspm/btminer/src/build_mdd.hpp +13 -19
  5. effspm/btminer/src/freq_miner.cpp +134 -49
  6. effspm/btminer/src/freq_miner.hpp +16 -0
  7. effspm/btminer/src/load_inst.cpp +202 -126
  8. effspm/btminer/src/load_inst.hpp +22 -4
  9. effspm/btminer/src/main.cpp +83 -0
  10. effspm/btminer/src/utility.cpp +26 -41
  11. effspm/btminer/src/utility.hpp +6 -30
  12. effspm/freq_miner.hpp +2 -1
  13. effspm/htminer/src/build_mdd.cpp +46 -124
  14. effspm/htminer/src/build_mdd.hpp +56 -49
  15. effspm/htminer/src/freq_miner.cpp +341 -307
  16. effspm/htminer/src/freq_miner.hpp +39 -40
  17. effspm/htminer/src/load_inst.cpp +287 -336
  18. effspm/htminer/src/load_inst.hpp +23 -6
  19. effspm/htminer/src/main.cpp +97 -0
  20. effspm/htminer/src/utility.cpp +38 -57
  21. effspm/htminer/src/utility.hpp +9 -64
  22. effspm/largebm/src/build_mdd.cpp +69 -110
  23. effspm/largebm/src/build_mdd.hpp +22 -37
  24. effspm/largebm/src/freq_miner.cpp +241 -291
  25. effspm/largebm/src/freq_miner.hpp +25 -36
  26. effspm/largebm/src/load_inst.cpp +20 -26
  27. effspm/largebm/src/load_inst.hpp +24 -34
  28. effspm/largebm/src/main.cpp +95 -0
  29. effspm/largebm/src/utility.cpp +11 -21
  30. effspm/largebm/src/utility.hpp +7 -10
  31. effspm/largehm/src/build_mdd.cpp +75 -110
  32. effspm/largehm/src/build_mdd.hpp +53 -73
  33. effspm/largehm/src/freq_miner.cpp +134 -191
  34. effspm/largehm/src/freq_miner.hpp +37 -60
  35. effspm/largehm/src/load_inst.cpp +137 -174
  36. effspm/largehm/src/load_inst.hpp +13 -50
  37. effspm/largehm/src/main.cpp +95 -0
  38. effspm/largehm/src/utility.cpp +46 -28
  39. effspm/largehm/src/utility.hpp +18 -16
  40. effspm/largepp/src/freq_miner.cpp +184 -156
  41. effspm/largepp/src/freq_miner.hpp +11 -36
  42. effspm/largepp/src/load_inst.cpp +32 -12
  43. effspm/largepp/src/load_inst.hpp +15 -9
  44. effspm/largepp/src/main.cpp +108 -0
  45. effspm/largepp/src/pattern.hpp +31 -0
  46. effspm/load_inst.cpp +8 -8
  47. effspm/load_inst.hpp +1 -1
  48. effspm/main.cpp +103 -0
  49. {effspm-0.2.8.dist-info → effspm-0.3.2.dist-info}/METADATA +1 -1
  50. effspm-0.3.2.dist-info/RECORD +60 -0
  51. effspm-0.2.8.dist-info/RECORD +0 -53
  52. {effspm-0.2.8.dist-info → effspm-0.3.2.dist-info}/WHEEL +0 -0
  53. {effspm-0.2.8.dist-info → effspm-0.3.2.dist-info}/licenses/LICENSE +0 -0
  54. {effspm-0.2.8.dist-info → effspm-0.3.2.dist-info}/top_level.txt +0 -0
effspm/_effspm.cpp CHANGED
@@ -2,49 +2,97 @@
2
2
 
3
3
  #include <pybind11/pybind11.h>
4
4
  #include <pybind11/stl.h>
5
- namespace py = pybind11;
5
+
6
6
  #include <iostream>
7
+ #include <fstream>
8
+ #include <cstdio> // std::remove
9
+ #include <vector>
10
+ #include <string>
11
+ #include <ctime>
12
+ #include <cmath>
7
13
 
14
+ namespace py = pybind11;
8
15
 
9
- // PrefixProjection headers
16
+ // PrefixProjection headers (global namespace)
10
17
  #include "freq_miner.hpp"
11
18
  #include "load_inst.hpp"
12
19
  #include "utility.hpp"
13
20
 
14
- // BTMiner (wrapped in its own namespace in source files)
21
+ // BTMiner (namespaced)
15
22
  #include "btminer/src/freq_miner.hpp"
16
23
  #include "btminer/src/load_inst.hpp"
17
24
  #include "btminer/src/utility.hpp"
18
25
  #include "btminer/src/build_mdd.hpp"
19
26
 
20
- // HTMiner (wrapped in its own namespace in source files)
21
- #include "htminer/src/build_mdd.hpp" // ← ensure HTMiner MDD builder is available
27
+ // HTMiner (namespaced)
28
+ #include "htminer/src/build_mdd.hpp"
22
29
  #include "htminer/src/freq_miner.hpp"
23
30
  #include "htminer/src/load_inst.hpp"
24
31
  #include "htminer/src/utility.hpp"
25
32
 
26
-
33
+ // LargePrefixProjection
27
34
  #include "largepp/src/freq_miner.hpp"
28
35
  #include "largepp/src/load_inst.hpp"
29
36
  #include "largepp/src/utility.hpp"
30
37
 
38
+ // LargeBTMiner
31
39
  #include "largebm/src/freq_miner.hpp"
32
40
  #include "largebm/src/load_inst.hpp"
33
41
  #include "largebm/src/utility.hpp"
34
42
  #include "largebm/src/build_mdd.hpp"
35
43
 
44
+ // LargeHTMiner
36
45
  #include "largehm/src/freq_miner.hpp"
37
46
  #include "largehm/src/load_inst.hpp"
38
47
  #include "largehm/src/utility.hpp"
39
48
  #include "largehm/src/build_mdd.hpp"
40
49
 
50
+ namespace {
51
+
52
// RAII owner of a temporary file: the file named by `path` is deleted when the
// owner is destroyed. An empty `path` means "owns nothing", so a
// default-constructed TempFile is a no-op.
//
// Copying is disabled: two owners of the same path would each delete it, so the
// file could disappear while one of them is still using it. Ownership can be
// transferred by move, which leaves the source empty.
struct TempFile {
    std::string path;  // file removed on destruction; empty = nothing owned

    TempFile() = default;
    ~TempFile() { discard(); }

    TempFile(const TempFile&) = delete;
    TempFile& operator=(const TempFile&) = delete;

    // `path` starts out empty here, so the swap hands over ownership and
    // leaves `other` owning nothing.
    TempFile(TempFile&& other) noexcept { path.swap(other.path); }

    TempFile& operator=(TempFile&& other) noexcept {
        if (this != &other) {
            discard();              // release the file we currently own first
            path.swap(other.path);  // `path` is empty after discard()
        }
        return *this;
    }

private:
    // Best-effort removal: the result of std::remove is deliberately ignored
    // because this runs from the destructor, which must not throw.
    void discard() noexcept {
        if (!path.empty()) {
            std::remove(path.c_str());
            path.clear();
        }
    }
};
61
+
62
// Write a list of integer sequences to a newly named temporary text file in
// the loader's input format: one sequence per line, items separated by single
// spaces, each line terminated by '\n' (an empty sequence yields an empty line).
//
// Returns the path of the written file; the caller is responsible for deleting
// it. Throws std::runtime_error if no temporary name can be generated, the file
// cannot be opened, or any write/close fails. On a write failure the partial
// file is removed before throwing, so a truncated file is never handed on.
//
// NOTE(review): std::tmpnam only proposes a name; another process can create
// the same path before it is opened here, and the ".txt" suffix is appended
// after the name was generated. Consider an atomic create-and-open primitive
// if this runs in a shared temp directory.
std::string write_temp_seq_file(const std::vector<std::vector<int>>& seqs) {
    char tmp_name[L_tmpnam];
    if (!std::tmpnam(tmp_name)) {
        throw std::runtime_error("Failed to create temporary file name");
    }
    std::string path = std::string(tmp_name) + ".txt";

    std::ofstream ofs(path);
    if (!ofs) {
        throw std::runtime_error("Failed to open temporary file for writing: " + path);
    }

    for (const auto& seq : seqs) {
        const char* separator = "";
        for (const int item : seq) {
            ofs << separator << item;
            separator = " ";
        }
        ofs << '\n';
    }

    // close() flushes buffered data; any earlier write error or a failed
    // flush/close leaves the stream in a failed state that we must not ignore.
    ofs.close();
    if (ofs.fail()) {
        std::remove(path.c_str());
        throw std::runtime_error("Failed to write temporary file: " + path);
    }
    return path;
}
87
+
88
+ } // anonymous namespace
41
89
 
42
90
 
43
91
  PYBIND11_MODULE(_effspm, m) {
44
- m.doc() = "Unified SPM library: PrefixProjection, BTMiner, HTMiner";
92
+ m.doc() = "Unified SPM library: PrefixProjection, BTMiner, HTMiner, Large* variants";
45
93
 
46
94
  // ─────────────────────────────────────────────────────────────
47
- // PrefixProjection
95
+ // PrefixProjection (works directly on Python lists or files)
48
96
  // ─────────────────────────────────────────────────────────────
49
97
  m.def("PrefixProjection",
50
98
  [](py::object data,
@@ -59,7 +107,7 @@ PYBIND11_MODULE(_effspm, m) {
59
107
  ::pre_pro = preproc;
60
108
  ::use_dic = use_dic;
61
109
  ::use_list = false;
62
- ::b_disp = verbose;
110
+ ::b_disp = verbose; // controls prints in original code
63
111
  ::b_write = !out_file.empty();
64
112
  ::out_file = out_file;
65
113
 
@@ -69,7 +117,7 @@ PYBIND11_MODULE(_effspm, m) {
69
117
  if (py::isinstance<py::str>(data)) {
70
118
  std::string path = data.cast<std::string>();
71
119
  if (!Load_instance(path, minsup))
72
- throw std::runtime_error("Failed to load file: " + path);
120
+ throw std::runtime_error("PrefixProjection: failed to load file: " + path);
73
121
  } else {
74
122
  auto seqs = data.cast<std::vector<std::vector<int>>>();
75
123
  items = std::move(seqs);
@@ -113,9 +161,197 @@ PYBIND11_MODULE(_effspm, m) {
113
161
  );
114
162
 
115
163
  // ─────────────────────────────────────────────────────────────
116
- // BTMiner
164
+ // BTMiner (always uses professor's Load_instance)
165
+ // ─────────────────────────────────────────────────────────────
166
+ // ─────────────────────────────────────────────────────────────
167
+ // BTMiner (always uses professor's Load_instance)
168
+ // ─────────────────────────────────────────────────────────────
169
+ m.def("BTMiner",
170
+ [](py::object data,
171
+ double minsup,
172
+ unsigned int time_limit,
173
+ bool preproc,
174
+ bool use_dic,
175
+ bool verbose,
176
+ const std::string &out_file)
177
+ {
178
+ // 1) Configure professor globals
179
+ btminer::time_limit = static_cast<int>(time_limit);
180
+ btminer::pre_pro = preproc;
181
+ btminer::use_dic = use_dic;
182
+ btminer::b_disp = verbose;
183
+ btminer::b_write = !out_file.empty();
184
+ btminer::out_file = out_file;
185
+ btminer::N_mult = 1;
186
+ btminer::M_mult = 1;
187
+ btminer::just_build = false;
188
+
189
+ // 2) HARD RESET of *known* global state for BTMiner
190
+ // (Only touch what we know exists in btminer namespace)
191
+ btminer::ClearCollected(); // clear collected patterns
192
+ btminer::Tree.clear(); // clear MDD tree
193
+ btminer::DFS.clear(); // clear DFS patterns
194
+
195
+ btminer::M = 0;
196
+ btminer::L = 0;
197
+ btminer::N = 0;
198
+ btminer::theta = 0;
199
+ btminer::E = 0;
200
+ btminer::num_patt = 0; // reset pattern counter if defined
201
+
202
+ // NOTE: we do NOT reinsert root here; btminer::Load_instance()
203
+ // is responsible for calling Tree.emplace_back(0,0,0) as needed.
204
+
205
+ btminer::start_time = std::clock();
206
+
207
+ // 3) Handle input (path or list-of-lists)
208
+ TempFile tmp;
209
+ std::string path;
210
+
211
+ if (py::isinstance<py::str>(data)) {
212
+ // File path: use directly
213
+ path = data.cast<std::string>();
214
+ } else {
215
+ // Python list → write to a temp file in professor’s format
216
+ auto seqs = data.cast<std::vector<std::vector<int>>>();
217
+ tmp.path = write_temp_seq_file(seqs);
218
+ path = tmp.path;
219
+ }
220
+
221
+ if (verbose) {
222
+ std::cerr << "[BTMiner] path=" << path
223
+ << " minsup=" << minsup
224
+ << " preproc=" << preproc
225
+ << " use_dic=" << use_dic
226
+ << std::endl;
227
+ }
228
+
229
+ // 4) Build MDD + run miner
230
+ if (!btminer::Load_instance(path, minsup)) {
231
+ throw std::runtime_error("BTMiner: failed to load instance from: " + path);
232
+ }
233
+
234
+ btminer::Freq_miner();
235
+
236
+ // 5) Return results
237
+ py::dict out;
238
+ out["patterns"] = btminer::GetCollected();
239
+ out["num_patterns"] = btminer::num_patt;
240
+ out["time"] = btminer::give_time(std::clock() - btminer::start_time);
241
+ out["N"] = btminer::N;
242
+ out["L"] = btminer::L;
243
+ out["theta"] = btminer::theta;
244
+ return out;
245
+ },
246
+ py::arg("data"),
247
+ py::arg("minsup") = 0.01,
248
+ py::arg("time_limit") = 36000,
249
+ py::arg("preproc") = false,
250
+ py::arg("use_dic") = false,
251
+ py::arg("verbose") = false,
252
+ py::arg("out_file") = ""
253
+ );
254
+
255
+
256
+ // ─────────────────────────────────────────────────────────────
257
+ // HTMiner (works on files; we use a temp file for in-memory data)
258
+ // ─────────────────────────────────────────────────────────────
259
+ // ─────────────────────────────────────────────────────────────
260
+ // HTMiner (always uses professor's Load_instance; pre_pro forced ON)
261
+ // ─────────────────────────────────────────────────────────────
262
+ m.def("HTMiner",
263
+ [](py::object data,
264
+ double minsup,
265
+ unsigned int time_limit,
266
+ bool /*preproc*/, // Python arg is ignored internally
267
+ bool use_dic,
268
+ bool verbose,
269
+ const std::string &out_file)
270
+ {
271
+ using namespace htminer;
272
+
273
+ // ───────── Global parameter setup ─────────
274
+ htminer::time_limit = time_limit;
275
+
276
+ // IMPORTANT: always run with preprocessing ON,
277
+ // regardless of the Python `preproc` flag.
278
+ htminer::pre_pro = true;
279
+ htminer::use_dic = use_dic;
280
+ htminer::just_build = false;
281
+ htminer::b_disp = verbose;
282
+ htminer::b_write = !out_file.empty();
283
+ htminer::out_file = out_file;
284
+
285
+ // ───────── HARD RESET of HTMiner globals ─────────
286
+ htminer::ClearCollected();
287
+ htminer::Tree.clear();
288
+ htminer::VTree.clear();
289
+ htminer::CTree.clear();
290
+ htminer::DFS.clear();
291
+ htminer::VDFS.clear();
292
+ htminer::item_dic.clear();
293
+
294
+ htminer::M = 0;
295
+ htminer::N = 0;
296
+ htminer::L = 0;
297
+ htminer::E = 0;
298
+ htminer::theta = 0;
299
+ htminer::mlim = 0;
300
+ htminer::itmset_exists = false;
301
+
302
+ // NOTE: do NOT add a root arc here;
303
+ // htminer::Load_instance() already does Tree.emplace_back(0,0,0)
304
+ htminer::start_time = std::clock();
305
+
306
+ // ───────── Handle input (path or in-memory sequences) ─────────
307
+ TempFile tmp;
308
+ std::string path;
309
+
310
+ if (py::isinstance<py::str>(data)) {
311
+ // data is a file path
312
+ path = data.cast<std::string>();
313
+ } else {
314
+ // data is a list[list[int]] → write a temp file in the same text format
315
+ auto seqs = data.cast<std::vector<std::vector<int>>>();
316
+ tmp.path = write_temp_seq_file(seqs);
317
+ path = tmp.path;
318
+ }
319
+
320
+ if (verbose) {
321
+ std::cerr << "[HTMiner] path=" << path
322
+ << " minsup=" << minsup
323
+ << " preproc(always)=true"
324
+ << " use_dic=" << use_dic
325
+ << std::endl;
326
+ }
327
+
328
+ // ───────── Build MDD via professor's loader ─────────
329
+ if (!htminer::Load_instance(path, minsup)) {
330
+ throw std::runtime_error("HTMiner: failed to load instance from: " + path);
331
+ }
332
+
333
+ // ───────── Run miner ─────────
334
+ htminer::Freq_miner();
335
+
336
+ // ───────── Return results ─────────
337
+ py::dict out;
338
+ out["patterns"] = htminer::GetCollected();
339
+ out["time"] = htminer::give_time(std::clock() - htminer::start_time);
340
+ return out;
341
+ },
342
+ py::arg("data"),
343
+ py::arg("minsup") = 0.01,
344
+ py::arg("time_limit") = 36000,
345
+ py::arg("preproc") = false, // kept for API symmetry, but IGNORED
346
+ py::arg("use_dic") = false,
347
+ py::arg("verbose") = false,
348
+ py::arg("out_file") = ""
349
+ );
350
+
351
+ // ─────────────────────────────────────────────────────────────
352
+ // LargePrefixProjection (already has its own Load_py)
117
353
  // ─────────────────────────────────────────────────────────────
118
- m.def("BTMiner",
354
+ m.def("LargePrefixProjection",
119
355
  [](py::object data,
120
356
  double minsup,
121
357
  unsigned int time_limit,
@@ -124,140 +360,317 @@ PYBIND11_MODULE(_effspm, m) {
124
360
  bool verbose,
125
361
  const std::string &out_file)
126
362
  {
127
- btminer::time_limit = time_limit;
128
- btminer::pre_pro = preproc;
129
- btminer::use_dic = use_dic;
130
- btminer::use_list = false;
131
- btminer::b_disp = verbose;
132
- btminer::b_write = !out_file.empty();
133
- btminer::out_file = out_file;
363
+ largepp::time_limit = time_limit;
364
+ largepp::pre_pro = preproc;
365
+ largepp::use_dic = use_dic;
366
+ largepp::use_list = true; // large prefix uses list-based mining
367
+ largepp::b_disp = verbose;
368
+ largepp::b_write = !out_file.empty();
369
+ largepp::out_file = out_file;
370
+ largepp::just_build = false;
371
+
372
+ largepp::ClearCollected();
373
+ largepp::start_time = std::clock();
134
374
 
135
- btminer::ClearCollected();
136
- btminer::start_time = std::clock();
375
+ if (py::isinstance<py::str>(data)) {
376
+ std::string fname = data.cast<std::string>();
377
+ largepp::Load_instance(fname, minsup);
378
+ } else {
379
+ largepp::Load_py(data, minsup);
380
+ }
381
+
382
+ largepp::Freq_miner();
383
+
384
+ py::dict out;
385
+ out["patterns"] = largepp::GetCollected();
386
+ out["time"] = largepp::give_time(std::clock() - largepp::start_time);
387
+ return out;
388
+ },
389
+ py::arg("data"),
390
+ py::arg("minsup") = 0.01,
391
+ py::arg("time_limit") = 36000,
392
+ py::arg("preproc") = false,
393
+ py::arg("use_dic") = false,
394
+ py::arg("verbose") = false,
395
+ py::arg("out_file") = ""
396
+ );
397
+
398
+ // ─────────────────────────────────────────────────────────────
399
+ // LargeBTMiner (always uses professor's largebm::Load_instance)
400
+ // ─────────────────────────────────────────────────────────────
401
+ m.def("LargeBTMiner",
402
+ [](py::object data,
403
+ double minsup,
404
+ unsigned int time_limit,
405
+ bool preproc,
406
+ bool use_dic,
407
+ bool verbose,
408
+ const std::string &out_file)
409
+ {
410
+ using namespace largebm;
411
+
412
+ largebm::time_limit = time_limit;
413
+ largebm::pre_pro = preproc;
414
+ largebm::use_dic = use_dic;
415
+ largebm::use_list = false; // MDD-based
416
+ largebm::b_disp = verbose;
417
+ largebm::b_write = !out_file.empty();
418
+ largebm::out_file = out_file;
419
+ largebm::just_build = false;
420
+
421
+ largebm::ClearCollected();
422
+ largebm::items.clear();
423
+ largebm::item_dic.clear();
424
+ largebm::inv_item_dic.clear();
425
+ largebm::Tree.clear();
426
+ largebm::DFS.clear();
427
+
428
+ largebm::start_time = std::clock();
429
+
430
+ TempFile tmp;
431
+ std::string path;
137
432
 
138
433
  if (py::isinstance<py::str>(data)) {
139
- std::string path = data.cast<std::string>();
140
- if (!btminer::Load_instance(path, minsup))
141
- throw std::runtime_error("Failed to load file: " + path);
434
+ path = data.cast<std::string>();
142
435
  } else {
143
436
  auto seqs = data.cast<std::vector<std::vector<int>>>();
144
- btminer::items = std::move(seqs);
145
- btminer::N = btminer::items.size();
437
+ tmp.path = write_temp_seq_file(seqs);
438
+ path = tmp.path;
439
+ }
146
440
 
147
- int max_id = 0;
148
- for (auto &seq : btminer::items)
149
- for (int x : seq)
150
- max_id = std::max(max_id, std::abs(x));
151
- btminer::L = max_id;
441
+ if (verbose) {
442
+ std::cerr << "[LargeBTMiner] path=" << path
443
+ << " minsup=" << minsup
444
+ << " preproc=" << preproc
445
+ << " use_dic=" << use_dic
446
+ << std::endl;
447
+ }
152
448
 
153
- btminer::theta = (minsup < 1.0) ? std::ceil(minsup * btminer::N) : minsup;
449
+ if (!largebm::Load_instance(path, minsup)) {
450
+ throw std::runtime_error("LargeBTMiner: failed to load instance from: " + path);
451
+ }
154
452
 
155
- btminer::DFS.clear();
156
- btminer::DFS.reserve(btminer::L);
157
- for (unsigned int i = 0; i < btminer::L; ++i)
158
- btminer::DFS.emplace_back(-static_cast<int>(i) - 1);
453
+ largebm::Freq_miner();
159
454
 
160
- btminer::M = 0;
161
- btminer::E = 0;
162
- for (auto &seq : btminer::items) {
163
- btminer::M = std::max<unsigned int>(btminer::M, seq.size());
164
- btminer::E += seq.size();
165
- }
455
+ py::dict out;
456
+ out["patterns"] = largebm::GetCollected();
457
+ out["time"] = largebm::give_time(std::clock() - largebm::start_time);
458
+ return out;
459
+ },
460
+ py::arg("data"),
461
+ py::arg("minsup") = 0.01,
462
+ py::arg("time_limit") = 36000,
463
+ py::arg("preproc") = false,
464
+ py::arg("use_dic") = false,
465
+ py::arg("verbose") = false,
466
+ py::arg("out_file") = ""
467
+ );
468
+
469
+ // ─────────────────────────────────────────────────────────────
470
+ // LargeHTMiner (always uses professor's largehm::Load_instance; pre_pro forced ON)
471
+ // ─────────────────────────────────────────────────────────────
472
+ // ─────────────────────────────────────────────────────────────
473
+ // LargeHTMiner (professor's Large HTMiner, namespaced as largehm)
474
+ // ─────────────────────────────────────────────────────────────
475
+ m.def("LargeHTMiner",
476
+ [](py::object data,
477
+ double minsup,
478
+ unsigned int time_limit,
479
+ bool /*preproc*/, // kept for API symmetry; ignored
480
+ bool use_dic,
481
+ bool verbose,
482
+ const std::string &out_file)
483
+ {
484
+ using namespace largehm;
485
+
486
+ // 1) Global configuration (mirror professor's style)
487
+ largehm::time_limit = time_limit;
488
+ largehm::pre_pro = true; // always preprocess
489
+ largehm::use_dic = use_dic;
490
+ largehm::just_build = false;
491
+ largehm::b_disp = verbose;
492
+ largehm::b_write = !out_file.empty();
493
+ largehm::out_file = out_file;
494
+
495
+ // 2) HARD RESET of all global state for a fresh run
496
+ largehm::ClearCollected(); // our helper in largehm::utility.cpp
497
+
498
+ largehm::M = 0;
499
+ largehm::L = 0;
500
+ largehm::mlim = 0;
501
+ largehm::N = 0;
502
+ largehm::theta = 0;
503
+ largehm::E = 0;
504
+ largehm::itmset_exists = false;
505
+
506
+ // containers
507
+ // (item_dic reset is optional and not strictly needed here)
508
+ largehm::DFS.clear();
509
+ largehm::VDFS.clear();
510
+ largehm::Tree.clear();
511
+ largehm::VTree.clear();
512
+ largehm::CTree.clear();
513
+
514
+ largehm::start_time = std::clock();
515
+
516
+ // 3) Handle input (file path or Python list)
517
+ TempFile tmp;
518
+ std::string path;
519
+
520
+ if (py::isinstance<py::str>(data)) {
521
+ path = data.cast<std::string>();
522
+ } else {
523
+ auto seqs = data.cast<std::vector<std::vector<int>>>();
524
+ tmp.path = write_temp_seq_file(seqs);
525
+ path = tmp.path;
166
526
  }
167
527
 
168
- btminer::Freq_miner();
528
+ if (verbose) {
529
+ std::cerr << "[LargeHTMiner] path=" << path
530
+ << " minsup=" << minsup
531
+ << " preproc(always)=true"
532
+ << " use_dic=" << use_dic
533
+ << std::endl;
534
+ }
535
+
536
+ // 4) Build MDD / load instance.
537
+ // NOTE: Load_instance() itself does Tree.emplace_back(0,0,0),
538
+ // so we DO NOT create a root node here.
539
+ if (!largehm::Load_instance(path, minsup)) {
540
+ throw std::runtime_error("LargeHTMiner: failed to load instance from: " + path);
541
+ }
542
+
543
+ // 5) Run miner (same timing logic as original main)
544
+ if (!largehm::just_build &&
545
+ largehm::give_time(std::clock() - largehm::start_time) < largehm::time_limit)
546
+ {
547
+ largehm::Freq_miner();
548
+ if (largehm::give_time(std::clock() - largehm::start_time) >= largehm::time_limit) {
549
+ std::cout << "TIME LIMIT REACHED\n";
550
+ }
551
+ }
169
552
 
553
+ // 6) Return collected patterns + runtime
170
554
  py::dict out;
171
- out["patterns"] = btminer::GetCollected();
172
- out["time"] = btminer::give_time(std::clock() - btminer::start_time);
555
+ out["patterns"] = largehm::GetCollected();
556
+ out["time"] = largehm::give_time(std::clock() - largehm::start_time);
173
557
  return out;
174
558
  },
175
559
  py::arg("data"),
176
560
  py::arg("minsup") = 0.01,
177
561
  py::arg("time_limit") = 36000,
178
- py::arg("preproc") = false,
562
+ py::arg("preproc") = false, // kept for API symmetry
179
563
  py::arg("use_dic") = false,
180
564
  py::arg("verbose") = false,
181
565
  py::arg("out_file") = ""
182
566
  );
183
567
 
184
- // ─────────────────────────────────────────────────────────────
185
- // HTMiner
186
- // ─────────────────────────────────────────────────────────────
187
- m.def("HTMiner",
568
+
569
+ }
570
+
571
+
572
+ /*#include <pybind11/pybind11.h>
573
+ #include <pybind11/stl.h>
574
+ namespace py = pybind11;
575
+ #include <iostream>
576
+
577
+
578
+ // PrefixProjection headers
579
+ #include "freq_miner.hpp"
580
+ #include "load_inst.hpp"
581
+ #include "utility.hpp"
582
+
583
+ // BTMiner (wrapped in its own namespace in source files)
584
+ #include "btminer/src/freq_miner.hpp"
585
+ #include "btminer/src/load_inst.hpp"
586
+ #include "btminer/src/utility.hpp"
587
+ #include "btminer/src/build_mdd.hpp"
588
+
589
+ // HTMiner (wrapped in its own namespace in source files)
590
+ #include "htminer/src/build_mdd.hpp" // ← ensure HTMiner MDD builder is available
591
+ #include "htminer/src/freq_miner.hpp"
592
+ #include "htminer/src/load_inst.hpp"
593
+ #include "htminer/src/utility.hpp"
594
+
595
+
596
+ #include "largepp/src/freq_miner.hpp"
597
+ #include "largepp/src/load_inst.hpp"
598
+ #include "largepp/src/utility.hpp"
599
+
600
+
601
+ #include "largebm/src/freq_miner.hpp"
602
+ #include "largebm/src/load_inst.hpp"
603
+ #include "largebm/src/utility.hpp"
604
+ #include "largebm/src/build_mdd.hpp"
605
+
606
+ #include "largehm/src/freq_miner.hpp"
607
+ #include "largehm/src/load_inst.hpp"
608
+ #include "largehm/src/utility.hpp"
609
+ #include "largehm/src/build_mdd.hpp"
610
+
611
+
612
+
613
+ PYBIND11_MODULE(_effspm, m) {
614
+ m.doc() = "Unified SPM library: PrefixProjection, BTMiner, HTMiner";
615
+
616
+ // ─────────────────────────────────────────────────────────────
617
+ // PrefixProjection
618
+ // ─────────────────────────────────────────────────────────────
619
+ m.def("PrefixProjection",
188
620
  [](py::object data,
189
- double minsup, unsigned int time_limit,
190
- bool preproc, bool use_dic,
191
- bool verbose, const std::string &out_file)
621
+ double minsup,
622
+ unsigned int time_limit,
623
+ bool preproc,
624
+ bool use_dic,
625
+ bool verbose,
626
+ const std::string &out_file)
192
627
  {
193
- // 1) set HTMiner globals (declared in htminer/src/utility.hpp)
194
- htminer::time_limit = time_limit;
195
- htminer::pre_pro = preproc;
196
- htminer::use_dic = use_dic;
197
- htminer::just_build = false; // or true if you want “build only”
198
- htminer::use_list = false; // HTMiner always uses MDD‐based mode
199
- htminer::b_disp = verbose;
200
- htminer::b_write = !out_file.empty();
201
- htminer::out_file = out_file;
202
- htminer::ClearCollected(); // clear any leftover patterns
203
- htminer::start_time = std::clock();
204
-
205
- // 2) load sequences (either from filename or from Python list)
628
+ ::time_limit = time_limit;
629
+ ::pre_pro = preproc;
630
+ ::use_dic = use_dic;
631
+ ::use_list = false;
632
+ ::b_disp = verbose;
633
+ ::b_write = !out_file.empty();
634
+ ::out_file = out_file;
635
+
636
+ ClearCollected();
637
+ start_time = std::clock();
638
+
206
639
  if (py::isinstance<py::str>(data)) {
207
640
  std::string path = data.cast<std::string>();
208
- if (!htminer::Load_instance(path, minsup))
641
+ if (!Load_instance(path, minsup))
209
642
  throw std::runtime_error("Failed to load file: " + path);
210
643
  } else {
211
644
  auto seqs = data.cast<std::vector<std::vector<int>>>();
212
- htminer::items = std::move(seqs);
213
- htminer::N = htminer::items.size();
645
+ items = std::move(seqs);
646
+ N = items.size();
214
647
 
215
- // compute L (max item ID), M (max sequence length), E (total entries)
216
648
  int max_id = 0;
217
- htminer::M = 0;
218
- htminer::E = 0;
219
- for (auto &seq : htminer::items) {
220
- htminer::M = std::max<unsigned int>(htminer::M, seq.size());
649
+ for (auto &seq : items)
221
650
  for (int x : seq)
222
651
  max_id = std::max(max_id, std::abs(x));
223
- htminer::E += seq.size();
652
+ L = max_id;
653
+
654
+ theta = (minsup < 1.0) ? std::ceil(minsup * N) : minsup;
655
+
656
+ DFS.clear();
657
+ DFS.reserve(L);
658
+ for (unsigned int i = 0; i < L; ++i)
659
+ DFS.emplace_back(-static_cast<int>(i) - 1);
660
+
661
+ M = 0;
662
+ E = 0;
663
+ for (auto &seq : items) {
664
+ M = std::max<unsigned int>(M, seq.size());
665
+ E += seq.size();
224
666
  }
225
- htminer::L = max_id;
226
- htminer::theta = (minsup < 1.0)
227
- ? static_cast<unsigned long long>(std::ceil(minsup * htminer::N))
228
- : static_cast<unsigned long long>(minsup);
229
-
230
- // build empty DFS stack (size L) as HTMiner expects
231
- htminer::DFS.clear();
232
- htminer::DFS.reserve(htminer::L);
233
- for (unsigned int i = 0; i < static_cast<unsigned int>(htminer::L); ++i)
234
- htminer::DFS.emplace_back(-static_cast<int>(i) - 1);
235
-
236
- // initialize VDFS if HTMiner needs it
237
- htminer::VDFS.clear();
238
- htminer::VDFS.resize(htminer::L);
239
667
  }
240
668
 
241
- // 3) run the mining algorithm
242
- htminer::Freq_miner();
243
-
244
- // std::cout << "[HTMiner] dumping all collected patterns:\n";
245
- // for (size_t i = 0; i < htminer::collectedPatterns.size(); ++i) {
246
- // const auto &seq = htminer::collectedPatterns[i];
247
- // std::cout << "Pattern " << i << ": { ";
248
- // for (int x : seq) {
249
- // std::cout << x << " ";
250
- // }
251
- // std::cout << "}\n";
252
- //}
253
- std::cout << " total patterns = "
254
- << htminer::collectedPatterns.size() << "\n";
255
- // ─────────────────────────────────────────────────
256
-
257
- // 4) return patterns + elapsed time
669
+ Freq_miner();
670
+
258
671
  py::dict out;
259
- out["patterns"] = htminer::GetCollected();
260
- out["time"] = htminer::give_time(std::clock() - htminer::start_time);
672
+ out["patterns"] = GetCollected();
673
+ out["time"] = give_time(std::clock() - start_time);
261
674
  return out;
262
675
  },
263
676
  py::arg("data"),
@@ -268,8 +681,223 @@ std::cout << " total patterns = "
268
681
  py::arg("verbose") = false,
269
682
  py::arg("out_file") = ""
270
683
  );
684
+ m.def("BTMiner",
685
+ [](py::object data,
686
+ double minsup,
687
+ unsigned int time_limit,
688
+ bool preproc,
689
+ bool use_dic,
690
+ bool verbose,
691
+ const std::string &out_file)
692
+ {
693
+ // We are calling the *professor* BTMiner, now namespaced as btminer::.
694
+ // So we only set the globals the professor code actually has.
695
+
696
+ // 1) configure professor globals
697
+ btminer::time_limit = static_cast<int>(time_limit);
698
+ btminer::pre_pro = preproc;
699
+ btminer::use_dic = use_dic;
700
+ btminer::b_disp = verbose;
701
+ btminer::b_write = !out_file.empty();
702
+ btminer::out_file = out_file;
703
+ btminer::N_mult = 1; // professor uses these too
704
+ btminer::M_mult = 1;
705
+ btminer::just_build = false; // we want full mining
706
+
707
+ btminer::start_time = std::clock();
708
+
709
+ // 2) load data
710
+ //
711
+ // Professor’s code is primarily file-based (Load_instance(const string&, double)).
712
+ // So: if user passes a file path → use the professor loader directly.
713
+ // If user passes a Python list-of-lists → we will build the MDD the same
714
+ // way professor’s loader does, but without changing his logic.
715
+ if (py::isinstance<py::str>(data)) {
716
+ // ----- FILE MODE -----
717
+ std::string path = data.cast<std::string>();
271
718
 
272
- m.def("LargePrefixProjection",
719
+ if (verbose) {
720
+ std::cerr << "[BT][binding] file=" << path
721
+ << " minsup=" << minsup
722
+ << " preproc=" << preproc << std::endl;
723
+ }
724
+
725
+ if (!btminer::Load_instance(path, minsup)) {
726
+ throw std::runtime_error("BTMiner: failed to load file: " + path);
727
+ }
728
+ } else {
729
+ // ----- PYTHON LIST MODE -----
730
+ //
731
+ // We mimic professor’s loader:
732
+ // - create root in Tree
733
+ // - compute N, M, L
734
+ // - compute theta from minsup
735
+ // - seed DFS (one Pattern per item, as in Preprocess branch)
736
+ // - call Build_MDD(...) for each sequence
737
+ //
738
+ // This DOES NOT change his mining logic; it just drives it from memory.
739
+
740
+ auto seqs = data.cast<std::vector<std::vector<int>>>();
741
+
742
+ // clear MDD and globals to a known state
743
+ btminer::Tree.clear();
744
+ btminer::Tree.emplace_back(0, 0, 0); // root (exactly like professor)
745
+
746
+ // compute basic stats
747
+ int max_id = 0;
748
+ int max_len = 0;
749
+ int seq_count = 0;
750
+ long long entries = 0;
751
+
752
+ for (const auto &s : seqs) {
753
+ if (s.empty()) continue;
754
+ ++seq_count;
755
+ max_len = std::max<int>(max_len, static_cast<int>(s.size()));
756
+ for (int x : s) {
757
+ max_id = std::max(max_id, std::abs(x));
758
+ ++entries;
759
+ }
760
+ }
761
+
762
+ btminer::N = seq_count;
763
+ btminer::M = max_len;
764
+ btminer::L = max_id;
765
+ btminer::E = static_cast<int>(entries);
766
+
767
+ // theta = abs support
768
+ if (minsup < 1.0)
769
+ btminer::theta = static_cast<int>(std::ceil(minsup * btminer::N * btminer::N_mult));
770
+ else
771
+ btminer::theta = static_cast<int>(minsup);
772
+
773
+ // seed DFS exactly like professor does in the preprocessed branch:
774
+ btminer::DFS.clear();
775
+ btminer::DFS.reserve(btminer::L);
776
+ for (int i = 0; i < btminer::L; ++i)
777
+ btminer::DFS.emplace_back(-i - 1);
778
+
779
+ // now build the MDD, sequence by sequence
780
+ for (const auto &s : seqs) {
781
+ if (s.empty()) continue;
782
+ // professor’s Build_MDD takes a vector<int> by non-const ref
783
+ std::vector<int> tmp = s;
784
+ btminer::Build_MDD(tmp);
785
+ }
786
+
787
+ if (verbose) {
788
+ std::cerr << "[BT][binding] PY mode: N=" << btminer::N
789
+ << " L=" << btminer::L
790
+ << " M=" << btminer::M
791
+ << " E=" << btminer::E
792
+ << " theta=" << btminer::theta
793
+ << " Tree.size()=" << btminer::Tree.size()
794
+ << std::endl;
795
+ }
796
+ }
797
+
798
+ // 3) run professor’s miner
799
+ btminer::Freq_miner();
800
+
801
+ // 4) build python result
802
+ // 4) build python result
803
+ py::dict out;
804
+ out["patterns"] = btminer::GetCollected(); // ← NEW
805
+ out["num_patterns"] = btminer::num_patt;
806
+ out["time"] = btminer::give_time(std::clock() - btminer::start_time);
807
+ out["N"] = btminer::N;
808
+ out["L"] = btminer::L;
809
+ out["theta"] = btminer::theta;
810
+ return out;
811
+
812
+ },
813
+ py::arg("data"),
814
+ py::arg("minsup") = 0.01,
815
+ py::arg("time_limit") = 36000,
816
+ py::arg("preproc") = false,
817
+ py::arg("use_dic") = false,
818
+ py::arg("verbose") = false,
819
+ py::arg("out_file") = ""
820
+ );
821
+
822
+
823
+
824
+
825
+ // HTMiner
826
+ // ─────────────────────────────────────────────────────────────
827
+ // HTMiner
828
+ m.def("HTMiner",
829
+ [](py::object data,
830
+ double minsup, unsigned int time_limit,
831
+ bool preproc, bool use_dic,
832
+ bool verbose, const std::string &out_file)
833
+ {
834
+ htminer::time_limit = time_limit;
835
+ htminer::pre_pro = preproc;
836
+ htminer::use_dic = use_dic;
837
+ htminer::just_build = false;
838
+ htminer::use_list = false;
839
+ htminer::b_disp = verbose;
840
+ htminer::b_write = !out_file.empty();
841
+ htminer::out_file = out_file;
842
+ htminer::ClearCollected();
843
+ htminer::start_time = std::clock();
844
+
845
+ if (py::isinstance<py::str>(data)) {
846
+ std::string path = data.cast<std::string>();
847
+ if (!htminer::Load_instance(path, minsup))
848
+ throw std::runtime_error("Failed to load file: " + path);
849
+ } else {
850
+ auto seqs = data.cast<std::vector<std::vector<int>>>();
851
+ htminer::items = std::move(seqs);
852
+ htminer::N = htminer::items.size();
853
+
854
+ int max_id = 0;
855
+ htminer::M = 0;
856
+ htminer::E = 0;
857
+ for (auto &seq : htminer::items) {
858
+ htminer::M = std::max<unsigned int>(htminer::M, seq.size());
859
+ for (int x : seq)
860
+ max_id = std::max(max_id, std::abs(x));
861
+ htminer::E += seq.size();
862
+ }
863
+ htminer::L = max_id;
864
+ htminer::theta = (minsup < 1.0)
865
+ ? static_cast<unsigned long long>(std::ceil(minsup * htminer::N))
866
+ : static_cast<unsigned long long>(minsup);
867
+
868
+ htminer::DFS.clear();
869
+ htminer::DFS.reserve(htminer::L);
870
+ for (unsigned int i = 0; i < static_cast<unsigned int>(htminer::L); ++i)
871
+ htminer::DFS.emplace_back(-static_cast<int>(i) - 1);
872
+
873
+ htminer::VDFS.clear();
874
+ htminer::VDFS.resize(htminer::L);
875
+ }
876
+
877
+ htminer::Freq_miner();
878
+
879
+ // 👇 now really respects verbose
880
+ if (verbose) {
881
+ std::cout << " total patterns = "
882
+ << htminer::collectedPatterns.size() << "\n";
883
+ }
884
+
885
+ py::dict out;
886
+ out["patterns"] = htminer::GetCollected();
887
+ out["time"] = htminer::give_time(std::clock() - htminer::start_time);
888
+ return out;
889
+ },
890
+ py::arg("data"),
891
+ py::arg("minsup") = 0.01,
892
+ py::arg("time_limit") = 36000,
893
+ py::arg("preproc") = false,
894
+ py::arg("use_dic") = false,
895
+ py::arg("verbose") = false,
896
+ py::arg("out_file") = ""
897
+ );
898
+
899
+
900
+ m.def("LargePrefixProjection",
273
901
  [](py::object data,
274
902
  double minsup,
275
903
  unsigned int time_limit,
@@ -281,28 +909,30 @@ std::cout << " total patterns = "
281
909
  largepp::time_limit = time_limit;
282
910
  largepp::pre_pro = preproc;
283
911
  largepp::use_dic = use_dic;
284
- largepp::use_list = true; // ← key difference
912
+ largepp::use_list = true;
285
913
  largepp::b_disp = verbose;
286
914
  largepp::b_write = !out_file.empty();
287
915
  largepp::out_file = out_file;
288
- largepp::just_build = false;
916
+ largepp::just_build = false;
289
917
 
290
918
  largepp::ClearCollected();
291
919
  largepp::start_time = std::clock();
292
- std::string fname = data.cast<std::string>();
293
- /* 1) load instance (py list or filename) */
294
- if (py::isinstance<py::str>(data))
295
-
296
- largepp::Load_instance(fname, minsup);
297
- else
298
- largepp::Load_py(data, minsup); // helper you’ll expose
299
-
300
- std::vector<unsigned long long> dbg;
301
-
302
-
303
-
304
920
 
921
+ // 👇 this was the last noisy one
922
+ if (verbose) {
923
+ std::cerr << " minsup=" << minsup
924
+ << " preproc=" << preproc
925
+ << " verbose=" << verbose
926
+ << " out_file=" << (out_file.empty() ? "(none)" : out_file)
927
+ << " use_dic=" << use_dic << "\n";
928
+ }
305
929
 
930
+ if (py::isinstance<py::str>(data)) {
931
+ std::string fname = data.cast<std::string>();
932
+ largepp::Load_instance(fname, minsup);
933
+ } else {
934
+ largepp::Load_py(data, minsup);
935
+ }
306
936
 
307
937
  largepp::Freq_miner();
308
938
 
@@ -320,6 +950,8 @@ std::cout << " total patterns = "
320
950
  py::arg("out_file") = ""
321
951
  );
322
952
 
953
+
954
+
323
955
  // ─────────────────────────────────────────────────────────────
324
956
  // LargeBTMiner -- Python wrapper for the largebm implementation
325
957
  // ─────────────────────────────────────────────────────────────
@@ -404,100 +1036,108 @@ std::cout << " total patterns = "
404
1036
 
405
1037
 
406
1038
 
407
- m.def("LargeBTMiner",
408
- [](py::object data,
409
- double minsup,
410
- unsigned int time_limit,
411
- bool preproc,
412
- bool use_dic,
413
- bool verbose,
414
- const std::string &out_file)
415
- {
416
- // 0) Set global flags and timers
417
- largebm::time_limit = time_limit;
418
- largebm::pre_pro = preproc;
419
- largebm::use_dic = use_dic;
420
- largebm::use_list = false; // large‑mode → always MDD
421
- largebm::b_disp = verbose;
422
- largebm::b_write = !out_file.empty();
423
- largebm::out_file = out_file;
424
- largebm::just_build = false;
1039
+ // ─────────────────────────────────────────────────────────────────────────
1040
+ // LargeBTMiner (MDD-based)
1041
+ // ─────────────────────────────────────────────────────────────────────────
1042
+ /*m.def("LargeBTMiner",
1043
+ [](py::object data,
1044
+ double minsup,
1045
+ unsigned int time_limit,
1046
+ bool preproc,
1047
+ bool use_dic,
1048
+ bool verbose,
1049
+ const std::string &out_file)
1050
+ {
1051
+ using namespace largebm;
1052
+
1053
+ // 0) Set global flags and timers
1054
+ largebm::time_limit = time_limit;
1055
+ largebm::pre_pro = preproc;
1056
+ largebm::use_dic = use_dic;
1057
+ largebm::use_list = false; // large-mode → always MDD
1058
+ largebm::b_disp = verbose;
1059
+ largebm::b_write = !out_file.empty();
1060
+ largebm::out_file = out_file;
1061
+ largebm::just_build = false;
1062
+
1063
+ // 0.1) Clear any leftover data/state from previous runs
1064
+ largebm::items.clear();
1065
+ largebm::item_dic.clear();
1066
+ largebm::inv_item_dic.clear();
1067
+ largebm::Tree.clear();
1068
+ largebm::DFS.clear();
1069
+ largebm::ClearCollected();
1070
+
1071
+ // 1) Load sequences (either from filename or from Python list)
1072
+ if (py::isinstance<py::str>(data)) {
1073
+ // ─────────── FILE-BASED MODE ───────────
1074
+ std::string path = data.cast<std::string>();
1075
+ if (!largebm::Load_instance(path, minsup))
1076
+ throw std::runtime_error("Failed to load file: " + path);
425
1077
 
426
- // 0.1) Clear any leftover data/state from previous runs
427
- largebm::items.clear();
428
- largebm::item_dic.clear();
429
- largebm::inv_item_dic.clear();
430
- largebm::Tree.clear();
431
- largebm::DFS.clear();
432
- largebm::ClearCollected();
1078
+ } else {
1079
+ // ────────── IN-MEMORY MODE ──────────
1080
+ auto seqs = data.cast<std::vector<std::vector<int>>>();
1081
+ largebm::items = std::move(seqs);
1082
+ largebm::N = largebm::items.size();
433
1083
 
434
- // 1) Load sequences (either from filename or from Python list)
435
- if (py::isinstance<py::str>(data)) {
436
- // ─────────── FILE‑BASED MODE ───────────
437
- std::string path = data.cast<std::string>();
438
- if (!largebm::Load_instance(path, minsup))
439
- throw std::runtime_error("Failed to load file: " + path);
1084
+ // 1.1) Compute basic DB statistics (M, E, L) and absolute support θ
1085
+ int max_id = 0;
1086
+ largebm::M = 0;
1087
+ largebm::E = 0;
1088
+ for (auto &seq : largebm::items) {
1089
+ largebm::M = std::max<unsigned int>(largebm::M, static_cast<unsigned int>(seq.size()));
1090
+ largebm::E += static_cast<unsigned long long>(seq.size());
1091
+ for (int x : seq) max_id = std::max(max_id, std::abs(x));
1092
+ }
1093
+ largebm::L = static_cast<unsigned int>(max_id);
1094
+ largebm::theta = (minsup < 1.0)
1095
+ ? static_cast<unsigned long long>(std::ceil(minsup * largebm::N))
1096
+ : static_cast<unsigned long long>(minsup);
440
1097
 
441
- } else {
442
- // ────────── IN‑MEMORY MODE ──────────
443
- auto seqs = data.cast<std::vector<std::vector<int>>>();
444
- largebm::items = std::move(seqs);
445
- largebm::N = largebm::items.size();
1098
+ // 1.2) Initialize DFS buffer (size = L)
1099
+ largebm::DFS.reserve(largebm::L);
1100
+ for (unsigned int i = 0; i < largebm::L; ++i)
1101
+ largebm::DFS.emplace_back(-static_cast<int>(i) - 1);
446
1102
 
447
- // 1.1) Compute basic DB statistics (M, E, L) and absolute support θ
448
- int max_id = 0;
449
- largebm::M = 0;
450
- largebm::E = 0;
451
- for (auto &seq : largebm::items) {
452
- largebm::M = std::max<unsigned int>(largebm::M, static_cast<unsigned int>(seq.size()));
453
- largebm::E += static_cast<unsigned long long>(seq.size());
454
- for (int x : seq) max_id = std::max(max_id, std::abs(x));
455
- }
456
- largebm::L = static_cast<unsigned int>(max_id);
457
- largebm::theta = (minsup < 1.0)
458
- ? static_cast<unsigned long long>(std::ceil(minsup * largebm::N))
459
- : static_cast<unsigned long long>(minsup);
460
-
461
- // 1.2) Initialize DFS buffer (size = L)
462
- largebm::DFS.reserve(largebm::L);
463
- for (unsigned int i = 0; i < largebm::L; ++i)
464
- largebm::DFS.emplace_back(-static_cast<int>(i) - 1);
465
-
466
- // 1.3) Build the MDD “Tree”
467
- // Insert one dummy root node (item=0, freq=0, anct=0)
468
- largebm::Tree.emplace_back(0, 0, 0);
469
- for (auto &seq : largebm::items)
470
- largebm::Build_MDD(const_cast<std::vector<int>&>(seq));
471
- }
1103
+ // 1.3) Build the MDD “Tree”
1104
+ // Insert one dummy root node (item=0, freq=0, anct=0)
1105
+ largebm::Tree.emplace_back(0, 0, 0);
1106
+ for (auto &seq : largebm::items)
1107
+ largebm::Build_MDD(const_cast<std::vector<int>&>(seq));
1108
+ }
472
1109
 
473
- // 2) Rebuild inverse-dictionary from fresh item_dic
474
- {
475
- std::vector<int> inv(largebm::item_dic.size() + 1);
476
- for (int old = 1; old <= static_cast<int>(largebm::item_dic.size()); ++old) {
477
- int cid = largebm::item_dic[old - 1];
478
- if (cid > 0) inv[cid] = old;
479
- }
480
- largebm::inv_item_dic = std::move(inv);
1110
+ // 2) Rebuild inverse-dictionary from fresh item_dic
1111
+ {
1112
+ std::vector<int> inv(largebm::item_dic.size() + 1);
1113
+ for (int old = 1; old <= static_cast<int>(largebm::item_dic.size()); ++old) {
1114
+ int cid = largebm::item_dic[old - 1];
1115
+ if (cid > 0) inv[cid] = old;
481
1116
  }
1117
+ largebm::inv_item_dic = std::move(inv);
1118
+ }
482
1119
 
483
- // 3) Start timing and run the miner
484
- largebm::start_time = std::clock();
485
- largebm::Freq_miner();
1120
+ // 3) Start timing and run the miner
1121
+ largebm::start_time = std::clock();
1122
+ largebm::Freq_miner();
1123
+
1124
+ // 4) Collect results and elapsed time
1125
+ const auto& pats = largebm::GetCollected();
1126
+
1127
+ py::dict out;
1128
+ out["patterns"] = pats;
1129
+ out["time"] = largebm::give_time(std::clock() - largebm::start_time);
1130
+ return out;
1131
+ },
1132
+ py::arg("data"),
1133
+ py::arg("minsup") = 0.01,
1134
+ py::arg("time_limit") = 36000,
1135
+ py::arg("preproc") = false,
1136
+ py::arg("use_dic") = false,
1137
+ py::arg("verbose") = false,
1138
+ py::arg("out_file") = ""
1139
+ );
486
1140
 
487
- // 4) Collect results and elapsed time
488
- py::dict out;
489
- out["patterns"] = largebm::GetCollected();
490
- out["time"] = largebm::give_time(std::clock() - largebm::start_time);
491
- return out;
492
- },
493
- py::arg("data"),
494
- py::arg("minsup") = 0.01,
495
- py::arg("time_limit") = 36000,
496
- py::arg("preproc") = false,
497
- py::arg("use_dic") = false,
498
- py::arg("verbose") = false,
499
- py::arg("out_file") = ""
500
- );
501
1141
 
502
1142
 
503
1143
  m.def("LargeHTMiner",
@@ -606,4 +1246,4 @@ m.def("LargeHTMiner",
606
1246
 
607
1247
 
608
1248
 
609
- }
1249
+ } */