effspm 0.2.7__cp312-cp312-win_amd64.whl → 0.3.3__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. effspm/_effspm.cp312-win_amd64.pyd +0 -0
  2. effspm/_effspm.cpp +961 -210
  3. effspm/btminer/src/build_mdd.cpp +42 -17
  4. effspm/btminer/src/build_mdd.hpp +13 -19
  5. effspm/btminer/src/freq_miner.cpp +134 -49
  6. effspm/btminer/src/freq_miner.hpp +16 -0
  7. effspm/btminer/src/load_inst.cpp +211 -126
  8. effspm/btminer/src/load_inst.hpp +22 -4
  9. effspm/btminer/src/main.cpp +83 -0
  10. effspm/btminer/src/utility.cpp +26 -41
  11. effspm/btminer/src/utility.hpp +6 -30
  12. effspm/freq_miner.hpp +2 -1
  13. effspm/htminer/src/build_mdd.cpp +46 -124
  14. effspm/htminer/src/build_mdd.hpp +56 -49
  15. effspm/htminer/src/freq_miner.cpp +341 -307
  16. effspm/htminer/src/freq_miner.hpp +39 -40
  17. effspm/htminer/src/load_inst.cpp +287 -336
  18. effspm/htminer/src/load_inst.hpp +23 -6
  19. effspm/htminer/src/main.cpp +97 -0
  20. effspm/htminer/src/utility.cpp +38 -57
  21. effspm/htminer/src/utility.hpp +9 -64
  22. effspm/largebm/src/build_mdd.cpp +69 -110
  23. effspm/largebm/src/build_mdd.hpp +22 -37
  24. effspm/largebm/src/freq_miner.cpp +241 -291
  25. effspm/largebm/src/freq_miner.hpp +25 -36
  26. effspm/largebm/src/load_inst.cpp +20 -26
  27. effspm/largebm/src/load_inst.hpp +24 -34
  28. effspm/largebm/src/main.cpp +95 -0
  29. effspm/largebm/src/utility.cpp +11 -21
  30. effspm/largebm/src/utility.hpp +7 -10
  31. effspm/largehm/src/build_mdd.cpp +75 -110
  32. effspm/largehm/src/build_mdd.hpp +53 -73
  33. effspm/largehm/src/freq_miner.cpp +134 -191
  34. effspm/largehm/src/freq_miner.hpp +37 -60
  35. effspm/largehm/src/load_inst.cpp +137 -174
  36. effspm/largehm/src/load_inst.hpp +13 -50
  37. effspm/largehm/src/main.cpp +95 -0
  38. effspm/largehm/src/utility.cpp +46 -28
  39. effspm/largehm/src/utility.hpp +18 -16
  40. effspm/largepp/src/freq_miner.cpp +184 -156
  41. effspm/largepp/src/freq_miner.hpp +11 -36
  42. effspm/largepp/src/load_inst.cpp +32 -12
  43. effspm/largepp/src/load_inst.hpp +15 -9
  44. effspm/largepp/src/main.cpp +108 -0
  45. effspm/largepp/src/pattern.hpp +31 -0
  46. effspm/load_inst.cpp +8 -8
  47. effspm/load_inst.hpp +1 -1
  48. effspm/main.cpp +103 -0
  49. {effspm-0.2.7.dist-info → effspm-0.3.3.dist-info}/METADATA +1 -1
  50. effspm-0.3.3.dist-info/RECORD +60 -0
  51. effspm-0.2.7.dist-info/RECORD +0 -53
  52. {effspm-0.2.7.dist-info → effspm-0.3.3.dist-info}/WHEEL +0 -0
  53. {effspm-0.2.7.dist-info → effspm-0.3.3.dist-info}/licenses/LICENSE +0 -0
  54. {effspm-0.2.7.dist-info → effspm-0.3.3.dist-info}/top_level.txt +0 -0
effspm/_effspm.cpp CHANGED
@@ -2,49 +2,97 @@
2
2
 
3
3
  #include <pybind11/pybind11.h>
4
4
  #include <pybind11/stl.h>
5
- namespace py = pybind11;
5
+
6
6
  #include <iostream>
7
+ #include <fstream>
8
+ #include <cstdio> // std::remove
9
+ #include <vector>
10
+ #include <string>
11
+ #include <ctime>
12
+ #include <cmath>
7
13
 
14
+ namespace py = pybind11;
8
15
 
9
- // PrefixProjection headers
16
+ // PrefixProjection headers (global namespace)
10
17
  #include "freq_miner.hpp"
11
18
  #include "load_inst.hpp"
12
19
  #include "utility.hpp"
13
20
 
14
- // BTMiner (wrapped in its own namespace in source files)
21
+ // BTMiner (namespaced)
15
22
  #include "btminer/src/freq_miner.hpp"
16
23
  #include "btminer/src/load_inst.hpp"
17
24
  #include "btminer/src/utility.hpp"
18
25
  #include "btminer/src/build_mdd.hpp"
19
26
 
20
- // HTMiner (wrapped in its own namespace in source files)
21
- #include "htminer/src/build_mdd.hpp" // ← ensure HTMiner MDD builder is available
27
+ // HTMiner (namespaced)
28
+ #include "htminer/src/build_mdd.hpp"
22
29
  #include "htminer/src/freq_miner.hpp"
23
30
  #include "htminer/src/load_inst.hpp"
24
31
  #include "htminer/src/utility.hpp"
25
32
 
26
-
33
+ // LargePrefixProjection
27
34
  #include "largepp/src/freq_miner.hpp"
28
35
  #include "largepp/src/load_inst.hpp"
29
36
  #include "largepp/src/utility.hpp"
30
37
 
38
+ // LargeBTMiner
31
39
  #include "largebm/src/freq_miner.hpp"
32
40
  #include "largebm/src/load_inst.hpp"
33
41
  #include "largebm/src/utility.hpp"
34
42
  #include "largebm/src/build_mdd.hpp"
35
43
 
44
+ // LargeHTMiner
36
45
  #include "largehm/src/freq_miner.hpp"
37
46
  #include "largehm/src/load_inst.hpp"
38
47
  #include "largehm/src/utility.hpp"
39
48
  #include "largehm/src/build_mdd.hpp"
40
49
 
50
+ namespace {
51
+
52
// Holds the path of a temporary file and deletes that file when the holder is
// destroyed. An empty `path` means there is nothing to delete.
//
// Copying is disabled: a copy would leave two holders that both call
// std::remove() on the same path. Moving hands the path over and leaves the
// source empty.
struct TempFile {
    std::string path;

    TempFile() = default;

    TempFile(const TempFile&) = delete;
    TempFile& operator=(const TempFile&) = delete;

    TempFile(TempFile&& other) noexcept {
        // `path` starts out empty, so swapping leaves `other` with nothing to delete.
        path.swap(other.path);
    }

    TempFile& operator=(TempFile&& other) noexcept {
        if (this != &other) {
            discard();               // drop the file currently held, if any
            path.swap(other.path);   // `path` is empty after discard()
        }
        return *this;
    }

    ~TempFile() { discard(); }

private:
    // Deletes the held file (ignoring failure) and forgets its path.
    void discard() noexcept {
        if (!path.empty()) {
            std::remove(path.c_str());
            path.clear();
        }
    }
};
61
+
62
// Writes in-memory sequences to a newly named temporary text file: one
// sequence per line, items separated by a single space (an empty sequence
// produces an empty line).
//
// Returns the path of the written file. The file is NOT deleted here; the
// caller owns it (the bindings in this file hand the path to a TempFile).
//
// Throws std::runtime_error if no temporary name can be generated, if the
// file cannot be opened, or if writing/closing the file fails. In the last
// case the partially written file is removed before throwing, so a truncated
// input is never passed on to a loader.
//
// NOTE(review): std::tmpnam only generates a name; the file is created later
// by the ofstream, so another process could create the same path in between.
std::string write_temp_seq_file(const std::vector<std::vector<int>>& seqs) {
    char name_buf[L_tmpnam];
    if (std::tmpnam(name_buf) == nullptr) {
        throw std::runtime_error("Failed to create temporary file name");
    }
    std::string path = std::string(name_buf) + ".txt";

    std::ofstream ofs(path);
    if (!ofs) {
        throw std::runtime_error("Failed to open temporary file for writing: " + path);
    }

    for (const auto& seq : seqs) {
        const char* separator = "";
        for (const int item : seq) {
            ofs << separator << item;
            separator = " ";
        }
        ofs << '\n';
    }

    // close() flushes; a failed write or flush leaves the stream in a failed state.
    ofs.close();
    if (!ofs) {
        std::remove(path.c_str());
        throw std::runtime_error("Failed to write temporary file: " + path);
    }
    return path;
}
87
+
88
+ } // anonymous namespace
41
89
 
42
90
 
43
91
  PYBIND11_MODULE(_effspm, m) {
44
- m.doc() = "Unified SPM library: PrefixProjection, BTMiner, HTMiner";
92
+ m.doc() = "Unified SPM library: PrefixProjection, BTMiner, HTMiner, Large* variants";
45
93
 
46
94
  // ─────────────────────────────────────────────────────────────
47
- // PrefixProjection
95
+ // PrefixProjection (works directly on Python lists or files)
48
96
  // ─────────────────────────────────────────────────────────────
49
97
  m.def("PrefixProjection",
50
98
  [](py::object data,
@@ -59,7 +107,7 @@ PYBIND11_MODULE(_effspm, m) {
59
107
  ::pre_pro = preproc;
60
108
  ::use_dic = use_dic;
61
109
  ::use_list = false;
62
- ::b_disp = verbose;
110
+ ::b_disp = verbose; // controls prints in original code
63
111
  ::b_write = !out_file.empty();
64
112
  ::out_file = out_file;
65
113
 
@@ -69,7 +117,7 @@ PYBIND11_MODULE(_effspm, m) {
69
117
  if (py::isinstance<py::str>(data)) {
70
118
  std::string path = data.cast<std::string>();
71
119
  if (!Load_instance(path, minsup))
72
- throw std::runtime_error("Failed to load file: " + path);
120
+ throw std::runtime_error("PrefixProjection: failed to load file: " + path);
73
121
  } else {
74
122
  auto seqs = data.cast<std::vector<std::vector<int>>>();
75
123
  items = std::move(seqs);
@@ -113,9 +161,355 @@ PYBIND11_MODULE(_effspm, m) {
113
161
  );
114
162
 
115
163
  // ─────────────────────────────────────────────────────────────
116
- // BTMiner
164
+ // BTMiner (always uses professor's Load_instance)
165
+ // ─────────────────────────────────────────────────────────────
166
+ // ─────────────────────────────────────────────────────────────
167
+ // BTMiner (always uses professor's Load_instance)
168
+ // ─────────────────────────────────────────────────────────────
169
+ /*m.def("BTMiner",
170
+ [](py::object data,
171
+ double minsup,
172
+ unsigned int time_limit,
173
+ bool preproc,
174
+ bool use_dic,
175
+ bool verbose,
176
+ const std::string &out_file)
177
+ {
178
+ // 1) Configure professor globals
179
+ btminer::time_limit = static_cast<int>(time_limit);
180
+ btminer::pre_pro = preproc;
181
+ btminer::use_dic = use_dic;
182
+ btminer::b_disp = verbose;
183
+ btminer::b_write = !out_file.empty();
184
+ btminer::out_file = out_file;
185
+ btminer::N_mult = 1;
186
+ btminer::M_mult = 1;
187
+ btminer::just_build = false;
188
+
189
+ // 2) HARD RESET of *known* global state for BTMiner
190
+ // (Only touch what we know exists in btminer namespace)
191
+ btminer::ClearCollected(); // clear collected patterns
192
+ btminer::Tree.clear(); // clear MDD tree
193
+ btminer::DFS.clear(); // clear DFS patterns
194
+
195
+ btminer::M = 0;
196
+ btminer::L = 0;
197
+ btminer::N = 0;
198
+ btminer::theta = 0;
199
+ btminer::E = 0;
200
+ btminer::num_patt = 0; // reset pattern counter if defined
201
+
202
+ // NOTE: we do NOT reinsert root here; btminer::Load_instance()
203
+ // is responsible for calling Tree.emplace_back(0,0,0) as needed.
204
+
205
+ btminer::start_time = std::clock();
206
+
207
+ // 3) Handle input (path or list-of-lists)
208
+ TempFile tmp;
209
+ std::string path;
210
+
211
+ if (py::isinstance<py::str>(data)) {
212
+ // File path: use directly
213
+ path = data.cast<std::string>();
214
+ } else {
215
+ // Python list → write to a temp file in professor’s format
216
+ auto seqs = data.cast<std::vector<std::vector<int>>>();
217
+ tmp.path = write_temp_seq_file(seqs);
218
+ path = tmp.path;
219
+ }
220
+
221
+ if (verbose) {
222
+ std::cerr << "[BTMiner] path=" << path
223
+ << " minsup=" << minsup
224
+ << " preproc=" << preproc
225
+ << " use_dic=" << use_dic
226
+ << std::endl;
227
+ }
228
+
229
+ // 4) Build MDD + run miner
230
+ if (!btminer::Load_instance(path, minsup)) {
231
+ throw std::runtime_error("BTMiner: failed to load instance from: " + path);
232
+ }
233
+
234
+ btminer::Freq_miner();
235
+
236
+ // 5) Return results
237
+ py::dict out;
238
+ out["patterns"] = btminer::GetCollected();
239
+ out["num_patterns"] = btminer::num_patt;
240
+ out["time"] = btminer::give_time(std::clock() - btminer::start_time);
241
+ out["N"] = btminer::N;
242
+ out["L"] = btminer::L;
243
+ out["theta"] = btminer::theta;
244
+ return out;
245
+ },
246
+ py::arg("data"),
247
+ py::arg("minsup") = 0.01,
248
+ py::arg("time_limit") = 36000,
249
+ py::arg("preproc") = false,
250
+ py::arg("use_dic") = false,
251
+ py::arg("verbose") = false,
252
+ py::arg("out_file") = ""
253
+ ); */
254
+ m.def("BTMiner",
255
+ [](py::object data,
256
+ double minsup,
257
+ unsigned int time_limit,
258
+ bool preproc,
259
+ bool use_dic,
260
+ bool verbose,
261
+ const std::string &out_file)
262
+ {
263
+ // 1) Configure professor globals
264
+ btminer::time_limit = static_cast<int>(time_limit);
265
+ btminer::pre_pro = preproc;
266
+ btminer::use_dic = use_dic;
267
+ btminer::b_disp = verbose;
268
+ btminer::b_write = !out_file.empty();
269
+ btminer::out_file = out_file;
270
+ btminer::N_mult = 1;
271
+ btminer::M_mult = 1;
272
+ btminer::just_build = false;
273
+
274
+ // 2) HARD RESET of *known* global state for BTMiner
275
+ btminer::ClearCollected(); // clear collected patterns
276
+ btminer::Tree.clear(); // clear MDD tree
277
+ btminer::DFS.clear(); // clear DFS patterns
278
+
279
+ // clear all frequency / mapping / item structures
280
+ btminer::freq.clear();
281
+ btminer::item_dic.clear();
282
+ btminer::item_map.clear();
283
+ btminer::item_map_rev.clear();
284
+ btminer::items.clear(); // if you have this defined anywhere
285
+
286
+ // reset scalar globals
287
+ btminer::M = 0;
288
+ btminer::L = 0;
289
+ btminer::N = 0;
290
+ btminer::theta = 0;
291
+ btminer::E = 0;
292
+ btminer::num_patt = 0;
293
+ btminer::num_nodes = 0;
294
+ btminer::cur_node = 0;
295
+ // N_mult, M_mult, flags are set just above
296
+
297
+ btminer::start_time = std::clock();
298
+
299
+
300
+ // 3) Handle input (path or list-of-lists)
301
+ TempFile tmp;
302
+ std::string path;
303
+
304
+ if (py::isinstance<py::str>(data)) {
305
+ // File path: use directly
306
+ path = data.cast<std::string>();
307
+ } else {
308
+ // Python list → write to a temp file in professor’s format
309
+ auto seqs = data.cast<std::vector<std::vector<int>>>();
310
+ tmp.path = write_temp_seq_file(seqs);
311
+ path = tmp.path;
312
+ }
313
+
314
+ if (verbose) {
315
+ std::cerr << "[BTMiner] path=" << path
316
+ << " minsup=" << minsup
317
+ << " preproc=" << preproc
318
+ << " use_dic=" << use_dic
319
+ << std::endl;
320
+ }
321
+
322
+ // 4) Build MDD + run miner
323
+ if (!btminer::Load_instance(path, minsup)) {
324
+ throw std::runtime_error("BTMiner: failed to load instance from: " + path);
325
+ }
326
+
327
+ btminer::Freq_miner();
328
+
329
+ // 5) Return results
330
+ py::dict out;
331
+ out["patterns"] = btminer::GetCollected();
332
+ out["num_patterns"] = btminer::num_patt;
333
+ out["time"] = btminer::give_time(std::clock() - btminer::start_time);
334
+ out["N"] = btminer::N;
335
+ out["L"] = btminer::L;
336
+ out["theta"] = btminer::theta;
337
+ return out;
338
+ },
339
+ py::arg("data"),
340
+ py::arg("minsup") = 0.01,
341
+ py::arg("time_limit") = 36000,
342
+ py::arg("preproc") = false,
343
+ py::arg("use_dic") = false,
344
+ py::arg("verbose") = false,
345
+ py::arg("out_file") = ""
346
+ );
347
+
348
+ // ─────────────────────────────────────────────────────────────
349
+ // HTMiner (works on files; we use a temp file for in-memory data)
350
+ // ─────────────────────────────────────────────────────────────
351
+ // ─────────────────────────────────────────────────────────────
352
+ // HTMiner (always uses professor's Load_instance; pre_pro forced ON)
353
+ // ─────────────────────────────────────────────────────────────
354
+ m.def("HTMiner",
355
+ [](py::object data,
356
+ double minsup,
357
+ unsigned int time_limit,
358
+ bool /*preproc*/, // Python arg is ignored internally
359
+ bool use_dic,
360
+ bool verbose,
361
+ const std::string &out_file)
362
+ {
363
+ using namespace htminer;
364
+
365
+ // ───────── Global parameter setup ─────────
366
+ htminer::time_limit = time_limit;
367
+
368
+ // IMPORTANT: always run with preprocessing ON,
369
+ // regardless of the Python `preproc` flag.
370
+ htminer::pre_pro = true;
371
+ htminer::use_dic = use_dic;
372
+ htminer::just_build = false;
373
+ htminer::b_disp = verbose;
374
+ htminer::b_write = !out_file.empty();
375
+ htminer::out_file = out_file;
376
+
377
+ // ───────── HARD RESET of HTMiner globals ─────────
378
+ htminer::ClearCollected();
379
+ htminer::Tree.clear();
380
+ htminer::VTree.clear();
381
+ htminer::CTree.clear();
382
+ htminer::DFS.clear();
383
+ htminer::VDFS.clear();
384
+ htminer::item_dic.clear();
385
+
386
+ htminer::M = 0;
387
+ htminer::N = 0;
388
+ htminer::L = 0;
389
+ htminer::E = 0;
390
+ htminer::theta = 0;
391
+ htminer::mlim = 0;
392
+ htminer::itmset_exists = false;
393
+
394
+ // NOTE: do NOT add a root arc here;
395
+ // htminer::Load_instance() already does Tree.emplace_back(0,0,0)
396
+ htminer::start_time = std::clock();
397
+
398
+ // ───────── Handle input (path or in-memory sequences) ─────────
399
+ TempFile tmp;
400
+ std::string path;
401
+
402
+ if (py::isinstance<py::str>(data)) {
403
+ // data is a file path
404
+ path = data.cast<std::string>();
405
+ } else {
406
+ // data is a list[list[int]] → write a temp file in the same text format
407
+ auto seqs = data.cast<std::vector<std::vector<int>>>();
408
+ tmp.path = write_temp_seq_file(seqs);
409
+ path = tmp.path;
410
+ }
411
+
412
+ if (verbose) {
413
+ std::cerr << "[HTMiner] path=" << path
414
+ << " minsup=" << minsup
415
+ << " preproc(always)=true"
416
+ << " use_dic=" << use_dic
417
+ << std::endl;
418
+ }
419
+
420
+ // ───────── Build MDD via professor's loader ─────────
421
+ if (!htminer::Load_instance(path, minsup)) {
422
+ throw std::runtime_error("HTMiner: failed to load instance from: " + path);
423
+ }
424
+
425
+ // ───────── Run miner ─────────
426
+ htminer::Freq_miner();
427
+
428
+ // ───────── Return results ─────────
429
+ py::dict out;
430
+ out["patterns"] = htminer::GetCollected();
431
+ out["time"] = htminer::give_time(std::clock() - htminer::start_time);
432
+ return out;
433
+ },
434
+ py::arg("data"),
435
+ py::arg("minsup") = 0.01,
436
+ py::arg("time_limit") = 36000,
437
+ py::arg("preproc") = false, // kept for API symmetry, but IGNORED
438
+ py::arg("use_dic") = false,
439
+ py::arg("verbose") = false,
440
+ py::arg("out_file") = ""
441
+ );
442
+
443
+ // ─────────────────────────────────────────────────────────────
444
+ // LargePrefixProjection (already has its own Load_py)
117
445
  // ─────────────────────────────────────────────────────────────
118
- m.def("BTMiner",
446
+ m.def("LargePrefixProjection",
447
+ [](py::object data,
448
+ double minsup,
449
+ unsigned int time_limit,
450
+ bool preproc,
451
+ bool use_dic,
452
+ bool verbose,
453
+ const std::string &out_file)
454
+ {
455
+ // 1) Configure global flags
456
+ largepp::time_limit = time_limit;
457
+ largepp::pre_pro = preproc;
458
+ largepp::use_dic = use_dic;
459
+ largepp::use_list = true; // LargePrefixProjection is list-based
460
+ largepp::b_disp = verbose;
461
+ largepp::b_write = !out_file.empty();
462
+ largepp::out_file = out_file;
463
+ largepp::just_build = false;
464
+
465
+ // 2) HARD RESET of largepp global state
466
+ // (only touch symbols that actually exist in largepp)
467
+ largepp::ClearCollected(); // clear previously collected patterns
468
+
469
+ // If these exist in largepp::load_inst.hpp / utility.hpp they’ll compile;
470
+ // if the compiler complains about any of them, just comment that line out.
471
+ largepp::items.clear(); // transaction DB
472
+ largepp::DFS.clear(); // DFS pattern stack, if list-based miner uses it
473
+
474
+ largepp::M = 0;
475
+ largepp::L = 0;
476
+ largepp::N = 0;
477
+ largepp::theta = 0;
478
+ largepp::E = 0;
479
+ largepp::num_patt = 0;
480
+
481
+ largepp::start_time = std::clock();
482
+
483
+ // 3) Handle input (path or Python list)
484
+ if (py::isinstance<py::str>(data)) {
485
+ std::string fname = data.cast<std::string>();
486
+ largepp::Load_instance(fname, minsup);
487
+ } else {
488
+ largepp::Load_py(data, minsup);
489
+ }
490
+
491
+ // 4) Run miner
492
+ largepp::Freq_miner();
493
+
494
+ // 5) Return results
495
+ py::dict out;
496
+ out["patterns"] = largepp::GetCollected();
497
+ out["time"] = largepp::give_time(std::clock() - largepp::start_time);
498
+ return out;
499
+ },
500
+ py::arg("data"),
501
+ py::arg("minsup") = 0.01,
502
+ py::arg("time_limit") = 36000,
503
+ py::arg("preproc") = false,
504
+ py::arg("use_dic") = false,
505
+ py::arg("verbose") = false,
506
+ py::arg("out_file") = ""
507
+ );
508
+
509
+ // ─────────────────────────────────────────────────────────────
510
+ // LargeBTMiner (always uses professor's largebm::Load_instance)
511
+ // ─────────────────────────────────────────────────────────────
512
+ m.def("LargeBTMiner",
119
513
  [](py::object data,
120
514
  double minsup,
121
515
  unsigned int time_limit,
@@ -124,140 +518,270 @@ PYBIND11_MODULE(_effspm, m) {
124
518
  bool verbose,
125
519
  const std::string &out_file)
126
520
  {
127
- btminer::time_limit = time_limit;
128
- btminer::pre_pro = preproc;
129
- btminer::use_dic = use_dic;
130
- btminer::use_list = false;
131
- btminer::b_disp = verbose;
132
- btminer::b_write = !out_file.empty();
133
- btminer::out_file = out_file;
521
+ using namespace largebm;
522
+
523
+ largebm::time_limit = time_limit;
524
+ largebm::pre_pro = preproc;
525
+ largebm::use_dic = use_dic;
526
+ largebm::use_list = false; // MDD-based
527
+ largebm::b_disp = verbose;
528
+ largebm::b_write = !out_file.empty();
529
+ largebm::out_file = out_file;
530
+ largebm::just_build = false;
531
+
532
+ largebm::ClearCollected();
533
+ largebm::items.clear();
534
+ largebm::item_dic.clear();
535
+ largebm::inv_item_dic.clear();
536
+ largebm::Tree.clear();
537
+ largebm::DFS.clear();
538
+
539
+ largebm::start_time = std::clock();
134
540
 
135
- btminer::ClearCollected();
136
- btminer::start_time = std::clock();
541
+ TempFile tmp;
542
+ std::string path;
137
543
 
138
544
  if (py::isinstance<py::str>(data)) {
139
- std::string path = data.cast<std::string>();
140
- if (!btminer::Load_instance(path, minsup))
141
- throw std::runtime_error("Failed to load file: " + path);
545
+ path = data.cast<std::string>();
142
546
  } else {
143
547
  auto seqs = data.cast<std::vector<std::vector<int>>>();
144
- btminer::items = std::move(seqs);
145
- btminer::N = btminer::items.size();
548
+ tmp.path = write_temp_seq_file(seqs);
549
+ path = tmp.path;
550
+ }
146
551
 
147
- int max_id = 0;
148
- for (auto &seq : btminer::items)
149
- for (int x : seq)
150
- max_id = std::max(max_id, std::abs(x));
151
- btminer::L = max_id;
552
+ if (verbose) {
553
+ std::cerr << "[LargeBTMiner] path=" << path
554
+ << " minsup=" << minsup
555
+ << " preproc=" << preproc
556
+ << " use_dic=" << use_dic
557
+ << std::endl;
558
+ }
152
559
 
153
- btminer::theta = (minsup < 1.0) ? std::ceil(minsup * btminer::N) : minsup;
560
+ if (!largebm::Load_instance(path, minsup)) {
561
+ throw std::runtime_error("LargeBTMiner: failed to load instance from: " + path);
562
+ }
154
563
 
155
- btminer::DFS.clear();
156
- btminer::DFS.reserve(btminer::L);
157
- for (unsigned int i = 0; i < btminer::L; ++i)
158
- btminer::DFS.emplace_back(-static_cast<int>(i) - 1);
564
+ largebm::Freq_miner();
159
565
 
160
- btminer::M = 0;
161
- btminer::E = 0;
162
- for (auto &seq : btminer::items) {
163
- btminer::M = std::max<unsigned int>(btminer::M, seq.size());
164
- btminer::E += seq.size();
165
- }
566
+ py::dict out;
567
+ out["patterns"] = largebm::GetCollected();
568
+ out["time"] = largebm::give_time(std::clock() - largebm::start_time);
569
+ return out;
570
+ },
571
+ py::arg("data"),
572
+ py::arg("minsup") = 0.01,
573
+ py::arg("time_limit") = 36000,
574
+ py::arg("preproc") = false,
575
+ py::arg("use_dic") = false,
576
+ py::arg("verbose") = false,
577
+ py::arg("out_file") = ""
578
+ );
579
+
580
+ // ─────────────────────────────────────────────────────────────
581
+ // LargeHTMiner (always uses professor's largehm::Load_instance; pre_pro forced ON)
582
+ // ─────────────────────────────────────────────────────────────
583
+ // ─────────────────────────────────────────────────────────────
584
+ // LargeHTMiner (professor's Large HTMiner, namespaced as largehm)
585
+ // ─────────────────────────────────────────────────────────────
586
+ m.def("LargeHTMiner",
587
+ [](py::object data,
588
+ double minsup,
589
+ unsigned int time_limit,
590
+ bool /*preproc*/, // kept for API symmetry; ignored
591
+ bool use_dic,
592
+ bool verbose,
593
+ const std::string &out_file)
594
+ {
595
+ using namespace largehm;
596
+
597
+ // 1) Global configuration (mirror professor's style)
598
+ largehm::time_limit = time_limit;
599
+ largehm::pre_pro = true; // always preprocess
600
+ largehm::use_dic = use_dic;
601
+ largehm::just_build = false;
602
+ largehm::b_disp = verbose;
603
+ largehm::b_write = !out_file.empty();
604
+ largehm::out_file = out_file;
605
+
606
+ // 2) HARD RESET of all global state for a fresh run
607
+ largehm::ClearCollected(); // our helper in largehm::utility.cpp
608
+
609
+ largehm::M = 0;
610
+ largehm::L = 0;
611
+ largehm::mlim = 0;
612
+ largehm::N = 0;
613
+ largehm::theta = 0;
614
+ largehm::E = 0;
615
+ largehm::itmset_exists = false;
616
+
617
+ // containers
618
+ // (item_dic reset is optional and not strictly needed here)
619
+ largehm::DFS.clear();
620
+ largehm::VDFS.clear();
621
+ largehm::Tree.clear();
622
+ largehm::VTree.clear();
623
+ largehm::CTree.clear();
624
+
625
+ largehm::start_time = std::clock();
626
+
627
+ // 3) Handle input (file path or Python list)
628
+ TempFile tmp;
629
+ std::string path;
630
+
631
+ if (py::isinstance<py::str>(data)) {
632
+ path = data.cast<std::string>();
633
+ } else {
634
+ auto seqs = data.cast<std::vector<std::vector<int>>>();
635
+ tmp.path = write_temp_seq_file(seqs);
636
+ path = tmp.path;
637
+ }
638
+
639
+ if (verbose) {
640
+ std::cerr << "[LargeHTMiner] path=" << path
641
+ << " minsup=" << minsup
642
+ << " preproc(always)=true"
643
+ << " use_dic=" << use_dic
644
+ << std::endl;
166
645
  }
167
646
 
168
- btminer::Freq_miner();
647
+ // 4) Build MDD / load instance.
648
+ // NOTE: Load_instance() itself does Tree.emplace_back(0,0,0),
649
+ // so we DO NOT create a root node here.
650
+ if (!largehm::Load_instance(path, minsup)) {
651
+ throw std::runtime_error("LargeHTMiner: failed to load instance from: " + path);
652
+ }
169
653
 
654
+ // 5) Run miner (same timing logic as original main)
655
+ if (!largehm::just_build &&
656
+ largehm::give_time(std::clock() - largehm::start_time) < largehm::time_limit)
657
+ {
658
+ largehm::Freq_miner();
659
+ if (largehm::give_time(std::clock() - largehm::start_time) >= largehm::time_limit) {
660
+ std::cout << "TIME LIMIT REACHED\n";
661
+ }
662
+ }
663
+
664
+ // 6) Return collected patterns + runtime
170
665
  py::dict out;
171
- out["patterns"] = btminer::GetCollected();
172
- out["time"] = btminer::give_time(std::clock() - btminer::start_time);
666
+ out["patterns"] = largehm::GetCollected();
667
+ out["time"] = largehm::give_time(std::clock() - largehm::start_time);
173
668
  return out;
174
669
  },
175
670
  py::arg("data"),
176
671
  py::arg("minsup") = 0.01,
177
672
  py::arg("time_limit") = 36000,
178
- py::arg("preproc") = false,
673
+ py::arg("preproc") = false, // kept for API symmetry
179
674
  py::arg("use_dic") = false,
180
675
  py::arg("verbose") = false,
181
676
  py::arg("out_file") = ""
182
677
  );
183
678
 
184
- // ─────────────────────────────────────────────────────────────
185
- // HTMiner
186
- // ─────────────────────────────────────────────────────────────
187
- m.def("HTMiner",
679
+
680
+ }
681
+
682
+
683
+ /*#include <pybind11/pybind11.h>
684
+ #include <pybind11/stl.h>
685
+ namespace py = pybind11;
686
+ #include <iostream>
687
+
688
+
689
+ // PrefixProjection headers
690
+ #include "freq_miner.hpp"
691
+ #include "load_inst.hpp"
692
+ #include "utility.hpp"
693
+
694
+ // BTMiner (wrapped in its own namespace in source files)
695
+ #include "btminer/src/freq_miner.hpp"
696
+ #include "btminer/src/load_inst.hpp"
697
+ #include "btminer/src/utility.hpp"
698
+ #include "btminer/src/build_mdd.hpp"
699
+
700
+ // HTMiner (wrapped in its own namespace in source files)
701
+ #include "htminer/src/build_mdd.hpp" // ← ensure HTMiner MDD builder is available
702
+ #include "htminer/src/freq_miner.hpp"
703
+ #include "htminer/src/load_inst.hpp"
704
+ #include "htminer/src/utility.hpp"
705
+
706
+
707
+ #include "largepp/src/freq_miner.hpp"
708
+ #include "largepp/src/load_inst.hpp"
709
+ #include "largepp/src/utility.hpp"
710
+
711
+
712
+ #include "largebm/src/freq_miner.hpp"
713
+ #include "largebm/src/load_inst.hpp"
714
+ #include "largebm/src/utility.hpp"
715
+ #include "largebm/src/build_mdd.hpp"
716
+
717
+ #include "largehm/src/freq_miner.hpp"
718
+ #include "largehm/src/load_inst.hpp"
719
+ #include "largehm/src/utility.hpp"
720
+ #include "largehm/src/build_mdd.hpp"
721
+
722
+
723
+
724
+ PYBIND11_MODULE(_effspm, m) {
725
+ m.doc() = "Unified SPM library: PrefixProjection, BTMiner, HTMiner";
726
+
727
+ // ─────────────────────────────────────────────────────────────
728
+ // PrefixProjection
729
+ // ─────────────────────────────────────────────────────────────
730
+ m.def("PrefixProjection",
188
731
  [](py::object data,
189
- double minsup, unsigned int time_limit,
190
- bool preproc, bool use_dic,
191
- bool verbose, const std::string &out_file)
732
+ double minsup,
733
+ unsigned int time_limit,
734
+ bool preproc,
735
+ bool use_dic,
736
+ bool verbose,
737
+ const std::string &out_file)
192
738
  {
193
- // 1) set HTMiner globals (declared in htminer/src/utility.hpp)
194
- htminer::time_limit = time_limit;
195
- htminer::pre_pro = preproc;
196
- htminer::use_dic = use_dic;
197
- htminer::just_build = false; // or true if you want “build only”
198
- htminer::use_list = false; // HTMiner always uses MDD‐based mode
199
- htminer::b_disp = verbose;
200
- htminer::b_write = !out_file.empty();
201
- htminer::out_file = out_file;
202
- htminer::ClearCollected(); // clear any leftover patterns
203
- htminer::start_time = std::clock();
204
-
205
- // 2) load sequences (either from filename or from Python list)
739
+ ::time_limit = time_limit;
740
+ ::pre_pro = preproc;
741
+ ::use_dic = use_dic;
742
+ ::use_list = false;
743
+ ::b_disp = verbose;
744
+ ::b_write = !out_file.empty();
745
+ ::out_file = out_file;
746
+
747
+ ClearCollected();
748
+ start_time = std::clock();
749
+
206
750
  if (py::isinstance<py::str>(data)) {
207
751
  std::string path = data.cast<std::string>();
208
- if (!htminer::Load_instance(path, minsup))
752
+ if (!Load_instance(path, minsup))
209
753
  throw std::runtime_error("Failed to load file: " + path);
210
754
  } else {
211
755
  auto seqs = data.cast<std::vector<std::vector<int>>>();
212
- htminer::items = std::move(seqs);
213
- htminer::N = htminer::items.size();
756
+ items = std::move(seqs);
757
+ N = items.size();
214
758
 
215
- // compute L (max item ID), M (max sequence length), E (total entries)
216
759
  int max_id = 0;
217
- htminer::M = 0;
218
- htminer::E = 0;
219
- for (auto &seq : htminer::items) {
220
- htminer::M = std::max<unsigned int>(htminer::M, seq.size());
760
+ for (auto &seq : items)
221
761
  for (int x : seq)
222
762
  max_id = std::max(max_id, std::abs(x));
223
- htminer::E += seq.size();
763
+ L = max_id;
764
+
765
+ theta = (minsup < 1.0) ? std::ceil(minsup * N) : minsup;
766
+
767
+ DFS.clear();
768
+ DFS.reserve(L);
769
+ for (unsigned int i = 0; i < L; ++i)
770
+ DFS.emplace_back(-static_cast<int>(i) - 1);
771
+
772
+ M = 0;
773
+ E = 0;
774
+ for (auto &seq : items) {
775
+ M = std::max<unsigned int>(M, seq.size());
776
+ E += seq.size();
224
777
  }
225
- htminer::L = max_id;
226
- htminer::theta = (minsup < 1.0)
227
- ? static_cast<unsigned long long>(std::ceil(minsup * htminer::N))
228
- : static_cast<unsigned long long>(minsup);
229
-
230
- // build empty DFS stack (size L) as HTMiner expects
231
- htminer::DFS.clear();
232
- htminer::DFS.reserve(htminer::L);
233
- for (unsigned int i = 0; i < static_cast<unsigned int>(htminer::L); ++i)
234
- htminer::DFS.emplace_back(-static_cast<int>(i) - 1);
235
-
236
- // initialize VDFS if HTMiner needs it
237
- htminer::VDFS.clear();
238
- htminer::VDFS.resize(htminer::L);
239
778
  }
240
779
 
241
- // 3) run the mining algorithm
242
- htminer::Freq_miner();
243
-
244
- // std::cout << "[HTMiner] dumping all collected patterns:\n";
245
- // for (size_t i = 0; i < htminer::collectedPatterns.size(); ++i) {
246
- // const auto &seq = htminer::collectedPatterns[i];
247
- // std::cout << "Pattern " << i << ": { ";
248
- // for (int x : seq) {
249
- // std::cout << x << " ";
250
- // }
251
- // std::cout << "}\n";
252
- //}
253
- std::cout << " total patterns = "
254
- << htminer::collectedPatterns.size() << "\n";
255
- // ─────────────────────────────────────────────────
256
-
257
- // 4) return patterns + elapsed time
780
+ Freq_miner();
781
+
258
782
  py::dict out;
259
- out["patterns"] = htminer::GetCollected();
260
- out["time"] = htminer::give_time(std::clock() - htminer::start_time);
783
+ out["patterns"] = GetCollected();
784
+ out["time"] = give_time(std::clock() - start_time);
261
785
  return out;
262
786
  },
263
787
  py::arg("data"),
@@ -268,8 +792,223 @@ std::cout << " total patterns = "
268
792
  py::arg("verbose") = false,
269
793
  py::arg("out_file") = ""
270
794
  );
795
+ m.def("BTMiner",  // registers the Python-callable "BTMiner": loads `data`, runs btminer::Freq_miner(), returns a dict
796
+ [](py::object data,  // a Python str is treated as a file path; anything else is cast to std::vector<std::vector<int>>
797
+ double minsup,  // list mode: < 1.0 is a fraction of N, otherwise an absolute count; file mode: forwarded to Load_instance
798
+ unsigned int time_limit,  // stored in btminer::time_limit (cast to int)
799
+ bool preproc,  // stored in btminer::pre_pro
800
+ bool use_dic,  // stored in btminer::use_dic
801
+ bool verbose,  // stored in btminer::b_disp; also enables the "[BT][binding]" diagnostics on stderr
802
+ const std::string &out_file)  // non-empty => btminer::b_write is set; the name is stored in btminer::out_file
803
+ {
804
+ // This binding drives the professor's BTMiner code, now namespaced as btminer::,
805
+ // so it only sets the globals that code actually has.
806
+ // Returns a dict with keys "patterns", "num_patterns", "time", "N", "L", "theta".
807
+ // 1) configure professor globals
808
+ btminer::time_limit = static_cast<int>(time_limit);
809
+ btminer::pre_pro = preproc;
810
+ btminer::use_dic = use_dic;
811
+ btminer::b_disp = verbose;
812
+ btminer::b_write = !out_file.empty();
813
+ btminer::out_file = out_file;
814
+ btminer::N_mult = 1; // multipliers are pinned to 1 (the professor's code uses these too)
815
+ btminer::M_mult = 1;
816
+ btminer::just_build = false; // we want full mining, not only the build step
817
+ // NOTE(review): preproc/use_dic are stored above, but the list-mode branch below never consults them.
818
+ btminer::start_time = std::clock();  // the reported "time" therefore covers loading as well as mining
819
+
820
+ // 2) load data
821
+ //
822
+ // The professor's code is primarily file-based (Load_instance(const string&, double)).
823
+ // So: if the user passes a file path, use the professor's loader directly.
824
+ // If the user passes a Python list-of-lists, build the MDD here the same
825
+ // way the professor's loader does, without changing its logic.
826
+ if (py::isinstance<py::str>(data)) {
827
+ // ----- FILE MODE -----
828
+ std::string path = data.cast<std::string>();
829
+
830
+ if (verbose) {
831
+ std::cerr << "[BT][binding] file=" << path
832
+ << " minsup=" << minsup
833
+ << " preproc=" << preproc << std::endl;
834
+ }
835
+ // A false return from the loader is surfaced as std::runtime_error.
836
+ if (!btminer::Load_instance(path, minsup)) {
837
+ throw std::runtime_error("BTMiner: failed to load file: " + path);
838
+ }
839
+ } else {
840
+ // ----- PYTHON LIST MODE -----
841
+ //
842
+ // We mimic the professor's loader:
843
+ // - create root in Tree
844
+ // - compute N, M, L (and E)
845
+ // - compute theta from minsup
846
+ // - seed DFS (one Pattern per item, as in Preprocess branch)
847
+ // - call Build_MDD(...) for each non-empty sequence
848
+ //
849
+ // This DOES NOT change the mining logic; it just drives it from memory.
850
+
851
+ auto seqs = data.cast<std::vector<std::vector<int>>>();
852
+
853
+ // Reset the MDD to a single root node. NOTE(review): only Tree (here) and DFS (below) are reset in this branch.
854
+ btminer::Tree.clear();
855
+ btminer::Tree.emplace_back(0, 0, 0); // root (exactly like professor)
856
+
857
+ // compute basic stats; empty sequences are skipped and do not count toward N
858
+ int max_id = 0;  // largest |item| seen
859
+ int max_len = 0;  // longest sequence length
860
+ int seq_count = 0;  // number of non-empty sequences
861
+ long long entries = 0;  // total number of items over all non-empty sequences
862
+
863
+ for (const auto &s : seqs) {
864
+ if (s.empty()) continue;
865
+ ++seq_count;
866
+ max_len = std::max<int>(max_len, static_cast<int>(s.size()));
867
+ for (int x : s) {
868
+ max_id = std::max(max_id, std::abs(x));  // items may be negative; only the magnitude matters here
869
+ ++entries;
870
+ }
871
+ }
872
+
873
+ btminer::N = seq_count;
874
+ btminer::M = max_len;
875
+ btminer::L = max_id;
876
+ btminer::E = static_cast<int>(entries);  // narrowed from long long to int
877
+
878
+ // theta = absolute support: ceil(minsup * N * N_mult) for fractional minsup, else minsup truncated to int
879
+ if (minsup < 1.0)
880
+ btminer::theta = static_cast<int>(std::ceil(minsup * btminer::N * btminer::N_mult));
881
+ else
882
+ btminer::theta = static_cast<int>(minsup);
883
+
884
+ // seed DFS exactly like professor does in the preprocessed branch: entries built from -1, -2, ..., -L
885
+ btminer::DFS.clear();
886
+ btminer::DFS.reserve(btminer::L);
887
+ for (int i = 0; i < btminer::L; ++i)
888
+ btminer::DFS.emplace_back(-i - 1);
889
+
890
+ // now build the MDD, sequence by sequence
891
+ for (const auto &s : seqs) {
892
+ if (s.empty()) continue;
893
+ // professor's Build_MDD takes a vector<int> by non-const ref, so pass a mutable copy
894
+ std::vector<int> tmp = s;
895
+ btminer::Build_MDD(tmp);
896
+ }
897
+
898
+ if (verbose) {
899
+ std::cerr << "[BT][binding] PY mode: N=" << btminer::N
900
+ << " L=" << btminer::L
901
+ << " M=" << btminer::M
902
+ << " E=" << btminer::E
903
+ << " theta=" << btminer::theta
904
+ << " Tree.size()=" << btminer::Tree.size()
905
+ << std::endl;
906
+ }
907
+ }
908
+
909
+ // 3) run the professor's miner
910
+ btminer::Freq_miner();
911
+
912
+ // 4) build python result
913
+ // NOTE(review): no ClearCollected() call is visible in this binding (the HTMiner binding makes one) - confirm patterns do not accumulate across calls.
914
+ py::dict out;
915
+ out["patterns"] = btminer::GetCollected(); // whatever btminer::GetCollected() returns
916
+ out["num_patterns"] = btminer::num_patt;
917
+ out["time"] = btminer::give_time(std::clock() - btminer::start_time);
918
+ out["N"] = btminer::N;
919
+ out["L"] = btminer::L;
920
+ out["theta"] = btminer::theta;
921
+ return out;
922
+
923
+ },
924
+ py::arg("data"),  // the only argument without a default
925
+ py::arg("minsup") = 0.01,
926
+ py::arg("time_limit") = 36000,
927
+ py::arg("preproc") = false,
928
+ py::arg("use_dic") = false,
929
+ py::arg("verbose") = false,
930
+ py::arg("out_file") = ""
931
+ );
932
+
933
+
934
+
935
+
936
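+ // ─────────────────────────────────────────────────────────────
937
+ // HTMiner -- Python wrapper for the htminer implementation
938
+ // ─────────────────────────────────────────────────────────────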
939
+ m.def("HTMiner",  // registers the Python-callable "HTMiner": loads `data`, runs htminer::Freq_miner(), returns {"patterns", "time"}
940
+ [](py::object data,  // a Python str is treated as a file path; anything else is cast to std::vector<std::vector<int>>
941
+ double minsup, unsigned int time_limit,  // minsup in list mode: < 1.0 is a fraction of N, otherwise an absolute count
942
+ bool preproc, bool use_dic,  // stored in htminer::pre_pro / htminer::use_dic
943
+ bool verbose, const std::string &out_file)  // verbose -> htminer::b_disp; non-empty out_file sets htminer::b_write
944
+ {
945
+ htminer::time_limit = time_limit;  // configure the htminer globals from the call arguments
946
+ htminer::pre_pro = preproc;
947
+ htminer::use_dic = use_dic;
948
+ htminer::just_build = false;
949
+ htminer::use_list = false;
950
+ htminer::b_disp = verbose;
951
+ htminer::b_write = !out_file.empty();
952
+ htminer::out_file = out_file;
953
+ htminer::ClearCollected();  // called before every run, ahead of loading and mining
954
+ htminer::start_time = std::clock();  // the reported "time" therefore covers loading as well as mining
955
+
956
+ if (py::isinstance<py::str>(data)) {  // file mode: delegate to the htminer loader
957
+ std::string path = data.cast<std::string>();
958
+ if (!htminer::Load_instance(path, minsup))
959
+ throw std::runtime_error("Failed to load file: " + path);  // a false return from the loader becomes an exception
960
+ } else {  // in-memory mode: fill the htminer globals directly from the Python sequences
961
+ auto seqs = data.cast<std::vector<std::vector<int>>>();
962
+ htminer::items = std::move(seqs);
963
+ htminer::N = htminer::items.size();  // counts every sequence, including empty ones
964
+
965
+ int max_id = 0;  // largest |item| seen
966
+ htminer::M = 0;  // longest sequence length
967
+ htminer::E = 0;  // total number of items over all sequences
968
+ for (auto &seq : htminer::items) {
969
+ htminer::M = std::max<unsigned int>(htminer::M, seq.size());
970
+ for (int x : seq)
971
+ max_id = std::max(max_id, std::abs(x));  // items may be negative; only the magnitude matters here
972
+ htminer::E += seq.size();
973
+ }
974
+ htminer::L = max_id;
975
+ htminer::theta = (minsup < 1.0)  // absolute support: ceil(minsup * N) for fractional minsup, else minsup truncated
976
+ ? static_cast<unsigned long long>(std::ceil(minsup * htminer::N))
977
+ : static_cast<unsigned long long>(minsup);
978
+
979
+ htminer::DFS.clear();  // seed DFS with L entries built from -1, -2, ..., -L
980
+ htminer::DFS.reserve(htminer::L);
981
+ for (unsigned int i = 0; i < static_cast<unsigned int>(htminer::L); ++i)
982
+ htminer::DFS.emplace_back(-static_cast<int>(i) - 1);
983
+
984
+ htminer::VDFS.clear();  // VDFS is reset to L default-constructed entries
985
+ htminer::VDFS.resize(htminer::L);
986
+ }  // NOTE(review): no tree/MDD construction is visible in this branch (the BTMiner binding calls Build_MDD per sequence) - confirm Freq_miner can work from htminer::items alone
987
+
988
+ htminer::Freq_miner();
989
+
990
+ // The summary line is printed to stdout only when verbose is set.
991
+ if (verbose) {
992
+ std::cout << " total patterns = "
993
+ << htminer::collectedPatterns.size() << "\n";
994
+ }
995
+
996
+ py::dict out;
997
+ out["patterns"] = htminer::GetCollected();
998
+ out["time"] = htminer::give_time(std::clock() - htminer::start_time);
999
+ return out;
1000
+ },
1001
+ py::arg("data"),  // the only argument without a default
1002
+ py::arg("minsup") = 0.01,
1003
+ py::arg("time_limit") = 36000,
1004
+ py::arg("preproc") = false,
1005
+ py::arg("use_dic") = false,
1006
+ py::arg("verbose") = false,
1007
+ py::arg("out_file") = ""
1008
+ );
1009
+
271
1010
 
272
- m.def("LargePrefixProjection",
1011
+ m.def("LargePrefixProjection",
273
1012
  [](py::object data,
274
1013
  double minsup,
275
1014
  unsigned int time_limit,
@@ -281,28 +1020,30 @@ std::cout << " total patterns = "
281
1020
  largepp::time_limit = time_limit;
282
1021
  largepp::pre_pro = preproc;
283
1022
  largepp::use_dic = use_dic;
284
- largepp::use_list = true; // ← key difference
1023
+ largepp::use_list = true;
285
1024
  largepp::b_disp = verbose;
286
1025
  largepp::b_write = !out_file.empty();
287
1026
  largepp::out_file = out_file;
288
- largepp::just_build = false;
1027
+ largepp::just_build = false;
289
1028
 
290
1029
  largepp::ClearCollected();
291
1030
  largepp::start_time = std::clock();
292
- std::string fname = data.cast<std::string>();
293
- /* 1) load instance (py list or filename) */
294
- if (py::isinstance<py::str>(data))
295
-
296
- largepp::Load_instance(fname, minsup);
297
- else
298
- largepp::Load_py(data, minsup); // helper you’ll expose
299
-
300
- std::vector<unsigned long long> dbg;
301
-
302
-
303
-
304
1031
 
1032
+ // 👇 this was the last noisy one
1033
+ if (verbose) {
1034
+ std::cerr << " minsup=" << minsup
1035
+ << " preproc=" << preproc
1036
+ << " verbose=" << verbose
1037
+ << " out_file=" << (out_file.empty() ? "(none)" : out_file)
1038
+ << " use_dic=" << use_dic << "\n";
1039
+ }
305
1040
 
1041
+ if (py::isinstance<py::str>(data)) {
1042
+ std::string fname = data.cast<std::string>();
1043
+ largepp::Load_instance(fname, minsup);
1044
+ } else {
1045
+ largepp::Load_py(data, minsup);
1046
+ }
306
1047
 
307
1048
  largepp::Freq_miner();
308
1049
 
@@ -320,6 +1061,8 @@ std::cout << " total patterns = "
320
1061
  py::arg("out_file") = ""
321
1062
  );
322
1063
 
1064
+
1065
+
323
1066
  // ─────────────────────────────────────────────────────────────
324
1067
  // LargeBTMiner -- Python wrapper for the largebm implementation
325
1068
  // ─────────────────────────────────────────────────────────────
@@ -404,100 +1147,108 @@ std::cout << " total patterns = "
404
1147
 
405
1148
 
406
1149
 
407
- m.def("LargeBTMiner",
408
- [](py::object data,
409
- double minsup,
410
- unsigned int time_limit,
411
- bool preproc,
412
- bool use_dic,
413
- bool verbose,
414
- const std::string &out_file)
415
- {
416
- // 0) Set global flags and timers
417
- largebm::time_limit = time_limit;
418
- largebm::pre_pro = preproc;
419
- largebm::use_dic = use_dic;
420
- largebm::use_list = false; // large‑mode → always MDD
421
- largebm::b_disp = verbose;
422
- largebm::b_write = !out_file.empty();
423
- largebm::out_file = out_file;
424
- largebm::just_build = false;
1150
+ // ─────────────────────────────────────────────────────────────────────────
1151
+ // LargeBTMiner (MDD-based)
1152
+ // ─────────────────────────────────────────────────────────────────────────
1153
+ /*m.def("LargeBTMiner",
1154
+ [](py::object data,
1155
+ double minsup,
1156
+ unsigned int time_limit,
1157
+ bool preproc,
1158
+ bool use_dic,
1159
+ bool verbose,
1160
+ const std::string &out_file)
1161
+ {
1162
+ using namespace largebm;
1163
+
1164
+ // 0) Set global flags and timers
1165
+ largebm::time_limit = time_limit;
1166
+ largebm::pre_pro = preproc;
1167
+ largebm::use_dic = use_dic;
1168
+ largebm::use_list = false; // large-mode → always MDD
1169
+ largebm::b_disp = verbose;
1170
+ largebm::b_write = !out_file.empty();
1171
+ largebm::out_file = out_file;
1172
+ largebm::just_build = false;
1173
+
1174
+ // 0.1) Clear any leftover data/state from previous runs
1175
+ largebm::items.clear();
1176
+ largebm::item_dic.clear();
1177
+ largebm::inv_item_dic.clear();
1178
+ largebm::Tree.clear();
1179
+ largebm::DFS.clear();
1180
+ largebm::ClearCollected();
1181
+
1182
+ // 1) Load sequences (either from filename or from Python list)
1183
+ if (py::isinstance<py::str>(data)) {
1184
+ // ─────────── FILE-BASED MODE ───────────
1185
+ std::string path = data.cast<std::string>();
1186
+ if (!largebm::Load_instance(path, minsup))
1187
+ throw std::runtime_error("Failed to load file: " + path);
425
1188
 
426
- // 0.1) Clear any leftover data/state from previous runs
427
- largebm::items.clear();
428
- largebm::item_dic.clear();
429
- largebm::inv_item_dic.clear();
430
- largebm::Tree.clear();
431
- largebm::DFS.clear();
432
- largebm::ClearCollected();
1189
+ } else {
1190
+ // ────────── IN-MEMORY MODE ──────────
1191
+ auto seqs = data.cast<std::vector<std::vector<int>>>();
1192
+ largebm::items = std::move(seqs);
1193
+ largebm::N = largebm::items.size();
433
1194
 
434
- // 1) Load sequences (either from filename or from Python list)
435
- if (py::isinstance<py::str>(data)) {
436
- // ─────────── FILE‑BASED MODE ───────────
437
- std::string path = data.cast<std::string>();
438
- if (!largebm::Load_instance(path, minsup))
439
- throw std::runtime_error("Failed to load file: " + path);
1195
+ // 1.1) Compute basic DB statistics (M, E, L) and absolute support θ
1196
+ int max_id = 0;
1197
+ largebm::M = 0;
1198
+ largebm::E = 0;
1199
+ for (auto &seq : largebm::items) {
1200
+ largebm::M = std::max<unsigned int>(largebm::M, static_cast<unsigned int>(seq.size()));
1201
+ largebm::E += static_cast<unsigned long long>(seq.size());
1202
+ for (int x : seq) max_id = std::max(max_id, std::abs(x));
1203
+ }
1204
+ largebm::L = static_cast<unsigned int>(max_id);
1205
+ largebm::theta = (minsup < 1.0)
1206
+ ? static_cast<unsigned long long>(std::ceil(minsup * largebm::N))
1207
+ : static_cast<unsigned long long>(minsup);
440
1208
 
441
- } else {
442
- // ────────── IN‑MEMORY MODE ──────────
443
- auto seqs = data.cast<std::vector<std::vector<int>>>();
444
- largebm::items = std::move(seqs);
445
- largebm::N = largebm::items.size();
1209
+ // 1.2) Initialize DFS buffer (size = L)
1210
+ largebm::DFS.reserve(largebm::L);
1211
+ for (unsigned int i = 0; i < largebm::L; ++i)
1212
+ largebm::DFS.emplace_back(-static_cast<int>(i) - 1);
446
1213
 
447
- // 1.1) Compute basic DB statistics (M, E, L) and absolute support θ
448
- int max_id = 0;
449
- largebm::M = 0;
450
- largebm::E = 0;
451
- for (auto &seq : largebm::items) {
452
- largebm::M = std::max<unsigned int>(largebm::M, static_cast<unsigned int>(seq.size()));
453
- largebm::E += static_cast<unsigned long long>(seq.size());
454
- for (int x : seq) max_id = std::max(max_id, std::abs(x));
455
- }
456
- largebm::L = static_cast<unsigned int>(max_id);
457
- largebm::theta = (minsup < 1.0)
458
- ? static_cast<unsigned long long>(std::ceil(minsup * largebm::N))
459
- : static_cast<unsigned long long>(minsup);
460
-
461
- // 1.2) Initialize DFS buffer (size = L)
462
- largebm::DFS.reserve(largebm::L);
463
- for (unsigned int i = 0; i < largebm::L; ++i)
464
- largebm::DFS.emplace_back(-static_cast<int>(i) - 1);
465
-
466
- // 1.3) Build the MDD “Tree”
467
- // Insert one dummy root node (item=0, freq=0, anct=0)
468
- largebm::Tree.emplace_back(0, 0, 0);
469
- for (auto &seq : largebm::items)
470
- largebm::Build_MDD(const_cast<std::vector<int>&>(seq));
471
- }
1214
+ // 1.3) Build the MDD “Tree”
1215
+ // Insert one dummy root node (item=0, freq=0, anct=0)
1216
+ largebm::Tree.emplace_back(0, 0, 0);
1217
+ for (auto &seq : largebm::items)
1218
+ largebm::Build_MDD(const_cast<std::vector<int>&>(seq));
1219
+ }
472
1220
 
473
- // 2) Rebuild inversedictionary from fresh item_dic
474
- {
475
- std::vector<int> inv(largebm::item_dic.size() + 1);
476
- for (int old = 1; old <= static_cast<int>(largebm::item_dic.size()); ++old) {
477
- int cid = largebm::item_dic[old - 1];
478
- if (cid > 0) inv[cid] = old;
479
- }
480
- largebm::inv_item_dic = std::move(inv);
1221
+ // 2) Rebuild inverse-dictionary from fresh item_dic
1222
+ {
1223
+ std::vector<int> inv(largebm::item_dic.size() + 1);
1224
+ for (int old = 1; old <= static_cast<int>(largebm::item_dic.size()); ++old) {
1225
+ int cid = largebm::item_dic[old - 1];
1226
+ if (cid > 0) inv[cid] = old;
481
1227
  }
1228
+ largebm::inv_item_dic = std::move(inv);
1229
+ }
482
1230
 
483
- // 3) Start timing and run the miner
484
- largebm::start_time = std::clock();
485
- largebm::Freq_miner();
1231
+ // 3) Start timing and run the miner
1232
+ largebm::start_time = std::clock();
1233
+ largebm::Freq_miner();
1234
+
1235
+ // 4) Collect results and elapsed time
1236
+ const auto& pats = largebm::GetCollected();
1237
+
1238
+ py::dict out;
1239
+ out["patterns"] = pats;
1240
+ out["time"] = largebm::give_time(std::clock() - largebm::start_time);
1241
+ return out;
1242
+ },
1243
+ py::arg("data"),
1244
+ py::arg("minsup") = 0.01,
1245
+ py::arg("time_limit") = 36000,
1246
+ py::arg("preproc") = false,
1247
+ py::arg("use_dic") = false,
1248
+ py::arg("verbose") = false,
1249
+ py::arg("out_file") = ""
1250
+ );
486
1251
 
487
- // 4) Collect results and elapsed time
488
- py::dict out;
489
- out["patterns"] = largebm::GetCollected();
490
- out["time"] = largebm::give_time(std::clock() - largebm::start_time);
491
- return out;
492
- },
493
- py::arg("data"),
494
- py::arg("minsup") = 0.01,
495
- py::arg("time_limit") = 36000,
496
- py::arg("preproc") = false,
497
- py::arg("use_dic") = false,
498
- py::arg("verbose") = false,
499
- py::arg("out_file") = ""
500
- );
501
1252
 
502
1253
 
503
1254
  m.def("LargeHTMiner",
@@ -606,4 +1357,4 @@ m.def("LargeHTMiner",
606
1357
 
607
1358
 
608
1359
 
609
- }
1360
+ } */