effspm 0.2.7__cp39-cp39-macosx_11_0_arm64.whl → 0.3.1__cp39-cp39-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. effspm/_effspm.cpp +818 -200
  2. effspm/_effspm.cpython-39-darwin.so +0 -0
  3. effspm/btminer/src/build_mdd.cpp +42 -17
  4. effspm/btminer/src/build_mdd.hpp +13 -19
  5. effspm/btminer/src/freq_miner.cpp +134 -49
  6. effspm/btminer/src/freq_miner.hpp +16 -0
  7. effspm/btminer/src/load_inst.cpp +202 -126
  8. effspm/btminer/src/load_inst.hpp +22 -4
  9. effspm/btminer/src/main.cpp +83 -0
  10. effspm/btminer/src/utility.cpp +26 -41
  11. effspm/btminer/src/utility.hpp +6 -30
  12. effspm/freq_miner.hpp +2 -1
  13. effspm/htminer/src/build_mdd.cpp +46 -124
  14. effspm/htminer/src/build_mdd.hpp +56 -49
  15. effspm/htminer/src/freq_miner.cpp +341 -307
  16. effspm/htminer/src/freq_miner.hpp +39 -40
  17. effspm/htminer/src/load_inst.cpp +287 -336
  18. effspm/htminer/src/load_inst.hpp +23 -6
  19. effspm/htminer/src/main.cpp +97 -0
  20. effspm/htminer/src/utility.cpp +38 -57
  21. effspm/htminer/src/utility.hpp +9 -64
  22. effspm/largebm/src/build_mdd.cpp +69 -110
  23. effspm/largebm/src/build_mdd.hpp +22 -37
  24. effspm/largebm/src/freq_miner.cpp +241 -291
  25. effspm/largebm/src/freq_miner.hpp +25 -36
  26. effspm/largebm/src/load_inst.cpp +20 -26
  27. effspm/largebm/src/load_inst.hpp +24 -34
  28. effspm/largebm/src/main.cpp +95 -0
  29. effspm/largebm/src/utility.cpp +11 -21
  30. effspm/largebm/src/utility.hpp +7 -10
  31. effspm/largehm/src/build_mdd.cpp +75 -110
  32. effspm/largehm/src/build_mdd.hpp +53 -73
  33. effspm/largehm/src/freq_miner.cpp +134 -191
  34. effspm/largehm/src/freq_miner.hpp +37 -60
  35. effspm/largehm/src/load_inst.cpp +137 -174
  36. effspm/largehm/src/load_inst.hpp +13 -50
  37. effspm/largehm/src/main.cpp +95 -0
  38. effspm/largehm/src/utility.cpp +46 -28
  39. effspm/largehm/src/utility.hpp +18 -16
  40. effspm/largepp/src/freq_miner.cpp +184 -156
  41. effspm/largepp/src/freq_miner.hpp +11 -36
  42. effspm/largepp/src/load_inst.cpp +32 -12
  43. effspm/largepp/src/load_inst.hpp +15 -9
  44. effspm/largepp/src/main.cpp +108 -0
  45. effspm/largepp/src/pattern.hpp +31 -0
  46. effspm/load_inst.cpp +8 -8
  47. effspm/load_inst.hpp +1 -1
  48. effspm/main.cpp +103 -0
  49. {effspm-0.2.7.dist-info → effspm-0.3.1.dist-info}/METADATA +1 -1
  50. effspm-0.3.1.dist-info/RECORD +60 -0
  51. effspm-0.2.7.dist-info/RECORD +0 -53
  52. {effspm-0.2.7.dist-info → effspm-0.3.1.dist-info}/WHEEL +0 -0
  53. {effspm-0.2.7.dist-info → effspm-0.3.1.dist-info}/licenses/LICENSE +0 -0
  54. {effspm-0.2.7.dist-info → effspm-0.3.1.dist-info}/top_level.txt +0 -0
effspm/_effspm.cpp CHANGED
@@ -2,49 +2,97 @@
2
2
 
3
3
  #include <pybind11/pybind11.h>
4
4
  #include <pybind11/stl.h>
5
- namespace py = pybind11;
5
+
6
6
  #include <iostream>
7
+ #include <fstream>
8
+ #include <cstdio> // std::remove
9
+ #include <vector>
10
+ #include <string>
11
+ #include <ctime>
12
+ #include <cmath>
7
13
 
14
+ namespace py = pybind11;
8
15
 
9
- // PrefixProjection headers
16
+ // PrefixProjection headers (global namespace)
10
17
  #include "freq_miner.hpp"
11
18
  #include "load_inst.hpp"
12
19
  #include "utility.hpp"
13
20
 
14
- // BTMiner (wrapped in its own namespace in source files)
21
+ // BTMiner (namespaced)
15
22
  #include "btminer/src/freq_miner.hpp"
16
23
  #include "btminer/src/load_inst.hpp"
17
24
  #include "btminer/src/utility.hpp"
18
25
  #include "btminer/src/build_mdd.hpp"
19
26
 
20
- // HTMiner (wrapped in its own namespace in source files)
21
- #include "htminer/src/build_mdd.hpp" // ← ensure HTMiner MDD builder is available
27
+ // HTMiner (namespaced)
28
+ #include "htminer/src/build_mdd.hpp"
22
29
  #include "htminer/src/freq_miner.hpp"
23
30
  #include "htminer/src/load_inst.hpp"
24
31
  #include "htminer/src/utility.hpp"
25
32
 
26
-
33
+ // LargePrefixProjection
27
34
  #include "largepp/src/freq_miner.hpp"
28
35
  #include "largepp/src/load_inst.hpp"
29
36
  #include "largepp/src/utility.hpp"
30
37
 
38
+ // LargeBTMiner
31
39
  #include "largebm/src/freq_miner.hpp"
32
40
  #include "largebm/src/load_inst.hpp"
33
41
  #include "largebm/src/utility.hpp"
34
42
  #include "largebm/src/build_mdd.hpp"
35
43
 
44
+ // LargeHTMiner
36
45
  #include "largehm/src/freq_miner.hpp"
37
46
  #include "largehm/src/load_inst.hpp"
38
47
  #include "largehm/src/utility.hpp"
39
48
  #include "largehm/src/build_mdd.hpp"
40
49
 
50
+ namespace {
51
+
52
// Scope guard for a temporary file.
//
// The file named by `path` is deleted when the guard is destroyed. An empty
// `path` means there is nothing to clean up, so a default-constructed guard
// is inert until a path is assigned to it.
//
// The guard is move-only: if it could be copied, two guards would hold the
// same path and each would try to delete the file, possibly while the other
// one is still relying on it. Moving transfers the path and leaves the source
// guard empty.
struct TempFile {
    std::string path;

    TempFile() = default;

    TempFile(const TempFile&) = delete;
    TempFile& operator=(const TempFile&) = delete;

    // Take over the other guard's path; `other` ends up empty.
    TempFile(TempFile&& other) noexcept { path.swap(other.path); }

    // Delete the file currently guarded (if any), then take over `other`'s.
    TempFile& operator=(TempFile&& other) noexcept {
        if (this != &other) {
            discard();
            path.swap(other.path);
        }
        return *this;
    }

    ~TempFile() { discard(); }

private:
    // Best-effort removal: the result of std::remove is deliberately ignored
    // because there is nothing useful to do about a failure during cleanup.
    void discard() noexcept {
        if (!path.empty()) {
            std::remove(path.c_str());
            path.clear();
        }
    }
};
61
+
62
// Write in-memory sequences to a newly named temporary text file and return
// its path.
//
// Format: one sequence per line, items separated by a single space. An empty
// sequence produces an empty line. The bindings in this file pass the returned
// path to the miners' Load_instance() functions.
//
// Throws std::runtime_error when no temporary name can be generated, when the
// file cannot be opened, or when the data could not be written completely.
// In the last case the partial file is removed before throwing, so a
// truncated data set is never handed to a miner.
//
// On success the caller owns the file and is responsible for deleting it.
//
// NOTE(review): std::tmpnam only produces a name; there is a window between
// generating the name and opening the file, and the ".txt" suffix is appended
// after the name was chosen. Consider a platform API that creates the file
// atomically if this ever runs in a shared temp directory.
std::string write_temp_seq_file(const std::vector<std::vector<int>>& seqs) {
    char tmp_name[L_tmpnam];
    if (!std::tmpnam(tmp_name)) {
        throw std::runtime_error("Failed to create temporary file name");
    }
    std::string path = std::string(tmp_name) + ".txt";

    std::ofstream ofs(path);
    if (!ofs) {
        throw std::runtime_error("Failed to open temporary file for writing: " + path);
    }

    for (const auto& seq : seqs) {
        const char* separator = "";
        for (const int item : seq) {
            ofs << separator << item;
            separator = " ";
        }
        ofs << '\n';
    }

    // close() flushes; any write error (for example a full disk) shows up in
    // the stream state afterwards.
    ofs.close();
    if (!ofs) {
        std::remove(path.c_str());
        throw std::runtime_error("Failed to write temporary file: " + path);
    }
    return path;
}
87
+
88
+ } // anonymous namespace
41
89
 
42
90
 
43
91
  PYBIND11_MODULE(_effspm, m) {
44
- m.doc() = "Unified SPM library: PrefixProjection, BTMiner, HTMiner";
92
+ m.doc() = "Unified SPM library: PrefixProjection, BTMiner, HTMiner, Large* variants";
45
93
 
46
94
  // ─────────────────────────────────────────────────────────────
47
- // PrefixProjection
95
+ // PrefixProjection (works directly on Python lists or files)
48
96
  // ─────────────────────────────────────────────────────────────
49
97
  m.def("PrefixProjection",
50
98
  [](py::object data,
@@ -59,7 +107,7 @@ PYBIND11_MODULE(_effspm, m) {
59
107
  ::pre_pro = preproc;
60
108
  ::use_dic = use_dic;
61
109
  ::use_list = false;
62
- ::b_disp = verbose;
110
+ ::b_disp = verbose; // controls prints in original code
63
111
  ::b_write = !out_file.empty();
64
112
  ::out_file = out_file;
65
113
 
@@ -69,7 +117,7 @@ PYBIND11_MODULE(_effspm, m) {
69
117
  if (py::isinstance<py::str>(data)) {
70
118
  std::string path = data.cast<std::string>();
71
119
  if (!Load_instance(path, minsup))
72
- throw std::runtime_error("Failed to load file: " + path);
120
+ throw std::runtime_error("PrefixProjection: failed to load file: " + path);
73
121
  } else {
74
122
  auto seqs = data.cast<std::vector<std::vector<int>>>();
75
123
  items = std::move(seqs);
@@ -113,7 +161,7 @@ PYBIND11_MODULE(_effspm, m) {
113
161
  );
114
162
 
115
163
  // ─────────────────────────────────────────────────────────────
116
- // BTMiner
164
+ // BTMiner (always uses professor's Load_instance)
117
165
  // ─────────────────────────────────────────────────────────────
118
166
  m.def("BTMiner",
119
167
  [](py::object data,
@@ -124,52 +172,54 @@ PYBIND11_MODULE(_effspm, m) {
124
172
  bool verbose,
125
173
  const std::string &out_file)
126
174
  {
127
- btminer::time_limit = time_limit;
175
+ // Configure professor globals
176
+ btminer::time_limit = static_cast<int>(time_limit);
128
177
  btminer::pre_pro = preproc;
129
178
  btminer::use_dic = use_dic;
130
- btminer::use_list = false;
131
179
  btminer::b_disp = verbose;
132
180
  btminer::b_write = !out_file.empty();
133
181
  btminer::out_file = out_file;
182
+ btminer::N_mult = 1;
183
+ btminer::M_mult = 1;
184
+ btminer::just_build = false;
134
185
 
135
186
  btminer::ClearCollected();
136
187
  btminer::start_time = std::clock();
137
188
 
189
+ TempFile tmp;
190
+ std::string path;
191
+
138
192
  if (py::isinstance<py::str>(data)) {
139
- std::string path = data.cast<std::string>();
140
- if (!btminer::Load_instance(path, minsup))
141
- throw std::runtime_error("Failed to load file: " + path);
193
+ // File path: use directly
194
+ path = data.cast<std::string>();
142
195
  } else {
196
+ // Python list → write to a temp file in the same format
143
197
  auto seqs = data.cast<std::vector<std::vector<int>>>();
144
- btminer::items = std::move(seqs);
145
- btminer::N = btminer::items.size();
146
-
147
- int max_id = 0;
148
- for (auto &seq : btminer::items)
149
- for (int x : seq)
150
- max_id = std::max(max_id, std::abs(x));
151
- btminer::L = max_id;
152
-
153
- btminer::theta = (minsup < 1.0) ? std::ceil(minsup * btminer::N) : minsup;
198
+ tmp.path = write_temp_seq_file(seqs);
199
+ path = tmp.path;
200
+ }
154
201
 
155
- btminer::DFS.clear();
156
- btminer::DFS.reserve(btminer::L);
157
- for (unsigned int i = 0; i < btminer::L; ++i)
158
- btminer::DFS.emplace_back(-static_cast<int>(i) - 1);
202
+ if (verbose) {
203
+ std::cerr << "[BTMiner] path=" << path
204
+ << " minsup=" << minsup
205
+ << " preproc=" << preproc
206
+ << " use_dic=" << use_dic
207
+ << std::endl;
208
+ }
159
209
 
160
- btminer::M = 0;
161
- btminer::E = 0;
162
- for (auto &seq : btminer::items) {
163
- btminer::M = std::max<unsigned int>(btminer::M, seq.size());
164
- btminer::E += seq.size();
165
- }
210
+ if (!btminer::Load_instance(path, minsup)) {
211
+ throw std::runtime_error("BTMiner: failed to load instance from: " + path);
166
212
  }
167
213
 
168
214
  btminer::Freq_miner();
169
215
 
170
216
  py::dict out;
171
- out["patterns"] = btminer::GetCollected();
172
- out["time"] = btminer::give_time(std::clock() - btminer::start_time);
217
+ out["patterns"] = btminer::GetCollected();
218
+ out["num_patterns"] = btminer::num_patt;
219
+ out["time"] = btminer::give_time(std::clock() - btminer::start_time);
220
+ out["N"] = btminer::N;
221
+ out["L"] = btminer::L;
222
+ out["theta"] = btminer::theta;
173
223
  return out;
174
224
  },
175
225
  py::arg("data"),
@@ -181,83 +231,424 @@ PYBIND11_MODULE(_effspm, m) {
181
231
  py::arg("out_file") = ""
182
232
  );
183
233
 
184
- // ─────────────────────────────────────────────────────────────
185
- // HTMiner
234
+ // ─────────────────────────────────────────────────────────────
235
+ // HTMiner (works on files; we use a temp file for in-memory data)
236
+ // ─────────────────────────────────────────────────────────────
237
+ // ─────────────────────────────────────────────────────────────
238
+ // HTMiner (always uses professor's Load_instance; pre_pro forced ON)
186
239
  // ─────────────────────────────────────────────────────────────
187
240
  m.def("HTMiner",
241
+ [](py::object data,
242
+ double minsup,
243
+ unsigned int time_limit,
244
+ bool /*preproc*/, // Python arg is ignored internally
245
+ bool use_dic,
246
+ bool verbose,
247
+ const std::string &out_file)
248
+ {
249
+ using namespace htminer;
250
+
251
+ // ───────── Global parameter setup ─────────
252
+ htminer::time_limit = time_limit;
253
+
254
+ // IMPORTANT: always run with preprocessing ON,
255
+ // regardless of the Python `preproc` flag.
256
+ htminer::pre_pro = true;
257
+ htminer::use_dic = use_dic;
258
+ htminer::just_build = false;
259
+ htminer::b_disp = verbose;
260
+ htminer::b_write = !out_file.empty();
261
+ htminer::out_file = out_file;
262
+
263
+ // ───────── HARD RESET of HTMiner globals ─────────
264
+ htminer::ClearCollected();
265
+ htminer::Tree.clear();
266
+ htminer::VTree.clear();
267
+ htminer::CTree.clear();
268
+ htminer::DFS.clear();
269
+ htminer::VDFS.clear();
270
+ htminer::item_dic.clear();
271
+
272
+ htminer::M = 0;
273
+ htminer::N = 0;
274
+ htminer::L = 0;
275
+ htminer::E = 0;
276
+ htminer::theta = 0;
277
+ htminer::mlim = 0;
278
+ htminer::itmset_exists = false;
279
+
280
+ // NOTE: do NOT add a root arc here;
281
+ // htminer::Load_instance() already does Tree.emplace_back(0,0,0)
282
+ htminer::start_time = std::clock();
283
+
284
+ // ───────── Handle input (path or in-memory sequences) ─────────
285
+ TempFile tmp;
286
+ std::string path;
287
+
288
+ if (py::isinstance<py::str>(data)) {
289
+ // data is a file path
290
+ path = data.cast<std::string>();
291
+ } else {
292
+ // data is a list[list[int]] → write a temp file in the same text format
293
+ auto seqs = data.cast<std::vector<std::vector<int>>>();
294
+ tmp.path = write_temp_seq_file(seqs);
295
+ path = tmp.path;
296
+ }
297
+
298
+ if (verbose) {
299
+ std::cerr << "[HTMiner] path=" << path
300
+ << " minsup=" << minsup
301
+ << " preproc(always)=true"
302
+ << " use_dic=" << use_dic
303
+ << std::endl;
304
+ }
305
+
306
+ // ───────── Build MDD via professor's loader ─────────
307
+ if (!htminer::Load_instance(path, minsup)) {
308
+ throw std::runtime_error("HTMiner: failed to load instance from: " + path);
309
+ }
310
+
311
+ // ───────── Run miner ─────────
312
+ htminer::Freq_miner();
313
+
314
+ // ───────── Return results ─────────
315
+ py::dict out;
316
+ out["patterns"] = htminer::GetCollected();
317
+ out["time"] = htminer::give_time(std::clock() - htminer::start_time);
318
+ return out;
319
+ },
320
+ py::arg("data"),
321
+ py::arg("minsup") = 0.01,
322
+ py::arg("time_limit") = 36000,
323
+ py::arg("preproc") = false, // kept for API symmetry, but IGNORED
324
+ py::arg("use_dic") = false,
325
+ py::arg("verbose") = false,
326
+ py::arg("out_file") = ""
327
+ );
328
+
329
+ // ─────────────────────────────────────────────────────────────
330
+ // LargePrefixProjection (already has its own Load_py)
331
+ // ─────────────────────────────────────────────────────────────
332
+ m.def("LargePrefixProjection",
188
333
  [](py::object data,
189
- double minsup, unsigned int time_limit,
190
- bool preproc, bool use_dic,
191
- bool verbose, const std::string &out_file)
334
+ double minsup,
335
+ unsigned int time_limit,
336
+ bool preproc,
337
+ bool use_dic,
338
+ bool verbose,
339
+ const std::string &out_file)
192
340
  {
193
- // 1) set HTMiner globals (declared in htminer/src/utility.hpp)
194
- htminer::time_limit = time_limit;
195
- htminer::pre_pro = preproc;
196
- htminer::use_dic = use_dic;
197
- htminer::just_build = false; // or true if you want “build only”
198
- htminer::use_list = false; // HTMiner always uses MDD‐based mode
199
- htminer::b_disp = verbose;
200
- htminer::b_write = !out_file.empty();
201
- htminer::out_file = out_file;
202
- htminer::ClearCollected(); // clear any leftover patterns
203
- htminer::start_time = std::clock();
204
-
205
- // 2) load sequences (either from filename or from Python list)
341
+ largepp::time_limit = time_limit;
342
+ largepp::pre_pro = preproc;
343
+ largepp::use_dic = use_dic;
344
+ largepp::use_list = true; // large prefix uses list-based mining
345
+ largepp::b_disp = verbose;
346
+ largepp::b_write = !out_file.empty();
347
+ largepp::out_file = out_file;
348
+ largepp::just_build = false;
349
+
350
+ largepp::ClearCollected();
351
+ largepp::start_time = std::clock();
352
+
353
+ if (py::isinstance<py::str>(data)) {
354
+ std::string fname = data.cast<std::string>();
355
+ largepp::Load_instance(fname, minsup);
356
+ } else {
357
+ largepp::Load_py(data, minsup);
358
+ }
359
+
360
+ largepp::Freq_miner();
361
+
362
+ py::dict out;
363
+ out["patterns"] = largepp::GetCollected();
364
+ out["time"] = largepp::give_time(std::clock() - largepp::start_time);
365
+ return out;
366
+ },
367
+ py::arg("data"),
368
+ py::arg("minsup") = 0.01,
369
+ py::arg("time_limit") = 36000,
370
+ py::arg("preproc") = false,
371
+ py::arg("use_dic") = false,
372
+ py::arg("verbose") = false,
373
+ py::arg("out_file") = ""
374
+ );
375
+
376
+ // ─────────────────────────────────────────────────────────────
377
+ // LargeBTMiner (always uses professor's largebm::Load_instance)
378
+ // ─────────────────────────────────────────────────────────────
379
+ m.def("LargeBTMiner",
380
+ [](py::object data,
381
+ double minsup,
382
+ unsigned int time_limit,
383
+ bool preproc,
384
+ bool use_dic,
385
+ bool verbose,
386
+ const std::string &out_file)
387
+ {
388
+ using namespace largebm;
389
+
390
+ largebm::time_limit = time_limit;
391
+ largebm::pre_pro = preproc;
392
+ largebm::use_dic = use_dic;
393
+ largebm::use_list = false; // MDD-based
394
+ largebm::b_disp = verbose;
395
+ largebm::b_write = !out_file.empty();
396
+ largebm::out_file = out_file;
397
+ largebm::just_build = false;
398
+
399
+ largebm::ClearCollected();
400
+ largebm::items.clear();
401
+ largebm::item_dic.clear();
402
+ largebm::inv_item_dic.clear();
403
+ largebm::Tree.clear();
404
+ largebm::DFS.clear();
405
+
406
+ largebm::start_time = std::clock();
407
+
408
+ TempFile tmp;
409
+ std::string path;
410
+
411
+ if (py::isinstance<py::str>(data)) {
412
+ path = data.cast<std::string>();
413
+ } else {
414
+ auto seqs = data.cast<std::vector<std::vector<int>>>();
415
+ tmp.path = write_temp_seq_file(seqs);
416
+ path = tmp.path;
417
+ }
418
+
419
+ if (verbose) {
420
+ std::cerr << "[LargeBTMiner] path=" << path
421
+ << " minsup=" << minsup
422
+ << " preproc=" << preproc
423
+ << " use_dic=" << use_dic
424
+ << std::endl;
425
+ }
426
+
427
+ if (!largebm::Load_instance(path, minsup)) {
428
+ throw std::runtime_error("LargeBTMiner: failed to load instance from: " + path);
429
+ }
430
+
431
+ largebm::Freq_miner();
432
+
433
+ py::dict out;
434
+ out["patterns"] = largebm::GetCollected();
435
+ out["time"] = largebm::give_time(std::clock() - largebm::start_time);
436
+ return out;
437
+ },
438
+ py::arg("data"),
439
+ py::arg("minsup") = 0.01,
440
+ py::arg("time_limit") = 36000,
441
+ py::arg("preproc") = false,
442
+ py::arg("use_dic") = false,
443
+ py::arg("verbose") = false,
444
+ py::arg("out_file") = ""
445
+ );
446
+
447
+ // ─────────────────────────────────────────────────────────────
448
+ // LargeHTMiner (always uses professor's largehm::Load_instance; pre_pro forced ON)
449
+ // ─────────────────────────────────────────────────────────────
450
+ // ─────────────────────────────────────────────────────────────
451
+ // LargeHTMiner (professor's Large HTMiner, namespaced as largehm)
452
+ // ─────────────────────────────────────────────────────────────
453
+ m.def("LargeHTMiner",
454
+ [](py::object data,
455
+ double minsup,
456
+ unsigned int time_limit,
457
+ bool /*preproc*/, // kept for API symmetry; ignored
458
+ bool use_dic,
459
+ bool verbose,
460
+ const std::string &out_file)
461
+ {
462
+ using namespace largehm;
463
+
464
+ // 1) Global configuration (mirror professor's style)
465
+ largehm::time_limit = time_limit;
466
+ largehm::pre_pro = true; // always preprocess
467
+ largehm::use_dic = use_dic;
468
+ largehm::just_build = false;
469
+ largehm::b_disp = verbose;
470
+ largehm::b_write = !out_file.empty();
471
+ largehm::out_file = out_file;
472
+
473
+ // 2) HARD RESET of all global state for a fresh run
474
+ largehm::ClearCollected(); // our helper in largehm::utility.cpp
475
+
476
+ largehm::M = 0;
477
+ largehm::L = 0;
478
+ largehm::mlim = 0;
479
+ largehm::N = 0;
480
+ largehm::theta = 0;
481
+ largehm::E = 0;
482
+ largehm::itmset_exists = false;
483
+
484
+ // containers
485
+ // (item_dic reset is optional and not strictly needed here)
486
+ largehm::DFS.clear();
487
+ largehm::VDFS.clear();
488
+ largehm::Tree.clear();
489
+ largehm::VTree.clear();
490
+ largehm::CTree.clear();
491
+
492
+ largehm::start_time = std::clock();
493
+
494
+ // 3) Handle input (file path or Python list)
495
+ TempFile tmp;
496
+ std::string path;
497
+
498
+ if (py::isinstance<py::str>(data)) {
499
+ path = data.cast<std::string>();
500
+ } else {
501
+ auto seqs = data.cast<std::vector<std::vector<int>>>();
502
+ tmp.path = write_temp_seq_file(seqs);
503
+ path = tmp.path;
504
+ }
505
+
506
+ if (verbose) {
507
+ std::cerr << "[LargeHTMiner] path=" << path
508
+ << " minsup=" << minsup
509
+ << " preproc(always)=true"
510
+ << " use_dic=" << use_dic
511
+ << std::endl;
512
+ }
513
+
514
+ // 4) Build MDD / load instance.
515
+ // NOTE: Load_instance() itself does Tree.emplace_back(0,0,0),
516
+ // so we DO NOT create a root node here.
517
+ if (!largehm::Load_instance(path, minsup)) {
518
+ throw std::runtime_error("LargeHTMiner: failed to load instance from: " + path);
519
+ }
520
+
521
+ // 5) Run miner (same timing logic as original main)
522
+ if (!largehm::just_build &&
523
+ largehm::give_time(std::clock() - largehm::start_time) < largehm::time_limit)
524
+ {
525
+ largehm::Freq_miner();
526
+ if (largehm::give_time(std::clock() - largehm::start_time) >= largehm::time_limit) {
527
+ std::cout << "TIME LIMIT REACHED\n";
528
+ }
529
+ }
530
+
531
+ // 6) Return collected patterns + runtime
532
+ py::dict out;
533
+ out["patterns"] = largehm::GetCollected();
534
+ out["time"] = largehm::give_time(std::clock() - largehm::start_time);
535
+ return out;
536
+ },
537
+ py::arg("data"),
538
+ py::arg("minsup") = 0.01,
539
+ py::arg("time_limit") = 36000,
540
+ py::arg("preproc") = false, // kept for API symmetry
541
+ py::arg("use_dic") = false,
542
+ py::arg("verbose") = false,
543
+ py::arg("out_file") = ""
544
+ );
545
+
546
+
547
+ }
548
+
549
+
550
+ /*#include <pybind11/pybind11.h>
551
+ #include <pybind11/stl.h>
552
+ namespace py = pybind11;
553
+ #include <iostream>
554
+
555
+
556
+ // PrefixProjection headers
557
+ #include "freq_miner.hpp"
558
+ #include "load_inst.hpp"
559
+ #include "utility.hpp"
560
+
561
+ // BTMiner (wrapped in its own namespace in source files)
562
+ #include "btminer/src/freq_miner.hpp"
563
+ #include "btminer/src/load_inst.hpp"
564
+ #include "btminer/src/utility.hpp"
565
+ #include "btminer/src/build_mdd.hpp"
566
+
567
+ // HTMiner (wrapped in its own namespace in source files)
568
+ #include "htminer/src/build_mdd.hpp" // ← ensure HTMiner MDD builder is available
569
+ #include "htminer/src/freq_miner.hpp"
570
+ #include "htminer/src/load_inst.hpp"
571
+ #include "htminer/src/utility.hpp"
572
+
573
+
574
+ #include "largepp/src/freq_miner.hpp"
575
+ #include "largepp/src/load_inst.hpp"
576
+ #include "largepp/src/utility.hpp"
577
+
578
+
579
+ #include "largebm/src/freq_miner.hpp"
580
+ #include "largebm/src/load_inst.hpp"
581
+ #include "largebm/src/utility.hpp"
582
+ #include "largebm/src/build_mdd.hpp"
583
+
584
+ #include "largehm/src/freq_miner.hpp"
585
+ #include "largehm/src/load_inst.hpp"
586
+ #include "largehm/src/utility.hpp"
587
+ #include "largehm/src/build_mdd.hpp"
588
+
589
+
590
+
591
+ PYBIND11_MODULE(_effspm, m) {
592
+ m.doc() = "Unified SPM library: PrefixProjection, BTMiner, HTMiner";
593
+
594
+ // ─────────────────────────────────────────────────────────────
595
+ // PrefixProjection
596
+ // ─────────────────────────────────────────────────────────────
597
+ m.def("PrefixProjection",
598
+ [](py::object data,
599
+ double minsup,
600
+ unsigned int time_limit,
601
+ bool preproc,
602
+ bool use_dic,
603
+ bool verbose,
604
+ const std::string &out_file)
605
+ {
606
+ ::time_limit = time_limit;
607
+ ::pre_pro = preproc;
608
+ ::use_dic = use_dic;
609
+ ::use_list = false;
610
+ ::b_disp = verbose;
611
+ ::b_write = !out_file.empty();
612
+ ::out_file = out_file;
613
+
614
+ ClearCollected();
615
+ start_time = std::clock();
616
+
206
617
  if (py::isinstance<py::str>(data)) {
207
618
  std::string path = data.cast<std::string>();
208
- if (!htminer::Load_instance(path, minsup))
619
+ if (!Load_instance(path, minsup))
209
620
  throw std::runtime_error("Failed to load file: " + path);
210
621
  } else {
211
622
  auto seqs = data.cast<std::vector<std::vector<int>>>();
212
- htminer::items = std::move(seqs);
213
- htminer::N = htminer::items.size();
623
+ items = std::move(seqs);
624
+ N = items.size();
214
625
 
215
- // compute L (max item ID), M (max sequence length), E (total entries)
216
626
  int max_id = 0;
217
- htminer::M = 0;
218
- htminer::E = 0;
219
- for (auto &seq : htminer::items) {
220
- htminer::M = std::max<unsigned int>(htminer::M, seq.size());
627
+ for (auto &seq : items)
221
628
  for (int x : seq)
222
629
  max_id = std::max(max_id, std::abs(x));
223
- htminer::E += seq.size();
630
+ L = max_id;
631
+
632
+ theta = (minsup < 1.0) ? std::ceil(minsup * N) : minsup;
633
+
634
+ DFS.clear();
635
+ DFS.reserve(L);
636
+ for (unsigned int i = 0; i < L; ++i)
637
+ DFS.emplace_back(-static_cast<int>(i) - 1);
638
+
639
+ M = 0;
640
+ E = 0;
641
+ for (auto &seq : items) {
642
+ M = std::max<unsigned int>(M, seq.size());
643
+ E += seq.size();
224
644
  }
225
- htminer::L = max_id;
226
- htminer::theta = (minsup < 1.0)
227
- ? static_cast<unsigned long long>(std::ceil(minsup * htminer::N))
228
- : static_cast<unsigned long long>(minsup);
229
-
230
- // build empty DFS stack (size L) as HTMiner expects
231
- htminer::DFS.clear();
232
- htminer::DFS.reserve(htminer::L);
233
- for (unsigned int i = 0; i < static_cast<unsigned int>(htminer::L); ++i)
234
- htminer::DFS.emplace_back(-static_cast<int>(i) - 1);
235
-
236
- // initialize VDFS if HTMiner needs it
237
- htminer::VDFS.clear();
238
- htminer::VDFS.resize(htminer::L);
239
645
  }
240
646
 
241
- // 3) run the mining algorithm
242
- htminer::Freq_miner();
243
-
244
- // std::cout << "[HTMiner] dumping all collected patterns:\n";
245
- // for (size_t i = 0; i < htminer::collectedPatterns.size(); ++i) {
246
- // const auto &seq = htminer::collectedPatterns[i];
247
- // std::cout << "Pattern " << i << ": { ";
248
- // for (int x : seq) {
249
- // std::cout << x << " ";
250
- // }
251
- // std::cout << "}\n";
252
- //}
253
- std::cout << " total patterns = "
254
- << htminer::collectedPatterns.size() << "\n";
255
- // ─────────────────────────────────────────────────
256
-
257
- // 4) return patterns + elapsed time
647
+ Freq_miner();
648
+
258
649
  py::dict out;
259
- out["patterns"] = htminer::GetCollected();
260
- out["time"] = htminer::give_time(std::clock() - htminer::start_time);
650
+ out["patterns"] = GetCollected();
651
+ out["time"] = give_time(std::clock() - start_time);
261
652
  return out;
262
653
  },
263
654
  py::arg("data"),
@@ -268,8 +659,223 @@ std::cout << " total patterns = "
268
659
  py::arg("verbose") = false,
269
660
  py::arg("out_file") = ""
270
661
  );
662
+ m.def("BTMiner",
663
+ [](py::object data,
664
+ double minsup,
665
+ unsigned int time_limit,
666
+ bool preproc,
667
+ bool use_dic,
668
+ bool verbose,
669
+ const std::string &out_file)
670
+ {
671
+ // We are calling the *professor* BTMiner, now namespaced as btminer::.
672
+ // So we only set the globals the professor code actually has.
673
+
674
+ // 1) configure professor globals
675
+ btminer::time_limit = static_cast<int>(time_limit);
676
+ btminer::pre_pro = preproc;
677
+ btminer::use_dic = use_dic;
678
+ btminer::b_disp = verbose;
679
+ btminer::b_write = !out_file.empty();
680
+ btminer::out_file = out_file;
681
+ btminer::N_mult = 1; // professor uses these too
682
+ btminer::M_mult = 1;
683
+ btminer::just_build = false; // we want full mining
684
+
685
+ btminer::start_time = std::clock();
686
+
687
+ // 2) load data
688
+ //
689
+ // Professor’s code is primarily file-based (Load_instance(const string&, double)).
690
+ // So: if user passes a file path → use the professor loader directly.
691
+ // If user passes a Python list-of-lists → we will build the MDD the same
692
+ // way professor’s loader does, but without changing his logic.
693
+ if (py::isinstance<py::str>(data)) {
694
+ // ----- FILE MODE -----
695
+ std::string path = data.cast<std::string>();
271
696
 
272
- m.def("LargePrefixProjection",
697
+ if (verbose) {
698
+ std::cerr << "[BT][binding] file=" << path
699
+ << " minsup=" << minsup
700
+ << " preproc=" << preproc << std::endl;
701
+ }
702
+
703
+ if (!btminer::Load_instance(path, minsup)) {
704
+ throw std::runtime_error("BTMiner: failed to load file: " + path);
705
+ }
706
+ } else {
707
+ // ----- PYTHON LIST MODE -----
708
+ //
709
+ // We mimic professor’s loader:
710
+ // - create root in Tree
711
+ // - compute N, M, L
712
+ // - compute theta from minsup
713
+ // - seed DFS (one Pattern per item, as in Preprocess branch)
714
+ // - call Build_MDD(...) for each sequence
715
+ //
716
+ // This DOES NOT change his mining logic; it just drives it from memory.
717
+
718
+ auto seqs = data.cast<std::vector<std::vector<int>>>();
719
+
720
+ // clear MDD and globals to a known state
721
+ btminer::Tree.clear();
722
+ btminer::Tree.emplace_back(0, 0, 0); // root (exactly like professor)
723
+
724
+ // compute basic stats
725
+ int max_id = 0;
726
+ int max_len = 0;
727
+ int seq_count = 0;
728
+ long long entries = 0;
729
+
730
+ for (const auto &s : seqs) {
731
+ if (s.empty()) continue;
732
+ ++seq_count;
733
+ max_len = std::max<int>(max_len, static_cast<int>(s.size()));
734
+ for (int x : s) {
735
+ max_id = std::max(max_id, std::abs(x));
736
+ ++entries;
737
+ }
738
+ }
739
+
740
+ btminer::N = seq_count;
741
+ btminer::M = max_len;
742
+ btminer::L = max_id;
743
+ btminer::E = static_cast<int>(entries);
744
+
745
+ // theta = abs support
746
+ if (minsup < 1.0)
747
+ btminer::theta = static_cast<int>(std::ceil(minsup * btminer::N * btminer::N_mult));
748
+ else
749
+ btminer::theta = static_cast<int>(minsup);
750
+
751
+ // seed DFS exactly like professor does in the preprocessed branch:
752
+ btminer::DFS.clear();
753
+ btminer::DFS.reserve(btminer::L);
754
+ for (int i = 0; i < btminer::L; ++i)
755
+ btminer::DFS.emplace_back(-i - 1);
756
+
757
+ // now build the MDD, sequence by sequence
758
+ for (const auto &s : seqs) {
759
+ if (s.empty()) continue;
760
+ // professor’s Build_MDD takes a vector<int> by non-const ref
761
+ std::vector<int> tmp = s;
762
+ btminer::Build_MDD(tmp);
763
+ }
764
+
765
+ if (verbose) {
766
+ std::cerr << "[BT][binding] PY mode: N=" << btminer::N
767
+ << " L=" << btminer::L
768
+ << " M=" << btminer::M
769
+ << " E=" << btminer::E
770
+ << " theta=" << btminer::theta
771
+ << " Tree.size()=" << btminer::Tree.size()
772
+ << std::endl;
773
+ }
774
+ }
775
+
776
+ // 3) run professor’s miner
777
+ btminer::Freq_miner();
778
+
779
+ // 4) build python result
780
+ // 4) build python result
781
+ py::dict out;
782
+ out["patterns"] = btminer::GetCollected(); // ← NEW
783
+ out["num_patterns"] = btminer::num_patt;
784
+ out["time"] = btminer::give_time(std::clock() - btminer::start_time);
785
+ out["N"] = btminer::N;
786
+ out["L"] = btminer::L;
787
+ out["theta"] = btminer::theta;
788
+ return out;
789
+
790
+ },
791
+ py::arg("data"),
792
+ py::arg("minsup") = 0.01,
793
+ py::arg("time_limit") = 36000,
794
+ py::arg("preproc") = false,
795
+ py::arg("use_dic") = false,
796
+ py::arg("verbose") = false,
797
+ py::arg("out_file") = ""
798
+ );
799
+
800
+
801
+
802
+
803
+ // HTMiner
804
+ // ─────────────────────────────────────────────────────────────
805
+ // HTMiner
806
+ m.def("HTMiner",
807
+ [](py::object data,
808
+ double minsup, unsigned int time_limit,
809
+ bool preproc, bool use_dic,
810
+ bool verbose, const std::string &out_file)
811
+ {
812
+ htminer::time_limit = time_limit;
813
+ htminer::pre_pro = preproc;
814
+ htminer::use_dic = use_dic;
815
+ htminer::just_build = false;
816
+ htminer::use_list = false;
817
+ htminer::b_disp = verbose;
818
+ htminer::b_write = !out_file.empty();
819
+ htminer::out_file = out_file;
820
+ htminer::ClearCollected();
821
+ htminer::start_time = std::clock();
822
+
823
+ if (py::isinstance<py::str>(data)) {
824
+ std::string path = data.cast<std::string>();
825
+ if (!htminer::Load_instance(path, minsup))
826
+ throw std::runtime_error("Failed to load file: " + path);
827
+ } else {
828
+ auto seqs = data.cast<std::vector<std::vector<int>>>();
829
+ htminer::items = std::move(seqs);
830
+ htminer::N = htminer::items.size();
831
+
832
+ int max_id = 0;
833
+ htminer::M = 0;
834
+ htminer::E = 0;
835
+ for (auto &seq : htminer::items) {
836
+ htminer::M = std::max<unsigned int>(htminer::M, seq.size());
837
+ for (int x : seq)
838
+ max_id = std::max(max_id, std::abs(x));
839
+ htminer::E += seq.size();
840
+ }
841
+ htminer::L = max_id;
842
+ htminer::theta = (minsup < 1.0)
843
+ ? static_cast<unsigned long long>(std::ceil(minsup * htminer::N))
844
+ : static_cast<unsigned long long>(minsup);
845
+
846
+ htminer::DFS.clear();
847
+ htminer::DFS.reserve(htminer::L);
848
+ for (unsigned int i = 0; i < static_cast<unsigned int>(htminer::L); ++i)
849
+ htminer::DFS.emplace_back(-static_cast<int>(i) - 1);
850
+
851
+ htminer::VDFS.clear();
852
+ htminer::VDFS.resize(htminer::L);
853
+ }
854
+
855
+ htminer::Freq_miner();
856
+
857
+ // 👇 now really respects verbose
858
+ if (verbose) {
859
+ std::cout << " total patterns = "
860
+ << htminer::collectedPatterns.size() << "\n";
861
+ }
862
+
863
+ py::dict out;
864
+ out["patterns"] = htminer::GetCollected();
865
+ out["time"] = htminer::give_time(std::clock() - htminer::start_time);
866
+ return out;
867
+ },
868
+ py::arg("data"),
869
+ py::arg("minsup") = 0.01,
870
+ py::arg("time_limit") = 36000,
871
+ py::arg("preproc") = false,
872
+ py::arg("use_dic") = false,
873
+ py::arg("verbose") = false,
874
+ py::arg("out_file") = ""
875
+ );
876
+
877
+
878
+ m.def("LargePrefixProjection",
273
879
  [](py::object data,
274
880
  double minsup,
275
881
  unsigned int time_limit,
@@ -281,28 +887,30 @@ std::cout << " total patterns = "
281
887
  largepp::time_limit = time_limit;
282
888
  largepp::pre_pro = preproc;
283
889
  largepp::use_dic = use_dic;
284
- largepp::use_list = true; // ← key difference
890
+ largepp::use_list = true;
285
891
  largepp::b_disp = verbose;
286
892
  largepp::b_write = !out_file.empty();
287
893
  largepp::out_file = out_file;
288
- largepp::just_build = false;
894
+ largepp::just_build = false;
289
895
 
290
896
  largepp::ClearCollected();
291
897
  largepp::start_time = std::clock();
292
- std::string fname = data.cast<std::string>();
293
- /* 1) load instance (py list or filename) */
294
- if (py::isinstance<py::str>(data))
295
-
296
- largepp::Load_instance(fname, minsup);
297
- else
298
- largepp::Load_py(data, minsup); // helper you’ll expose
299
-
300
- std::vector<unsigned long long> dbg;
301
-
302
-
303
-
304
898
 
899
+ // 👇 this was the last noisy one
900
+ if (verbose) {
901
+ std::cerr << " minsup=" << minsup
902
+ << " preproc=" << preproc
903
+ << " verbose=" << verbose
904
+ << " out_file=" << (out_file.empty() ? "(none)" : out_file)
905
+ << " use_dic=" << use_dic << "\n";
906
+ }
305
907
 
908
+ if (py::isinstance<py::str>(data)) {
909
+ std::string fname = data.cast<std::string>();
910
+ largepp::Load_instance(fname, minsup);
911
+ } else {
912
+ largepp::Load_py(data, minsup);
913
+ }
306
914
 
307
915
  largepp::Freq_miner();
308
916
 
@@ -320,6 +928,8 @@ std::cout << " total patterns = "
320
928
  py::arg("out_file") = ""
321
929
  );
322
930
 
931
+
932
+
323
933
  // ─────────────────────────────────────────────────────────────
324
934
  // LargeBTMiner -- Python wrapper for the largebm implementation
325
935
  // ─────────────────────────────────────────────────────────────
@@ -404,100 +1014,108 @@ std::cout << " total patterns = "
404
1014
 
405
1015
 
406
1016
 
407
- m.def("LargeBTMiner",
408
- [](py::object data,
409
- double minsup,
410
- unsigned int time_limit,
411
- bool preproc,
412
- bool use_dic,
413
- bool verbose,
414
- const std::string &out_file)
415
- {
416
- // 0) Set global flags and timers
417
- largebm::time_limit = time_limit;
418
- largebm::pre_pro = preproc;
419
- largebm::use_dic = use_dic;
420
- largebm::use_list = false; // large‑mode → always MDD
421
- largebm::b_disp = verbose;
422
- largebm::b_write = !out_file.empty();
423
- largebm::out_file = out_file;
424
- largebm::just_build = false;
1017
+ // ─────────────────────────────────────────────────────────────────────────
1018
+ // LargeBTMiner (MDD-based)
1019
+ // ─────────────────────────────────────────────────────────────────────────
1020
+ /*m.def("LargeBTMiner",
1021
+ [](py::object data,
1022
+ double minsup,
1023
+ unsigned int time_limit,
1024
+ bool preproc,
1025
+ bool use_dic,
1026
+ bool verbose,
1027
+ const std::string &out_file)
1028
+ {
1029
+ using namespace largebm;
1030
+
1031
+ // 0) Set global flags and timers
1032
+ largebm::time_limit = time_limit;
1033
+ largebm::pre_pro = preproc;
1034
+ largebm::use_dic = use_dic;
1035
+ largebm::use_list = false; // large-mode → always MDD
1036
+ largebm::b_disp = verbose;
1037
+ largebm::b_write = !out_file.empty();
1038
+ largebm::out_file = out_file;
1039
+ largebm::just_build = false;
1040
+
1041
+ // 0.1) Clear any leftover data/state from previous runs
1042
+ largebm::items.clear();
1043
+ largebm::item_dic.clear();
1044
+ largebm::inv_item_dic.clear();
1045
+ largebm::Tree.clear();
1046
+ largebm::DFS.clear();
1047
+ largebm::ClearCollected();
1048
+
1049
+ // 1) Load sequences (either from filename or from Python list)
1050
+ if (py::isinstance<py::str>(data)) {
1051
+ // ─────────── FILE-BASED MODE ───────────
1052
+ std::string path = data.cast<std::string>();
1053
+ if (!largebm::Load_instance(path, minsup))
1054
+ throw std::runtime_error("Failed to load file: " + path);
425
1055
 
426
- // 0.1) Clear any leftover data/state from previous runs
427
- largebm::items.clear();
428
- largebm::item_dic.clear();
429
- largebm::inv_item_dic.clear();
430
- largebm::Tree.clear();
431
- largebm::DFS.clear();
432
- largebm::ClearCollected();
1056
+ } else {
1057
+ // ────────── IN-MEMORY MODE ──────────
1058
+ auto seqs = data.cast<std::vector<std::vector<int>>>();
1059
+ largebm::items = std::move(seqs);
1060
+ largebm::N = largebm::items.size();
433
1061
 
434
- // 1) Load sequences (either from filename or from Python list)
435
- if (py::isinstance<py::str>(data)) {
436
- // ─────────── FILE‑BASED MODE ───────────
437
- std::string path = data.cast<std::string>();
438
- if (!largebm::Load_instance(path, minsup))
439
- throw std::runtime_error("Failed to load file: " + path);
1062
+ // 1.1) Compute basic DB statistics (M, E, L) and absolute support θ
1063
+ int max_id = 0;
1064
+ largebm::M = 0;
1065
+ largebm::E = 0;
1066
+ for (auto &seq : largebm::items) {
1067
+ largebm::M = std::max<unsigned int>(largebm::M, static_cast<unsigned int>(seq.size()));
1068
+ largebm::E += static_cast<unsigned long long>(seq.size());
1069
+ for (int x : seq) max_id = std::max(max_id, std::abs(x));
1070
+ }
1071
+ largebm::L = static_cast<unsigned int>(max_id);
1072
+ largebm::theta = (minsup < 1.0)
1073
+ ? static_cast<unsigned long long>(std::ceil(minsup * largebm::N))
1074
+ : static_cast<unsigned long long>(minsup);
440
1075
 
441
- } else {
442
- // ────────── IN‑MEMORY MODE ──────────
443
- auto seqs = data.cast<std::vector<std::vector<int>>>();
444
- largebm::items = std::move(seqs);
445
- largebm::N = largebm::items.size();
1076
+ // 1.2) Initialize DFS buffer (size = L)
1077
+ largebm::DFS.reserve(largebm::L);
1078
+ for (unsigned int i = 0; i < largebm::L; ++i)
1079
+ largebm::DFS.emplace_back(-static_cast<int>(i) - 1);
446
1080
 
447
- // 1.1) Compute basic DB statistics (M, E, L) and absolute support θ
448
- int max_id = 0;
449
- largebm::M = 0;
450
- largebm::E = 0;
451
- for (auto &seq : largebm::items) {
452
- largebm::M = std::max<unsigned int>(largebm::M, static_cast<unsigned int>(seq.size()));
453
- largebm::E += static_cast<unsigned long long>(seq.size());
454
- for (int x : seq) max_id = std::max(max_id, std::abs(x));
455
- }
456
- largebm::L = static_cast<unsigned int>(max_id);
457
- largebm::theta = (minsup < 1.0)
458
- ? static_cast<unsigned long long>(std::ceil(minsup * largebm::N))
459
- : static_cast<unsigned long long>(minsup);
460
-
461
- // 1.2) Initialize DFS buffer (size = L)
462
- largebm::DFS.reserve(largebm::L);
463
- for (unsigned int i = 0; i < largebm::L; ++i)
464
- largebm::DFS.emplace_back(-static_cast<int>(i) - 1);
465
-
466
- // 1.3) Build the MDD “Tree”
467
- // Insert one dummy root node (item=0, freq=0, anct=0)
468
- largebm::Tree.emplace_back(0, 0, 0);
469
- for (auto &seq : largebm::items)
470
- largebm::Build_MDD(const_cast<std::vector<int>&>(seq));
471
- }
1081
+ // 1.3) Build the MDD “Tree”
1082
+ // Insert one dummy root node (item=0, freq=0, anct=0)
1083
+ largebm::Tree.emplace_back(0, 0, 0);
1084
+ for (auto &seq : largebm::items)
1085
+ largebm::Build_MDD(const_cast<std::vector<int>&>(seq));
1086
+ }
472
1087
 
473
- // 2) Rebuild inversedictionary from fresh item_dic
474
- {
475
- std::vector<int> inv(largebm::item_dic.size() + 1);
476
- for (int old = 1; old <= static_cast<int>(largebm::item_dic.size()); ++old) {
477
- int cid = largebm::item_dic[old - 1];
478
- if (cid > 0) inv[cid] = old;
479
- }
480
- largebm::inv_item_dic = std::move(inv);
1088
+ // 2) Rebuild inverse-dictionary from fresh item_dic
1089
+ {
1090
+ std::vector<int> inv(largebm::item_dic.size() + 1);
1091
+ for (int old = 1; old <= static_cast<int>(largebm::item_dic.size()); ++old) {
1092
+ int cid = largebm::item_dic[old - 1];
1093
+ if (cid > 0) inv[cid] = old;
481
1094
  }
1095
+ largebm::inv_item_dic = std::move(inv);
1096
+ }
482
1097
 
483
- // 3) Start timing and run the miner
484
- largebm::start_time = std::clock();
485
- largebm::Freq_miner();
1098
+ // 3) Start timing and run the miner
1099
+ largebm::start_time = std::clock();
1100
+ largebm::Freq_miner();
1101
+
1102
+ // 4) Collect results and elapsed time
1103
+ const auto& pats = largebm::GetCollected();
1104
+
1105
+ py::dict out;
1106
+ out["patterns"] = pats;
1107
+ out["time"] = largebm::give_time(std::clock() - largebm::start_time);
1108
+ return out;
1109
+ },
1110
+ py::arg("data"),
1111
+ py::arg("minsup") = 0.01,
1112
+ py::arg("time_limit") = 36000,
1113
+ py::arg("preproc") = false,
1114
+ py::arg("use_dic") = false,
1115
+ py::arg("verbose") = false,
1116
+ py::arg("out_file") = ""
1117
+ );
486
1118
 
487
- // 4) Collect results and elapsed time
488
- py::dict out;
489
- out["patterns"] = largebm::GetCollected();
490
- out["time"] = largebm::give_time(std::clock() - largebm::start_time);
491
- return out;
492
- },
493
- py::arg("data"),
494
- py::arg("minsup") = 0.01,
495
- py::arg("time_limit") = 36000,
496
- py::arg("preproc") = false,
497
- py::arg("use_dic") = false,
498
- py::arg("verbose") = false,
499
- py::arg("out_file") = ""
500
- );
501
1119
 
502
1120
 
503
1121
  m.def("LargeHTMiner",
@@ -606,4 +1224,4 @@ m.def("LargeHTMiner",
606
1224
 
607
1225
 
608
1226
 
609
- }
1227
+ } */