effspm 0.1.12__cp313-cp313-macosx_10_13_universal2.whl → 0.2.1__cp313-cp313-macosx_10_13_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of effspm might be problematic.

Files changed (48)
  1. effspm/__init__.py +3 -3
  2. effspm/_effspm.cpp +437 -13
  3. effspm/_effspm.cpython-313-darwin.so +0 -0
  4. effspm/btminer/src/freq_miner.cpp +3 -0
  5. effspm/btminer/src/load_inst.cpp +4 -0
  6. effspm/btminer/src/load_inst.hpp +2 -0
  7. effspm/btminer/src/utility.cpp +31 -33
  8. effspm/btminer/src/utility.hpp +9 -13
  9. effspm/htminer/src/build_mdd.cpp +192 -0
  10. effspm/htminer/src/build_mdd.hpp +64 -0
  11. effspm/htminer/src/freq_miner.cpp +350 -0
  12. effspm/htminer/src/freq_miner.hpp +60 -0
  13. effspm/htminer/src/load_inst.cpp +381 -0
  14. effspm/htminer/src/load_inst.hpp +23 -0
  15. effspm/htminer/src/main.cpp +96 -0
  16. effspm/htminer/src/utility.cpp +72 -0
  17. effspm/htminer/src/utility.hpp +77 -0
  18. effspm/largebm/src/build_mdd.cpp +137 -0
  19. effspm/largebm/src/build_mdd.hpp +47 -0
  20. effspm/largebm/src/freq_miner.cpp +342 -0
  21. effspm/largebm/src/freq_miner.hpp +48 -0
  22. effspm/largebm/src/load_inst.cpp +235 -0
  23. effspm/largebm/src/load_inst.hpp +45 -0
  24. effspm/largebm/src/main.cpp +95 -0
  25. effspm/largebm/src/utility.cpp +45 -0
  26. effspm/largebm/src/utility.hpp +18 -0
  27. effspm/largehm/src/build_mdd.cpp +173 -0
  28. effspm/largehm/src/build_mdd.hpp +93 -0
  29. effspm/largehm/src/freq_miner.cpp +441 -0
  30. effspm/largehm/src/freq_miner.hpp +77 -0
  31. effspm/largehm/src/load_inst.cpp +357 -0
  32. effspm/largehm/src/load_inst.hpp +64 -0
  33. effspm/largehm/src/main.cpp +95 -0
  34. effspm/largehm/src/utility.cpp +38 -0
  35. effspm/largehm/src/utility.hpp +29 -0
  36. effspm/largepp/src/freq_miner.cpp +170 -0
  37. effspm/largepp/src/freq_miner.hpp +43 -0
  38. effspm/largepp/src/load_inst.cpp +219 -0
  39. effspm/largepp/src/load_inst.hpp +28 -0
  40. effspm/largepp/src/main.cpp +108 -0
  41. effspm/largepp/src/utility.cpp +33 -0
  42. effspm/largepp/src/utility.hpp +20 -0
  43. {effspm-0.1.12.dist-info → effspm-0.2.1.dist-info}/METADATA +1 -1
  44. effspm-0.2.1.dist-info/RECORD +59 -0
  45. {effspm-0.1.12.dist-info → effspm-0.2.1.dist-info}/WHEEL +1 -1
  46. effspm-0.1.12.dist-info/RECORD +0 -25
  47. {effspm-0.1.12.dist-info → effspm-0.2.1.dist-info}/licenses/LICENSE +0 -0
  48. {effspm-0.1.12.dist-info → effspm-0.2.1.dist-info}/top_level.txt +0 -0
effspm/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
- from ._effspm import PrefixProjection, BTMiner
2
-
3
- __all__ = ['PrefixProjection', 'BTMiner']
1
+ # effspm/__init__.py
2
+ from ._effspm import PrefixProjection, LargePrefixProjection, BTMiner ,LargeBTMiner,LargeHTMiner,HTMiner
3
+ __all__ = ["PrefixProjection", "LargePrefixProjection", "BTMiner","LargeBTMiner","LargeHTMiner","HTMiner"]
effspm/_effspm.cpp CHANGED
@@ -1,5 +1,10 @@
1
+ // _effspm.cpp
2
+
1
3
  #include <pybind11/pybind11.h>
2
4
  #include <pybind11/stl.h>
5
+ namespace py = pybind11;
6
+ #include <iostream>
7
+
3
8
 
4
9
  // PrefixProjection headers
5
10
  #include "freq_miner.hpp"
@@ -12,12 +17,35 @@
12
17
  #include "btminer/src/utility.hpp"
13
18
  #include "btminer/src/build_mdd.hpp"
14
19
 
15
- namespace py = pybind11;
20
+ // HTMiner (wrapped in its own namespace in source files)
21
+ #include "htminer/src/build_mdd.hpp" // ← ensure HTMiner MDD builder is available
22
+ #include "htminer/src/freq_miner.hpp"
23
+ #include "htminer/src/load_inst.hpp"
24
+ #include "htminer/src/utility.hpp"
25
+
26
+
27
+ #include "largepp/src/freq_miner.hpp"
28
+ #include "largepp/src/load_inst.hpp"
29
+ #include "largepp/src/utility.hpp"
30
+
31
+ #include "largebm/src/freq_miner.hpp"
32
+ #include "largebm/src/load_inst.hpp"
33
+ #include "largebm/src/utility.hpp"
34
+ #include "largebm/src/build_mdd.hpp"
35
+
36
+ #include "largehm/src/freq_miner.hpp"
37
+ #include "largehm/src/load_inst.hpp"
38
+ #include "largehm/src/utility.hpp"
39
+ #include "largehm/src/build_mdd.hpp"
40
+
41
+
16
42
 
17
43
  PYBIND11_MODULE(_effspm, m) {
18
- m.doc() = "Unified SPM library: PrefixProjection, BTMiner, and more";
44
+ m.doc() = "Unified SPM library: PrefixProjection, BTMiner, HTMiner";
19
45
 
46
+ // ─────────────────────────────────────────────────────────────
20
47
  // PrefixProjection
48
+ // ─────────────────────────────────────────────────────────────
21
49
  m.def("PrefixProjection",
22
50
  [](py::object data,
23
51
  double minsup,
@@ -76,15 +104,17 @@ PYBIND11_MODULE(_effspm, m) {
76
104
  return out;
77
105
  },
78
106
  py::arg("data"),
79
- py::arg("minsup") = 0.01,
107
+ py::arg("minsup") = 0.01,
80
108
  py::arg("time_limit") = 36000,
81
- py::arg("preproc") = false,
82
- py::arg("use_dic") = false,
83
- py::arg("verbose") = false,
84
- py::arg("out_file") = ""
109
+ py::arg("preproc") = false,
110
+ py::arg("use_dic") = false,
111
+ py::arg("verbose") = false,
112
+ py::arg("out_file") = ""
85
113
  );
86
114
 
115
+ // ─────────────────────────────────────────────────────────────
87
116
  // BTMiner
117
+ // ─────────────────────────────────────────────────────────────
88
118
  m.def("BTMiner",
89
119
  [](py::object data,
90
120
  double minsup,
@@ -143,11 +173,405 @@ PYBIND11_MODULE(_effspm, m) {
143
173
  return out;
144
174
  },
145
175
  py::arg("data"),
146
- py::arg("minsup") = 0.01,
176
+ py::arg("minsup") = 0.01,
147
177
  py::arg("time_limit") = 36000,
148
- py::arg("preproc") = false,
149
- py::arg("use_dic") = false,
150
- py::arg("verbose") = false,
151
- py::arg("out_file") = ""
178
+ py::arg("preproc") = false,
179
+ py::arg("use_dic") = false,
180
+ py::arg("verbose") = false,
181
+ py::arg("out_file") = ""
152
182
  );
153
- }
183
+
184
+ // ─────────────────────────────────────────────────────────────
185
+ // HTMiner
186
+ // ─────────────────────────────────────────────────────────────
187
+ m.def("HTMiner",
188
+ [](py::object data,
189
+ double minsup, unsigned int time_limit,
190
+ bool preproc, bool use_dic,
191
+ bool verbose, const std::string &out_file)
192
+ {
193
+ // 1) set HTMiner globals (declared in htminer/src/utility.hpp)
194
+ htminer::time_limit = time_limit;
195
+ htminer::pre_pro = preproc;
196
+ htminer::use_dic = use_dic;
197
+ htminer::just_build = false; // or true if you want “build only”
198
+ htminer::use_list = false; // HTMiner always uses MDD‐based mode
199
+ htminer::b_disp = verbose;
200
+ htminer::b_write = !out_file.empty();
201
+ htminer::out_file = out_file;
202
+ htminer::ClearCollected(); // clear any leftover patterns
203
+ htminer::start_time = std::clock();
204
+
205
+ // 2) load sequences (either from filename or from Python list)
206
+ if (py::isinstance<py::str>(data)) {
207
+ std::string path = data.cast<std::string>();
208
+ if (!htminer::Load_instance(path, minsup))
209
+ throw std::runtime_error("Failed to load file: " + path);
210
+ } else {
211
+ auto seqs = data.cast<std::vector<std::vector<int>>>();
212
+ htminer::items = std::move(seqs);
213
+ htminer::N = htminer::items.size();
214
+
215
+ // compute L (max item ID), M (max sequence length), E (total entries)
216
+ int max_id = 0;
217
+ htminer::M = 0;
218
+ htminer::E = 0;
219
+ for (auto &seq : htminer::items) {
220
+ htminer::M = std::max<unsigned int>(htminer::M, seq.size());
221
+ for (int x : seq)
222
+ max_id = std::max(max_id, std::abs(x));
223
+ htminer::E += seq.size();
224
+ }
225
+ htminer::L = max_id;
226
+ htminer::theta = (minsup < 1.0)
227
+ ? static_cast<unsigned long long>(std::ceil(minsup * htminer::N))
228
+ : static_cast<unsigned long long>(minsup);
229
+
230
+ // build empty DFS stack (size L) as HTMiner expects
231
+ htminer::DFS.clear();
232
+ htminer::DFS.reserve(htminer::L);
233
+ for (unsigned int i = 0; i < static_cast<unsigned int>(htminer::L); ++i)
234
+ htminer::DFS.emplace_back(-static_cast<int>(i) - 1);
235
+
236
+ // initialize VDFS if HTMiner needs it
237
+ htminer::VDFS.clear();
238
+ htminer::VDFS.resize(htminer::L);
239
+ }
240
+
241
+ // 3) run the mining algorithm
242
+ htminer::Freq_miner();
243
+
244
+ // std::cout << "[HTMiner] dumping all collected patterns:\n";
245
+ // for (size_t i = 0; i < htminer::collectedPatterns.size(); ++i) {
246
+ // const auto &seq = htminer::collectedPatterns[i];
247
+ // std::cout << "Pattern " << i << ": { ";
248
+ // for (int x : seq) {
249
+ // std::cout << x << " ";
250
+ // }
251
+ // std::cout << "}\n";
252
+ //}
253
+ std::cout << " total patterns = "
254
+ << htminer::collectedPatterns.size() << "\n";
255
+ // ─────────────────────────────────────────────────
256
+
257
+ // 4) return patterns + elapsed time
258
+ py::dict out;
259
+ out["patterns"] = htminer::GetCollected();
260
+ out["time"] = htminer::give_time(std::clock() - htminer::start_time);
261
+ return out;
262
+ },
263
+ py::arg("data"),
264
+ py::arg("minsup") = 0.01,
265
+ py::arg("time_limit") = 36000,
266
+ py::arg("preproc") = false,
267
+ py::arg("use_dic") = false,
268
+ py::arg("verbose") = false,
269
+ py::arg("out_file") = ""
270
+ );
271
+
272
+ m.def("LargePrefixProjection",
273
+ [](py::object data,
274
+ double minsup,
275
+ unsigned int time_limit,
276
+ bool preproc,
277
+ bool use_dic,
278
+ bool verbose,
279
+ const std::string &out_file)
280
+ {
281
+ largepp::time_limit = time_limit;
282
+ largepp::pre_pro = preproc;
283
+ largepp::use_dic = use_dic;
284
+ largepp::use_list = true; // ← key difference
285
+ largepp::b_disp = verbose;
286
+ largepp::b_write = !out_file.empty();
287
+ largepp::out_file = out_file;
288
+ largepp::just_build = false;
289
+
290
+ largepp::ClearCollected();
291
+ largepp::start_time = std::clock();
292
+ std::string fname = data.cast<std::string>();
293
+ /* 1) load instance (py list or filename) */
294
+ if (py::isinstance<py::str>(data))
295
+
296
+ largepp::Load_instance(fname, minsup);
297
+ else
298
+ largepp::Load_py(data, minsup); // helper you’ll expose
299
+
300
+ std::vector<unsigned long long> dbg;
301
+
302
+
303
+
304
+
305
+
306
+
307
+ largepp::Freq_miner();
308
+
309
+ py::dict out;
310
+ out["patterns"] = largepp::GetCollected();
311
+ out["time"] = largepp::give_time(std::clock() - largepp::start_time);
312
+ return out;
313
+ },
314
+ py::arg("data"),
315
+ py::arg("minsup") = 0.01,
316
+ py::arg("time_limit") = 36000,
317
+ py::arg("preproc") = false,
318
+ py::arg("use_dic") = false,
319
+ py::arg("verbose") = false,
320
+ py::arg("out_file") = ""
321
+ );
322
+
323
+ // ─────────────────────────────────────────────────────────────
324
+ // LargeBTMiner -- Python wrapper for the largebm implementation
325
+ // ─────────────────────────────────────────────────────────────
326
+ // m.def(
327
+ // "LargeBTMiner",
328
+ // [](py::object data,
329
+ // double minsup ,
330
+ // unsigned int time_limit,
331
+ // bool preproc ,
332
+ // bool use_dic,
333
+ // bool verbose,
334
+ // const std::string &out_file )
335
+ // {
336
+ // /* 1) Global flags */
337
+ // largebm::time_limit = time_limit;
338
+ // largebm::pre_pro = preproc;
339
+ // largebm::use_dic = use_dic;
340
+ // largebm::use_list = false; // large-mode → always MDD
341
+ // largebm::just_build = false;
342
+ // largebm::b_disp = verbose;
343
+ // largebm::b_write = !out_file.empty();
344
+ // largebm::out_file = out_file;
345
+
346
+ // /* 2) Reset per-run state */
347
+ // largebm::ClearCollected();
348
+ // largebm::start_time = std::clock();
349
+
350
+ // /* 3) Load the DB (file path or in-memory list<list<int>>) */
351
+ // if (py::isinstance<py::str>(data)) {
352
+ // std::string path = data.cast<std::string>();
353
+ // if (!largebm::Load_instance(path, minsup))
354
+ // throw std::runtime_error("Failed to load file: " + path);
355
+ // } else {
356
+ // // In-memory sequences
357
+ // largebm::items = std::move(data.cast<std::vector<std::vector<int>>>());
358
+ // largebm::N = static_cast<unsigned int>(largebm::items.size());
359
+
360
+ // /* -- basic stats -- */
361
+ // int max_id = 0;
362
+ // largebm::M = 0;
363
+ // largebm::E = 0;
364
+ // for ( auto &seq : largebm::items) {
365
+ // largebm::M = std::max<unsigned int>(largebm::M,
366
+ // static_cast<unsigned int>(seq.size()));
367
+ // largebm::E += static_cast<unsigned long long>(seq.size());
368
+ // for (int x : seq) max_id = std::max(max_id, std::abs(x));
369
+ // }
370
+ // largebm::L = static_cast<unsigned int>(max_id);
371
+ // largebm::theta = (minsup < 1.0)
372
+ // ? static_cast<unsigned long long>(std::ceil(minsup * largebm::N))
373
+ // : static_cast<unsigned long long>(minsup);
374
+
375
+ // /* -- DFS buffer (size = L) -- */
376
+ // largebm::DFS.clear();
377
+ // largebm::DFS.reserve(largebm::L);
378
+ // for (unsigned int i = 0; i < largebm::L; ++i)
379
+ // largebm::DFS.emplace_back(-static_cast<int>(i) - 1);
380
+
381
+ // /* -- Build the MDD -- */
382
+ // largebm::Tree.clear();
383
+ // largebm::Tree.emplace_back(0, 0, 0); // dummy root
384
+ // for ( auto &seq : largebm::items)
385
+ // largebm::Build_MDD(seq);
386
+ // }
387
+
388
+ // /* 4) Mine and return results */
389
+ // largebm::Freq_miner();
390
+
391
+ // py::dict out;
392
+ // out["patterns"] = largebm::GetCollected();
393
+ // out["time"] = largebm::give_time(std::clock() - largebm::start_time);
394
+ // return out;
395
+ // },
396
+ // py::arg("data"),
397
+ // py::arg("minsup") = 0.01,
398
+ // py::arg("time_limit") = 36000,
399
+ // py::arg("preproc") = false,
400
+ // py::arg("use_dic") = false,
401
+ // py::arg("verbose") = false,
402
+ // py::arg("out_file") = ""
403
+ // );
404
+
405
+
406
+ m.def("LargeBTMiner",
407
+ [](py::object data,
408
+ double minsup,
409
+ unsigned int time_limit,
410
+ bool preproc,
411
+ bool use_dic,
412
+ bool verbose,
413
+ const std::string &out_file)
414
+ {
415
+ largebm::time_limit = time_limit;
416
+ largebm::pre_pro = preproc;
417
+ largebm::use_dic = use_dic;
418
+ largebm::use_list = false; // <-- switch into “large” mode
419
+ largebm::b_disp = verbose;
420
+ largebm::b_write = !out_file.empty();
421
+ largebm::out_file = out_file;
422
+ largebm::just_build = false;
423
+
424
+ // ── Build the inverse‐dictionary here ────────────────────────────
425
+ {
426
+ std::vector<int> local_inv( largebm::item_dic.size() + 1 );
427
+ for (int old = 1; old <= (int)largebm::item_dic.size(); ++old) {
428
+ int cid = largebm::item_dic[old - 1];
429
+ if (cid > 0)
430
+ local_inv[cid] = old;
431
+ }
432
+ largebm::inv_item_dic = std::move(local_inv);
433
+ }
434
+ // ─std::cerr << "inv_item_dic size=" << largebm::inv_item_dic.size() << "\n";
435
+ for (size_t i = 0; i < largebm::inv_item_dic.size(); ++i) {
436
+ //std::cerr << i << "→" << largebm::inv_item_dic[i] << " ";
437
+ }
438
+ std::cerr << "\n";
439
+
440
+ largebm::ClearCollected();
441
+ largebm::start_time = std::clock();
442
+
443
+ if (py::isinstance<py::str>(data)) {
444
+ // load from filename
445
+ std::string path = data.cast<std::string>();
446
+ if (!largebm::Load_instance(path, minsup))
447
+ throw std::runtime_error("Failed to load file: " + path);
448
+ }
449
+ else {
450
+ // load from in‐memory sequences
451
+ largebm::items = std::move(data.cast<std::vector<std::vector<int>>>());
452
+
453
+ }
454
+
455
+ largebm::Freq_miner();
456
+
457
+ py::dict out;
458
+ out["patterns"] = largebm::GetCollected();
459
+ out["time"] = largebm::give_time(std::clock() - largebm::start_time);
460
+ return out;
461
+ },
462
+ py::arg("data"),
463
+ py::arg("minsup") = 0.01,
464
+ py::arg("time_limit") = 36000,
465
+ py::arg("preproc") = false,
466
+ py::arg("use_dic") = false,
467
+ py::arg("verbose") = false,
468
+ py::arg("out_file") = ""
469
+ );
470
+
471
+ m.def("LargeHTMiner",
472
+ [](py::object data,
473
+ double minsup,
474
+ unsigned int time_limit,
475
+ bool preproc,
476
+ bool use_dic,
477
+ bool verbose,
478
+ const std::string &out_file)
479
+ {
480
+ // 0) Set global flags and timers:
481
+ largehm::time_limit = time_limit;
482
+ largehm::pre_pro = preproc;
483
+ largehm::use_dic = use_dic;
484
+ largehm::use_list = true; // force in‐memory mode
485
+ largehm::b_disp = verbose;
486
+ largehm::b_write = !out_file.empty();
487
+ largehm::out_file = out_file;
488
+ largehm::just_build = false;
489
+
490
+ largehm::ClearCollected();
491
+ largehm::start_time = std::clock();
492
+
493
+ if (py::isinstance<py::str>(data)) {
494
+ // ───────────── FILE‐BASED MODE ─────────────
495
+ // Force mlim so that every item lands in temp_vec (never temp_lim):
496
+ largehm::mlim = UINT_MAX;
497
+
498
+ std::string path = data.cast<std::string>();
499
+ if (! largehm::Load_instance(path, minsup))
500
+ throw std::runtime_error("Failed to load file: " + path);
501
+ }
502
+ else {
503
+ // ───────────── IN‐MEMORY MODE ─────────────
504
+ auto seqs = data.cast<std::vector<std::vector<int>>>();
505
+ largehm::items = std::move(seqs);
506
+ largehm::N = largehm::items.size();
507
+
508
+ // 1) Compute L = maximum absolute item ID
509
+ int max_id = 0;
510
+ for (auto &seq : largehm::items)
511
+ for (int x : seq)
512
+ max_id = std::max(max_id, std::abs(x));
513
+ largehm::L = static_cast<unsigned int>(max_id);
514
+
515
+ // 2) Compute theta as absolute support threshold
516
+ largehm::theta = (minsup < 1.0)
517
+ ? static_cast<unsigned long long>(std::ceil(minsup * largehm::N))
518
+ : static_cast<unsigned long long>(minsup);
519
+
520
+ // 3) Initialize DFS (size = L)
521
+ largehm::DFS.clear();
522
+ largehm::DFS.reserve(largehm::L);
523
+ for (unsigned int i = 0; i < largehm::L; ++i)
524
+ largehm::DFS.emplace_back(-static_cast<int>(i) - 1);
525
+
526
+ // 4) Compute M (max sequence length) and E (total entries)
527
+ largehm::M = 0;
528
+ largehm::E = 0;
529
+ for (auto &seq : largehm::items) {
530
+ largehm::M = std::max<unsigned int>(
531
+ largehm::M, static_cast<unsigned int>(seq.size()));
532
+ largehm::E += seq.size();
533
+ }
534
+
535
+ // 5) ─── Build the MDD “manually” ───
536
+ largehm::Tree.clear();
537
+ largehm::VTree.clear();
538
+ largehm::CTree.clear();
539
+
540
+ // Insert exactly one dummy root node (chld=0, sibl=0, freq=0):
541
+ largehm::Tree.emplace_back(0,0,0);
542
+
543
+ // For each sequence “seq”, insert into MDD by placing a single −1 sentinel:
544
+ for (auto &seq : largehm::items) {
545
+ // Copy the item IDs:
546
+ std::vector<int> temp_vec = seq;
547
+ // Only a single “−1” is needed to force the suffix insertion:
548
+ std::vector<int> temp_lim(1, -1);
549
+
550
+ largehm::Build_MDD(temp_vec, temp_lim);
551
+ }
552
+
553
+
554
+ }
555
+
556
+ // 6) Run the frequency miner (Tree is now properly built):
557
+ largehm::Freq_miner();
558
+
559
+ // 7) Return results to Python:
560
+ py::dict out;
561
+ out["patterns"] = largehm::GetCollected();
562
+ out["time"] = largehm::give_time(std::clock() - largehm::start_time);
563
+ return out;
564
+ },
565
+ py::arg("data"),
566
+ py::arg("minsup") = 0.01,
567
+ py::arg("time_limit") = 36000,
568
+ py::arg("preproc") = false,
569
+ py::arg("use_dic") = false,
570
+ py::arg("verbose") = false,
571
+ py::arg("out_file") = ""
572
+ );
573
+
574
+
575
+
576
+
577
+ }
effspm/_effspm.cpython-313-darwin.so CHANGED
Binary file
effspm/btminer/src/freq_miner.cpp CHANGED
@@ -156,6 +156,9 @@ void Extend_patt(Pattern _patt) {
156
156
  }
157
157
 
158
158
  void Out_patt(std::vector<int>& seq, int freq) {
159
+
160
+ btminer::collected.push_back(seq); // make pattern visible to Python
161
+
159
162
  std::ofstream file_o;
160
163
  if (b_write) file_o.open(out_file, std::ios::app);
161
164
 
effspm/btminer/src/load_inst.cpp CHANGED
@@ -18,6 +18,7 @@ using namespace std;
18
18
 
19
19
  extern int num_nodes, cur_node;
20
20
 
21
+
21
22
  map<string, int> item_map;
22
23
  map<int, string> item_map_rev;
23
24
  vector<int> freq;
@@ -27,6 +28,9 @@ void Load_items_pre(string& inst_name);
27
28
  bool Load_items(string& inst_name);
28
29
  bool Preprocess(string& inst, double thresh);
29
30
 
31
+
32
+
33
+
30
34
  bool Load_instance(string& items_file, double thresh) {
31
35
  clock_t kk = clock();
32
36
  Tree.emplace_back(0, 0, 0);
effspm/btminer/src/load_inst.hpp CHANGED
@@ -20,4 +20,6 @@ extern int N, M, L, theta, num_nodes, M_mult, N_mult, time_limit, cur_node;
20
20
 
21
21
  extern clock_t start_time;
22
22
 
23
+
24
+
23
25
  } // namespace btminer
effspm/btminer/src/utility.cpp CHANGED
@@ -5,63 +5,61 @@
5
5
 
6
6
  namespace btminer {
7
7
 
8
- // === Global Variables ===
9
- bool use_dic = false;
8
+ // ─── Global definitions ──────────────────────────────────────────
9
+ bool use_dic = false;
10
10
  std::vector<std::vector<int>> items;
11
- bool use_list = false;
11
+ bool use_list = false;
12
12
  bool just_build = false;
13
- int E = 0, M = 0, N = 0, L = 0, theta = 0;
13
+ int E = 0, M = 0, N = 0, L = 0, theta = 0;
14
14
  std::vector<Pattern> DFS;
15
- clock_t start_time;
15
+ clock_t start_time = 0;
16
16
  bool b_disp = false, b_write = false;
17
17
  std::string out_file;
18
18
 
19
- bool pre_pro = true;
20
- int N_mult = 1, M_mult = 1;
21
- int time_limit = 30 * 3600;
19
+ bool pre_pro = true;
20
+ int N_mult = 1, M_mult = 1;
21
+ int time_limit = 30 * 3600;
22
22
 
23
- // === Function Definitions ===
23
+ // buffer of mined patterns returned to Python
24
+ std::vector<std::vector<int>> collected;
24
25
 
25
- int find_ID(std::vector<int>& vec, int itm) {
26
+ void ClearCollected() { collected.clear(); }
27
+ const std::vector<std::vector<int>>& GetCollected() { return collected; }
28
+
29
+ // ─── Utility functions ───────────────────────────────────────────
30
+ int find_ID(std::vector<int>& vec, int itm)
31
+ {
26
32
  int plc = 0;
27
- while (plc < vec.size() && vec[plc] != itm)
28
- ++plc;
29
- return (plc == vec.size()) ? -1 : plc;
33
+ while (plc < static_cast<int>(vec.size()) && vec[plc] != itm) ++plc;
34
+ return (plc == static_cast<int>(vec.size())) ? -1 : plc;
30
35
  }
31
36
 
32
- bool check_parent(int cur_arc, int str_pnt, int start, std::vector<int>& strpnt_vec) {
37
+ bool check_parent(int cur_arc, int str_pnt, int start,
38
+ std::vector<int>& strpnt_vec)
39
+ {
33
40
  std::vector<int> ancestors;
34
41
  int cur_anct = Tree[cur_arc].anct;
35
42
 
36
43
  while (Tree[cur_anct].itmset > Tree[str_pnt].itmset) {
37
- if (Tree[cur_anct].item > 0)
38
- ancestors.push_back(cur_anct);
44
+ if (Tree[cur_anct].item > 0) ancestors.push_back(cur_anct);
39
45
  cur_anct = Tree[cur_anct].anct;
40
46
  }
47
+ if (Tree[cur_anct].itmset == Tree[str_pnt].itmset) return true;
41
48
 
42
- if (Tree[cur_anct].itmset == Tree[str_pnt].itmset)
43
- return true;
44
-
45
- for (auto it = ancestors.rbegin(); it != ancestors.rend(); ++it) {
46
- for (int i = start; i < strpnt_vec.size(); ++i) {
47
- if (strpnt_vec[i] == *it)
48
- return true;
49
- }
50
- }
49
+ for (auto it = ancestors.rbegin(); it != ancestors.rend(); ++it)
50
+ for (int i = start; i < static_cast<int>(strpnt_vec.size()); ++i)
51
+ if (strpnt_vec[i] == *it) return true;
51
52
 
52
53
  return false;
53
54
  }
54
55
 
55
- bool find_pnt(Arc* pnt, std::vector<Arc*>& vec, int pos) {
56
- for (size_t i = pos; i < vec.size(); ++i) {
57
- if (vec[i] == pnt)
58
- return true;
59
- }
56
+ bool find_pnt(Arc* pnt, std::vector<Arc*>& vec, int pos)
57
+ {
58
+ for (size_t i = pos; i < vec.size(); ++i)
59
+ if (vec[i] == pnt) return true;
60
60
  return false;
61
61
  }
62
62
 
63
- float give_time(clock_t kk) {
64
- return static_cast<float>(kk) / CLOCKS_PER_SEC;
65
- }
63
+ double give_time(clock_t kk) { return double(kk) / CLOCKS_PER_SEC; }
66
64
 
67
65
  } // namespace btminer
effspm/btminer/src/utility.hpp CHANGED
@@ -12,11 +12,11 @@ namespace btminer {
12
12
  // === Utility function declarations ===
13
13
  bool find_pnt(Arc* pnt, std::vector<Arc*>& vec, int pos);
14
14
  int find_ID(std::vector<int>& vec, int itm);
15
- float give_time(clock_t kk);
15
+ double give_time(clock_t kk);
16
16
  bool check_parent(int cur_arc, int str_pnt, int start, std::vector<int>& strpnt_vec);
17
17
 
18
18
  // === Global variables (DECLARATIONS ONLY) ===
19
- extern std::vector<std::vector<int>> items;
19
+
20
20
  extern bool use_list;
21
21
  extern bool just_build;
22
22
  extern int E, M, N, L, theta;
@@ -27,18 +27,14 @@ extern std::string out_file;
27
27
  extern bool pre_pro;
28
28
  extern int N_mult, M_mult;
29
29
  extern int time_limit;
30
+ extern std::vector<std::vector<int>> items;
30
31
 
31
- // === Python-friendly accessors ===
32
- inline void ClearCollected() {
33
- DFS.clear();
34
- }
32
+ extern std::vector<std::vector<int>> collected;
33
+
34
+ void ClearCollected();
35
+ const std::vector<std::vector<int>>& GetCollected();
35
36
 
36
- inline std::vector<std::vector<int>> GetCollected() {
37
- std::vector<std::vector<int>> patterns;
38
- for (const auto& p : DFS) {
39
- patterns.push_back(p.seq);
40
- }
41
- return patterns;
42
37
  }
43
38
 
44
- } // namespace btminer
39
+
40
+ // namespace btminer