effspm 0.2.2__tar.gz → 0.2.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. effspm-0.2.6/MANIFEST.in +5 -0
  2. {effspm-0.2.2/effspm.egg-info → effspm-0.2.6}/PKG-INFO +1 -1
  3. effspm-0.2.6/effspm/__init__.py +11 -0
  4. {effspm-0.2.2 → effspm-0.2.6}/effspm/_effspm.cpp +90 -58
  5. {effspm-0.2.2 → effspm-0.2.6}/effspm/freq_miner.hpp +4 -1
  6. {effspm-0.2.2 → effspm-0.2.6}/effspm/htminer/src/load_inst.cpp +2 -0
  7. {effspm-0.2.2 → effspm-0.2.6}/effspm/largebm/src/freq_miner.cpp +13 -6
  8. {effspm-0.2.2 → effspm-0.2.6}/effspm/largebm/src/load_inst.cpp +72 -77
  9. {effspm-0.2.2 → effspm-0.2.6}/effspm/largehm/src/build_mdd.cpp +10 -9
  10. {effspm-0.2.2 → effspm-0.2.6}/effspm/largehm/src/freq_miner.cpp +15 -11
  11. {effspm-0.2.2 → effspm-0.2.6}/effspm/largehm/src/freq_miner.hpp +7 -7
  12. {effspm-0.2.2 → effspm-0.2.6}/effspm/largepp/src/utility.cpp +1 -0
  13. {effspm-0.2.2 → effspm-0.2.6}/effspm/largepp/src/utility.hpp +1 -0
  14. {effspm-0.2.2 → effspm-0.2.6}/effspm/load_inst.hpp +2 -1
  15. {effspm-0.2.2 → effspm-0.2.6/effspm.egg-info}/PKG-INFO +1 -1
  16. {effspm-0.2.2 → effspm-0.2.6}/effspm.egg-info/SOURCES.txt +0 -6
  17. {effspm-0.2.2 → effspm-0.2.6}/pyproject.toml +1 -2
  18. {effspm-0.2.2 → effspm-0.2.6}/setup.py +1 -1
  19. effspm-0.2.6/tests/test.py +22 -0
  20. effspm-0.2.6/tests/test_basic.py +19 -0
  21. effspm-0.2.2/MANIFEST.in +0 -12
  22. effspm-0.2.2/effspm/__init__.py +0 -3
  23. effspm-0.2.2/effspm/btminer/src/main.cpp +0 -92
  24. effspm-0.2.2/effspm/htminer/src/main.cpp +0 -96
  25. effspm-0.2.2/effspm/largebm/src/main.cpp +0 -95
  26. effspm-0.2.2/effspm/largehm/src/main.cpp +0 -95
  27. effspm-0.2.2/effspm/largepp/src/main.cpp +0 -108
  28. effspm-0.2.2/effspm/main.cpp +0 -103
  29. effspm-0.2.2/tests/test.py +0 -31
  30. effspm-0.2.2/tests/test_basic.py +0 -37
  31. {effspm-0.2.2 → effspm-0.2.6}/LICENSE +0 -0
  32. {effspm-0.2.2 → effspm-0.2.6}/README.md +0 -0
  33. {effspm-0.2.2 → effspm-0.2.6}/effspm/_core.cpp +0 -0
  34. {effspm-0.2.2 → effspm-0.2.6}/effspm/btminer/src/build_mdd.cpp +0 -0
  35. {effspm-0.2.2 → effspm-0.2.6}/effspm/btminer/src/build_mdd.hpp +0 -0
  36. {effspm-0.2.2 → effspm-0.2.6}/effspm/btminer/src/freq_miner.cpp +0 -0
  37. {effspm-0.2.2 → effspm-0.2.6}/effspm/btminer/src/freq_miner.hpp +0 -0
  38. {effspm-0.2.2 → effspm-0.2.6}/effspm/btminer/src/load_inst.cpp +0 -0
  39. {effspm-0.2.2 → effspm-0.2.6}/effspm/btminer/src/load_inst.hpp +0 -0
  40. {effspm-0.2.2 → effspm-0.2.6}/effspm/btminer/src/utility.cpp +0 -0
  41. {effspm-0.2.2 → effspm-0.2.6}/effspm/btminer/src/utility.hpp +0 -0
  42. {effspm-0.2.2 → effspm-0.2.6}/effspm/freq_miner.cpp +0 -0
  43. {effspm-0.2.2 → effspm-0.2.6}/effspm/htminer/src/build_mdd.cpp +0 -0
  44. {effspm-0.2.2 → effspm-0.2.6}/effspm/htminer/src/build_mdd.hpp +0 -0
  45. {effspm-0.2.2 → effspm-0.2.6}/effspm/htminer/src/freq_miner.cpp +0 -0
  46. {effspm-0.2.2 → effspm-0.2.6}/effspm/htminer/src/freq_miner.hpp +0 -0
  47. {effspm-0.2.2 → effspm-0.2.6}/effspm/htminer/src/load_inst.hpp +0 -0
  48. {effspm-0.2.2 → effspm-0.2.6}/effspm/htminer/src/utility.cpp +0 -0
  49. {effspm-0.2.2 → effspm-0.2.6}/effspm/htminer/src/utility.hpp +0 -0
  50. {effspm-0.2.2 → effspm-0.2.6}/effspm/largebm/src/build_mdd.cpp +0 -0
  51. {effspm-0.2.2 → effspm-0.2.6}/effspm/largebm/src/build_mdd.hpp +0 -0
  52. {effspm-0.2.2 → effspm-0.2.6}/effspm/largebm/src/freq_miner.hpp +0 -0
  53. {effspm-0.2.2 → effspm-0.2.6}/effspm/largebm/src/load_inst.hpp +0 -0
  54. {effspm-0.2.2 → effspm-0.2.6}/effspm/largebm/src/utility.cpp +0 -0
  55. {effspm-0.2.2 → effspm-0.2.6}/effspm/largebm/src/utility.hpp +0 -0
  56. {effspm-0.2.2 → effspm-0.2.6}/effspm/largehm/src/build_mdd.hpp +0 -0
  57. {effspm-0.2.2 → effspm-0.2.6}/effspm/largehm/src/load_inst.cpp +0 -0
  58. {effspm-0.2.2 → effspm-0.2.6}/effspm/largehm/src/load_inst.hpp +0 -0
  59. {effspm-0.2.2 → effspm-0.2.6}/effspm/largehm/src/utility.cpp +0 -0
  60. {effspm-0.2.2 → effspm-0.2.6}/effspm/largehm/src/utility.hpp +0 -0
  61. {effspm-0.2.2 → effspm-0.2.6}/effspm/largepp/src/freq_miner.cpp +0 -0
  62. {effspm-0.2.2 → effspm-0.2.6}/effspm/largepp/src/freq_miner.hpp +0 -0
  63. {effspm-0.2.2 → effspm-0.2.6}/effspm/largepp/src/load_inst.cpp +0 -0
  64. {effspm-0.2.2 → effspm-0.2.6}/effspm/largepp/src/load_inst.hpp +0 -0
  65. {effspm-0.2.2 → effspm-0.2.6}/effspm/load_inst.cpp +0 -0
  66. {effspm-0.2.2 → effspm-0.2.6}/effspm/utility.cpp +0 -0
  67. {effspm-0.2.2 → effspm-0.2.6}/effspm/utility.hpp +0 -0
  68. {effspm-0.2.2 → effspm-0.2.6}/effspm.egg-info/dependency_links.txt +0 -0
  69. {effspm-0.2.2 → effspm-0.2.6}/effspm.egg-info/not-zip-safe +0 -0
  70. {effspm-0.2.2 → effspm-0.2.6}/effspm.egg-info/requires.txt +0 -0
  71. {effspm-0.2.2 → effspm-0.2.6}/effspm.egg-info/top_level.txt +0 -0
  72. {effspm-0.2.2 → effspm-0.2.6}/setup.cfg +0 -0
@@ -0,0 +1,5 @@
1
+ include LICENSE
2
+ include README.md
3
+ include pyproject.toml
4
+ include setup.py
5
+ recursive-include effspm *.hpp
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: effspm
3
- Version: 0.2.2
3
+ Version: 0.2.6
4
4
  Summary: Prefix‑Projection and other sequential pattern mining algorithms
5
5
  Author: Yeswanth Vootla
6
6
  Author-email: yeshu999 <vootlayeswanth20@gmail.com>
@@ -0,0 +1,11 @@
1
+
2
+ from ._effspm import PrefixProjection, HTMiner, LargeHTMiner, BTMiner, LargeBTMiner, LargePrefixProjection
3
+
4
+ __all__ = [
5
+ "PrefixProjection",
6
+ "HTMiner",
7
+ "LargeHTMiner",
8
+ "BTMiner",
9
+ "LargeBTMiner",
10
+ "LargePrefixProjection",
11
+ ]
@@ -403,71 +403,103 @@ std::cout << " total patterns = "
403
403
  // );
404
404
 
405
405
 
406
+
406
407
  m.def("LargeBTMiner",
407
- [](py::object data,
408
- double minsup,
409
- unsigned int time_limit,
410
- bool preproc,
411
- bool use_dic,
412
- bool verbose,
413
- const std::string &out_file)
414
- {
415
- largebm::time_limit = time_limit;
416
- largebm::pre_pro = preproc;
417
- largebm::use_dic = use_dic;
418
- largebm::use_list = false; // <-- switch into “large” mode
419
- largebm::b_disp = verbose;
420
- largebm::b_write = !out_file.empty();
421
- largebm::out_file = out_file;
422
- largebm::just_build = false;
423
-
424
- // ── Build the inverse‐dictionary here ────────────────────────────
408
+ [](py::object data,
409
+ double minsup,
410
+ unsigned int time_limit,
411
+ bool preproc,
412
+ bool use_dic,
413
+ bool verbose,
414
+ const std::string &out_file)
425
415
  {
426
- std::vector<int> local_inv( largebm::item_dic.size() + 1 );
427
- for (int old = 1; old <= (int)largebm::item_dic.size(); ++old) {
428
- int cid = largebm::item_dic[old - 1];
429
- if (cid > 0)
430
- local_inv[cid] = old;
431
- }
432
- largebm::inv_item_dic = std::move(local_inv);
433
- }
434
- // ─std::cerr << "inv_item_dic size=" << largebm::inv_item_dic.size() << "\n";
435
- for (size_t i = 0; i < largebm::inv_item_dic.size(); ++i) {
436
- //std::cerr << i << "→" << largebm::inv_item_dic[i] << " ";
437
- }
438
- std::cerr << "\n";
416
+ // 0) Set global flags and timers
417
+ largebm::time_limit = time_limit;
418
+ largebm::pre_pro = preproc;
419
+ largebm::use_dic = use_dic;
420
+ largebm::use_list = false; // large‑mode → always MDD
421
+ largebm::b_disp = verbose;
422
+ largebm::b_write = !out_file.empty();
423
+ largebm::out_file = out_file;
424
+ largebm::just_build = false;
425
+
426
+ // 0.1) Clear any leftover data/state from previous runs
427
+ largebm::items.clear();
428
+ largebm::item_dic.clear();
429
+ largebm::inv_item_dic.clear();
430
+ largebm::Tree.clear();
431
+ largebm::DFS.clear();
432
+ largebm::ClearCollected();
433
+
434
+ // 1) Load sequences (either from filename or from Python list)
435
+ if (py::isinstance<py::str>(data)) {
436
+ // ─────────── FILE‑BASED MODE ───────────
437
+ std::string path = data.cast<std::string>();
438
+ if (!largebm::Load_instance(path, minsup))
439
+ throw std::runtime_error("Failed to load file: " + path);
439
440
 
440
- largebm::ClearCollected();
441
- largebm::start_time = std::clock();
441
+ } else {
442
+ // ────────── IN‑MEMORY MODE ──────────
443
+ auto seqs = data.cast<std::vector<std::vector<int>>>();
444
+ largebm::items = std::move(seqs);
445
+ largebm::N = largebm::items.size();
442
446
 
443
- if (py::isinstance<py::str>(data)) {
444
- // load from filename
445
- std::string path = data.cast<std::string>();
446
- if (!largebm::Load_instance(path, minsup))
447
- throw std::runtime_error("Failed to load file: " + path);
448
- }
449
- else {
450
- // load from in‐memory sequences
451
- largebm::items = std::move(data.cast<std::vector<std::vector<int>>>());
452
-
453
- }
447
+ // 1.1) Compute basic DB statistics (M, E, L) and absolute support θ
448
+ int max_id = 0;
449
+ largebm::M = 0;
450
+ largebm::E = 0;
451
+ for (auto &seq : largebm::items) {
452
+ largebm::M = std::max<unsigned int>(largebm::M, static_cast<unsigned int>(seq.size()));
453
+ largebm::E += static_cast<unsigned long long>(seq.size());
454
+ for (int x : seq) max_id = std::max(max_id, std::abs(x));
455
+ }
456
+ largebm::L = static_cast<unsigned int>(max_id);
457
+ largebm::theta = (minsup < 1.0)
458
+ ? static_cast<unsigned long long>(std::ceil(minsup * largebm::N))
459
+ : static_cast<unsigned long long>(minsup);
460
+
461
+ // 1.2) Initialize DFS buffer (size = L)
462
+ largebm::DFS.reserve(largebm::L);
463
+ for (unsigned int i = 0; i < largebm::L; ++i)
464
+ largebm::DFS.emplace_back(-static_cast<int>(i) - 1);
465
+
466
+ // 1.3) Build the MDD “Tree”
467
+ // Insert one dummy root node (item=0, freq=0, anct=0)
468
+ largebm::Tree.emplace_back(0, 0, 0);
469
+ for (auto &seq : largebm::items)
470
+ largebm::Build_MDD(const_cast<std::vector<int>&>(seq));
471
+ }
454
472
 
455
- largebm::Freq_miner();
473
+ // 2) Rebuild inverse‑dictionary from fresh item_dic
474
+ {
475
+ std::vector<int> inv(largebm::item_dic.size() + 1);
476
+ for (int old = 1; old <= static_cast<int>(largebm::item_dic.size()); ++old) {
477
+ int cid = largebm::item_dic[old - 1];
478
+ if (cid > 0) inv[cid] = old;
479
+ }
480
+ largebm::inv_item_dic = std::move(inv);
481
+ }
456
482
 
457
- py::dict out;
458
- out["patterns"] = largebm::GetCollected();
459
- out["time"] = largebm::give_time(std::clock() - largebm::start_time);
460
- return out;
461
- },
462
- py::arg("data"),
463
- py::arg("minsup") = 0.01,
464
- py::arg("time_limit") = 36000,
465
- py::arg("preproc") = false,
466
- py::arg("use_dic") = false,
467
- py::arg("verbose") = false,
468
- py::arg("out_file") = ""
469
- );
483
+ // 3) Start timing and run the miner
484
+ largebm::start_time = std::clock();
485
+ largebm::Freq_miner();
486
+
487
+ // 4) Collect results and elapsed time
488
+ py::dict out;
489
+ out["patterns"] = largebm::GetCollected();
490
+ out["time"] = largebm::give_time(std::clock() - largebm::start_time);
491
+ return out;
492
+ },
493
+ py::arg("data"),
494
+ py::arg("minsup") = 0.01,
495
+ py::arg("time_limit") = 36000,
496
+ py::arg("preproc") = false,
497
+ py::arg("use_dic") = false,
498
+ py::arg("verbose") = false,
499
+ py::arg("out_file") = ""
500
+ );
470
501
 
502
+
471
503
  m.def("LargeHTMiner",
472
504
  [](py::object data,
473
505
  double minsup,
@@ -3,6 +3,8 @@
3
3
  #include "load_inst.hpp"
4
4
  #include <cstdlib>
5
5
  #include <cmath>
6
+ #include <cstddef> // for std::size_t
7
+
6
8
  using namespace std;
7
9
  void Freq_miner();
8
10
  void Out_patt(std::vector<int>& seq, unsigned int freq);
@@ -22,7 +24,8 @@ public:
22
24
 
23
25
  Pattern(vector<int>& _seq, int item) {
24
26
  seq.reserve(_seq.size());
25
- for (int i = 0; i < _seq.size(); ++i)
27
+ for (std::size_t i = 0; i < _seq.size(); ++i)
28
+
26
29
  seq.push_back(_seq[i]);
27
30
  seq.push_back(item);
28
31
  freq = 0;
@@ -106,6 +106,8 @@ bool Load_instance(std::string& items_file, double thresh) {
106
106
  // << " M=" << M << " L=" << L << " E=" << E << std::endl;
107
107
  }
108
108
 
109
+
110
+
109
111
  std::cout << "\nMDD Database built in " << give_time(std::clock() - kk) << " seconds\n\n";
110
112
  std::cout << "Found " << N << " sequence, with max line len " << M
111
113
  << ", and " << L << " items, and " << E << " enteries\n";
@@ -1,5 +1,3 @@
1
- // File: effspm/largebm/src/freq_miner.cpp
2
-
3
1
  #include <vector>
4
2
  #include <algorithm>
5
3
  #include <iostream>
@@ -27,16 +25,25 @@ namespace largebm {
27
25
  Pattern _patt;
28
26
 
29
27
  void Freq_miner() {
28
+ // ─── RESET per‐run state ──────────────────────────────────────
29
+ collected.clear();
30
+ num_patt = 0;
31
+ // Ensure DFS has at least L entries (so DFS[i] is valid for 0..L-1)
32
+ if (static_cast<int>(DFS.size()) < static_cast<int>(L)) {
33
+ DFS.resize(L);
34
+ }
35
+ // ─────────────────────────────────────────────────────────────
36
+
30
37
  std::vector<int> list;
31
38
 
32
39
  if (use_list) {
33
- // List-based routine
40
+ // Listbased routine
34
41
  std::vector<int> empty_pref;
35
42
  Freq_miner_list(items, empty_pref, theta, collected);
36
43
  return;
37
44
  }
38
45
 
39
- // MDD-based initialization
46
+ // MDDbased initialization
40
47
  for (int i = 0; i < static_cast<int>(L); ++i) {
41
48
  if (DFS[i].freq >= theta) {
42
49
  list.push_back(-i - 1);
@@ -301,13 +308,13 @@ namespace largebm {
301
308
  return std::abs(a.first) < std::abs(b.first);
302
309
  });
303
310
 
304
- // 4) depth-first enumerate them
311
+ // 4) depthfirst enumerate them
305
312
  for (auto const& pr : cand) {
306
313
  int item = pr.first;
307
314
  prefix.push_back(item);
308
315
 
309
316
  if (use_dic) {
310
- // “un-compress” each pattern back to original IDs
317
+ // “uncompress” each pattern back to original IDs
311
318
  std::vector<int> unmapped;
312
319
  unmapped.reserve(prefix.size());
313
320
  for (int cid : prefix) {
@@ -1,16 +1,17 @@
1
- // ───── effspm/largebm/src/load_inst.cpp ─────────────────────────────
1
+
2
2
  #include <sstream>
3
3
  #include <algorithm>
4
4
  #include <cmath>
5
5
  #include <ctime>
6
+ #include <iostream> // for std::cout, std::endl
6
7
  #include <fstream>
7
8
  #include <vector>
8
9
  #include <string>
9
10
 
10
- #include "load_inst.hpp" // header with global declarations
11
- #include "build_mdd.hpp" // Arc & global Tree definitions
12
- #include "utility.hpp" // give_time()
13
- #include "freq_miner.hpp" // Pattern class
11
+ #include "load_inst.hpp"
12
+ #include "build_mdd.hpp"
13
+ #include "utility.hpp"
14
+ #include "freq_miner.hpp"
14
15
 
15
16
  namespace largebm {
16
17
 
@@ -29,20 +30,16 @@ std::clock_t start_time = 0;
29
30
 
30
31
  std::vector<int> item_dic;
31
32
  std::vector<Pattern> DFS;
32
- std::vector<std::vector<int>> items; // list‐mode working DB
33
- std::vector<std::vector<int>> collected; // patterns for Python
33
+ std::vector<std::vector<int>> items;
34
+ std::vector<std::vector<int>> collected;
34
35
  std::vector<int> inv_item_dic;
36
+
35
37
  std::string out_file, folder;
36
- void ClearCollected() { collected.clear(); }
37
- const std::vector<std::vector<int>>& GetCollected() { return collected; }
38
38
 
39
39
  // ───────────── helper for list‐mode DB build ─────────────────────
40
- static void Load_items_list(const std::string& fname)
41
- {
40
+ static void Load_items_list(const std::string& fname) {
42
41
  std::ifstream in(fname);
43
- if (!in.good()) {
44
- return;
45
- }
42
+ if (!in.good()) return;
46
43
  std::string line;
47
44
  while (std::getline(in, line)) {
48
45
  std::istringstream iss(line);
@@ -50,39 +47,39 @@ static void Load_items_list(const std::string& fname)
50
47
  int x;
51
48
  while (iss >> x) {
52
49
  int a = std::abs(x);
53
- if (a < 1 || a > static_cast<int>(item_dic.size())) {
54
- continue;
55
- }
56
- if (item_dic[a - 1] == -1) {
57
- continue;
58
- }
50
+ if (a < 1 || a > static_cast<int>(item_dic.size())) continue;
51
+ if (item_dic[a - 1] == -1) continue;
59
52
  seq.push_back(x);
60
53
  }
61
- if (!seq.empty()) {
62
- items.push_back(std::move(seq));
63
- }
54
+ if (!seq.empty()) items.push_back(std::move(seq));
64
55
  }
65
56
  }
66
57
 
67
58
  // ─────────────── main loader ─────────────────────────────────────
68
- bool Load_instance(const std::string& items_file, double minsup)
69
- {
59
+ bool Load_instance(const std::string& items_file, double minsup) {
60
+ // reset state
61
+ N = L = num_nodes = theta = M = E = 0;
70
62
  start_time = std::clock();
71
63
 
72
- if (use_list)
73
- {
74
- if (!Preprocess(items_file, minsup)) {
75
- return false;
76
- }
64
+ DFS.clear();
65
+ Tree.clear();
66
+ items.clear();
67
+ collected.clear();
68
+ item_dic.clear();
69
+ inv_item_dic.clear();
70
+ itmset_exists = false;
71
+
72
+ std::clock_t kk = start_time;
73
+ Tree.emplace_back(0, 0, 0); // root
77
74
 
75
+ if (use_list) {
76
+ if (!Preprocess(items_file, minsup)) return false;
78
77
  inv_item_dic.assign(L + 1, 0);
79
78
  for (int old = 1; old <= static_cast<int>(item_dic.size()); ++old) {
80
79
  int cid = item_dic[old - 1];
81
80
  if (cid > 0) inv_item_dic[cid] = old;
82
81
  }
83
-
84
82
  Load_items_list(items_file);
85
-
86
83
  N = items.size();
87
84
  theta = (minsup < 1.0)
88
85
  ? static_cast<unsigned long long>(std::ceil(minsup * N))
@@ -90,29 +87,39 @@ bool Load_instance(const std::string& items_file, double minsup)
90
87
  return true;
91
88
  }
92
89
 
93
- std::clock_t kk = std::clock();
94
- Tree.clear();
95
- Tree.emplace_back(0, 0, 0); // root node
96
-
90
+ // MDD build mode
97
91
  if (pre_pro) {
98
- if (!Preprocess(items_file, minsup)) {
99
- return false;
100
- }
92
+ if (!Preprocess(items_file, minsup)) return false;
93
+ std::cout << "\nPreprocess done in "
94
+ << give_time(std::clock() - kk)
95
+ << " seconds\n\n";
96
+ DFS.clear();
97
+ DFS.reserve(L);
98
+ for (unsigned int i = 0; i < L; ++i)
99
+ DFS.emplace_back(-int(i) - 1);
100
+ kk = std::clock();
101
101
  Load_items_pre(items_file);
102
102
  } else {
103
+ if (!Preprocess(items_file, 0.0)) return false;
104
+ kk = std::clock();
103
105
  Load_items(items_file);
104
106
  }
105
107
 
108
+ std::cout << "\nMDD Database built in "
109
+ << give_time(std::clock() - kk)
110
+ << " seconds\n\n";
111
+ std::cout << "Found " << N
112
+ << " sequences, with max line len " << M
113
+ << ", and " << L << " items, and " << E << " entries\n";
114
+ std::cout << "Total MDD nodes: " << Tree.size() << std::endl;
115
+
106
116
  return true;
107
117
  }
108
118
 
109
119
  // ────────────── Preprocess (list mode) ───────────────────────────
110
- bool Preprocess(const std::string& inst, double thresh)
111
- {
120
+ bool Preprocess(const std::string& inst, double thresh) {
112
121
  std::ifstream file(inst);
113
- if (!file.good()) {
114
- return false;
115
- }
122
+ if (!file.good()) return false;
116
123
 
117
124
  std::vector<unsigned long long> freq(1000000);
118
125
  std::vector<unsigned long long> counted(1000000, 0);
@@ -154,9 +161,7 @@ bool Preprocess(const std::string& inst, double thresh)
154
161
  // Load_items_pre: MDD insert from file
155
162
  void Load_items_pre(const std::string& inst_name) {
156
163
  std::ifstream file(inst_name);
157
- if (!file.good()) {
158
- return;
159
- }
164
+ if (!file.good()) return;
160
165
 
161
166
  std::string line;
162
167
  while (std::getline(file, line)) {
@@ -166,11 +171,7 @@ void Load_items_pre(const std::string& inst_name) {
166
171
  bool sgn = false;
167
172
  while (word >> itm) {
168
173
  int ditem;
169
- try {
170
- ditem = std::stoi(itm);
171
- } catch (...) {
172
- continue;
173
- }
174
+ try { ditem = std::stoi(itm); } catch (...) { continue; }
174
175
  int absidx = std::abs(ditem) - 1;
175
176
  if (absidx < 0 || absidx >= static_cast<int>(item_dic.size())) {
176
177
  if (!sgn && ditem < 0) sgn = true;
@@ -180,23 +181,13 @@ void Load_items_pre(const std::string& inst_name) {
180
181
  if (!sgn && ditem < 0) sgn = true;
181
182
  continue;
182
183
  }
183
- if (ditem > 0) {
184
- ditem = item_dic[ditem - 1];
185
- itmset_exists = true;
186
- } else {
187
- ditem = -item_dic[-ditem - 1];
188
- }
189
- if (sgn) {
190
- if (ditem > 0) ditem = -ditem;
191
- sgn = false;
192
- }
184
+ if (ditem > 0) { ditem = item_dic[ditem - 1]; itmset_exists = true; }
185
+ else { ditem = -item_dic[-ditem - 1]; }
186
+ if (sgn) { if (ditem > 0) ditem = -ditem; sgn = false; }
193
187
  temp_vec.push_back(ditem);
194
188
  }
195
- if (temp_vec.empty()) {
196
- continue;
197
- }
198
-
199
- N++;
189
+ if (temp_vec.empty()) continue;
190
+ ++N;
200
191
  M = std::max<unsigned>(M, temp_vec.size());
201
192
  Build_MDD(temp_vec);
202
193
  }
@@ -205,9 +196,7 @@ void Load_items_pre(const std::string& inst_name) {
205
196
  // Load_items: full MDD build
206
197
  bool Load_items(const std::string& inst_name) {
207
198
  std::ifstream file(inst_name);
208
- if (!file.good()) {
209
- return false;
210
- }
199
+ if (!file.good()) return false;
211
200
 
212
201
  std::string line;
213
202
  while (std::getline(file, line)) {
@@ -217,19 +206,25 @@ bool Load_items(const std::string& inst_name) {
217
206
  std::vector<int> temp_vec;
218
207
  while (word >> itm) {
219
208
  int ditem;
220
- try {
221
- ditem = std::stoi(itm);
222
- } catch (...) {
223
- continue;
224
- }
209
+ try { ditem = std::stoi(itm); } catch (...) { continue; }
225
210
  if (ditem > 0) itmset_exists = true;
226
- L = std::max(L, static_cast<unsigned>(std::abs(ditem)));
211
+ unsigned int ad = static_cast<unsigned int>(std::abs(ditem));
212
+ if (L < ad) {
213
+ L = ad;
214
+ DFS.reserve(L);
215
+ while (DFS.size() < L)
216
+ DFS.emplace_back(-int(DFS.size()) - 1);
217
+ }
227
218
  temp_vec.push_back(ditem);
228
219
  }
229
- M = std::max<unsigned>(M, temp_vec.size());
220
+ if (temp_vec.size() > M) M = temp_vec.size();
230
221
  Build_MDD(temp_vec);
231
222
  }
232
223
  return true;
233
224
  }
234
225
 
226
+ void ClearCollected() { collected.clear(); }
227
+ const std::vector<std::vector<int>>& GetCollected() { return collected; }
228
+
235
229
  } // namespace largebm
230
+
@@ -11,6 +11,7 @@ std::vector<largehm::CArc> largehm::CTree;
11
11
  #include <iostream>
12
12
  #include <cmath> // for std::abs
13
13
  #include <unordered_map>
14
+ #include <cstdint> // for std::uint64_t
14
15
  #include "load_inst.hpp"
15
16
  #include "freq_miner.hpp"
16
17
  #include "utility.hpp"
@@ -23,8 +24,8 @@ namespace largehm {
23
24
  void Build_MDD(std::vector<int>& items, std::vector<int>& items_lim) {
24
25
  // SANITY CHECK: show sizes before building
25
26
 
26
- std::unordered_map<int, unsigned long long int> ancest_map;
27
- unsigned long long int last_arc = 0;
27
+ std::unordered_map<int, std::uint64_t> ancest_map;
28
+ std::uint64_t last_arc = 0;
28
29
  int itmset = 0;
29
30
 
30
31
  // Insert each prefix item as an arc
@@ -43,9 +44,9 @@ void Build_MDD(std::vector<int>& items, std::vector<int>& items_lim) {
43
44
  // ─── Add_arc: insert a single “item” into the MDD under parent last_arc. ──────
44
45
  //
45
46
  int Add_arc(int item,
46
- unsigned long long int last_arc,
47
+ std::uint64_t last_arc,
47
48
  int& itmset,
48
- std::unordered_map<int, unsigned long long int>& ancest_map)
49
+ std::unordered_map<int, std::uint64_t>& ancest_map)
49
50
  {
50
51
  // Ensure DFS is at least size |item|
51
52
  size_t needed = static_cast<size_t>(std::abs(item));
@@ -67,7 +68,7 @@ int Add_arc(int item,
67
68
  ++itmset;
68
69
  }
69
70
 
70
- unsigned long long int last_sibl = Tree[last_arc].chld;
71
+ std::uint64_t last_sibl = Tree[last_arc].chld;
71
72
  if (last_sibl == 0) {
72
73
  // No child yet: create a new Arc
73
74
  Tree.emplace_back(item, itmset, anct);
@@ -98,7 +99,7 @@ int Add_arc(int item,
98
99
  }
99
100
  ++Tree[last_sibl].freq;
100
101
  ancest_map[std::abs(item)] = last_sibl;
101
- return last_sibl;
102
+ return static_cast<int>(last_sibl);
102
103
  }
103
104
 
104
105
 
@@ -106,8 +107,8 @@ int Add_arc(int item,
106
107
  // ─── Add_vec: attach the “items_lim” vector as children/vertical arcs ─────────
107
108
  //
108
109
  void Add_vec(std::vector<int>& items_lim,
109
- std::unordered_map<int, unsigned long long int>& ancest_map,
110
- unsigned long long int last_arc,
110
+ std::unordered_map<int, std::uint64_t>& ancest_map,
111
+ std::uint64_t last_arc,
111
112
  int itmset)
112
113
  {
113
114
  // Ensure VDFS and DFS are at least size L
@@ -131,7 +132,7 @@ void Add_vec(std::vector<int>& items_lim,
131
132
 
132
133
  // If this node has positive itmset (>0) or no CTree child yet, create first child entry
133
134
  if (Tree[last_arc].itmset > 0 || Tree[last_arc].chld == 0) {
134
- std::vector<unsigned long long int> ancest(L + 1, 0ULL);
135
+ std::vector<std::uint64_t> ancest(L + 1, 0ULL);
135
136
  for (auto& kv : ancest_map) {
136
137
  ancest[kv.first - 1] = kv.second;
137
138
  counted[kv.first - 1] = true;
@@ -1,12 +1,16 @@
1
+
2
+ #include <cstdint>
3
+ #include <vector>
4
+
1
5
  #include <iostream>
2
6
  #include <time.h>
3
- #include <vector> // for std::vector
7
+ // for std::vector
4
8
  #include <cmath> // for std::ceil
5
9
 
6
10
  #include "freq_miner.hpp"
7
11
  #include "build_mdd.hpp"
8
12
  #include "utility.hpp"
9
-
13
+ std::vector<std::uint64_t> ancest_base;
10
14
  namespace largehm {
11
15
 
12
16
  void Out_patt(std::vector<int>& seq, unsigned int freq);
@@ -14,7 +18,7 @@ void Extend_patt(Pattern& _patt);
14
18
  void Mine_vec(unsigned long long int seq_ID,
15
19
  int pos,
16
20
  int num_found,
17
- std::vector<unsigned long long int>& ancest,
21
+
18
22
  std::vector<int>& items,
19
23
  unsigned long long int inod,
20
24
  int sgn);
@@ -27,7 +31,7 @@ std::vector<bool> slist;
27
31
  std::vector<Pattern> pot_patt;
28
32
  std::vector<VPattern> pot_vpatt;
29
33
  std::vector<unsigned long long int> last_strpnt;
30
- std::vector<unsigned long long int> ancest_base;
34
+
31
35
  std::vector<int> DFS_numfound;
32
36
 
33
37
  Pattern _patt;
@@ -354,13 +358,13 @@ void Extend_patt(Pattern& _pattern) {
354
358
  }
355
359
 
356
360
 
357
- void Mine_vec(unsigned long long int seq_ID,
358
- int pos,
359
- int num_found,
360
- std::vector<unsigned long long int>& ancest,
361
- std::vector<int>& items,
362
- unsigned long long int pnt,
363
- int sgn)
361
+ void Mine_vec(std::uint64_t seq_ID,
362
+ int pos,
363
+ int num_found,
364
+ std::vector<std::uint64_t>& ancest,
365
+ std::vector<int>& items,
366
+ std::uint64_t pnt,
367
+ int sgn)
364
368
  {
365
369
  std::vector<bool> found(L + L * (ilist_nempty ? 1 : 0), false);
366
370