bm-preprocessing 0.2.1__tar.gz → 0.3.0__tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (25)
  1. bm_preprocessing-0.3.0/.gitignore +24 -0
  2. {bm_preprocessing-0.2.1 → bm_preprocessing-0.3.0}/PKG-INFO +4 -1
  3. bm_preprocessing-0.3.0/USAGE.md +96 -0
  4. {bm_preprocessing-0.2.1 → bm_preprocessing-0.3.0}/pyproject.toml +4 -1
  5. {bm_preprocessing-0.2.1 → bm_preprocessing-0.3.0}/src/bm_preprocessing/DM/__init__.py +3 -1
  6. {bm_preprocessing-0.2.1 → bm_preprocessing-0.3.0}/src/bm_preprocessing/DM/all.py +5 -5
  7. {bm_preprocessing-0.2.1 → bm_preprocessing-0.3.0}/src/bm_preprocessing/DM/apriori.py +5 -5
  8. {bm_preprocessing-0.2.1 → bm_preprocessing-0.3.0}/src/bm_preprocessing/DM/hash.py +5 -5
  9. bm_preprocessing-0.3.0/src/bm_preprocessing/DM/hunts.py +30 -0
  10. bm_preprocessing-0.3.0/src/bm_preprocessing/DM/hunts_test.py +30 -0
  11. {bm_preprocessing-0.2.1 → bm_preprocessing-0.3.0}/src/bm_preprocessing/DM/preprocessing.py +5 -5
  12. {bm_preprocessing-0.2.1 → bm_preprocessing-0.3.0}/src/bm_preprocessing/DM/sources/all.py +16 -14
  13. {bm_preprocessing-0.2.1 → bm_preprocessing-0.3.0}/src/bm_preprocessing/DM/sources/apriori.py +18 -12
  14. bm_preprocessing-0.3.0/src/bm_preprocessing/DM/sources/data.csv +11 -0
  15. {bm_preprocessing-0.2.1 → bm_preprocessing-0.3.0}/src/bm_preprocessing/DM/sources/hash.py +33 -17
  16. bm_preprocessing-0.3.0/src/bm_preprocessing/DM/sources/hunts.py +96 -0
  17. bm_preprocessing-0.3.0/src/bm_preprocessing/DM/sources/hunts_test.py +101 -0
  18. {bm_preprocessing-0.2.1 → bm_preprocessing-0.3.0}/src/bm_preprocessing/DM/sources/preprocessing.py +12 -5
  19. {bm_preprocessing-0.2.1 → bm_preprocessing-0.3.0}/src/bm_preprocessing/IR/all.py +5 -5
  20. {bm_preprocessing-0.2.1 → bm_preprocessing-0.3.0}/src/bm_preprocessing/IR/sources/all.py +32 -20
  21. {bm_preprocessing-0.2.1 → bm_preprocessing-0.3.0}/src/bm_preprocessing/__init__.py +1 -2
  22. bm_preprocessing-0.2.1/.gitignore +0 -10
  23. bm_preprocessing-0.2.1/uv.lock +0 -1100
  24. {bm_preprocessing-0.2.1 → bm_preprocessing-0.3.0}/README.md +0 -0
  25. {bm_preprocessing-0.2.1 → bm_preprocessing-0.3.0}/src/bm_preprocessing/IR/__init__.py +0 -0
@@ -0,0 +1,24 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.egg-info/
5
+ dist/
6
+ build/
7
+ *.egg
8
+
9
+ # Virtual environment
10
+ .venv/
11
+
12
+ # IDE
13
+ .vscode/
14
+ .idea/
15
+
16
+ # Generated images
17
+ *.png
18
+
19
+ # OS files
20
+ Thumbs.db
21
+ .DS_Store
22
+
23
+ # UV
24
+ uv.lock
@@ -1,9 +1,12 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: bm-preprocessing
3
- Version: 0.2.1
3
+ Version: 0.3.0
4
4
  Summary: A package to preprocess text data
5
5
  Requires-Python: >=3.8
6
6
  Requires-Dist: build>=1.2.2.post1
7
+ Requires-Dist: graphviz>=0.20.3
8
+ Requires-Dist: matplotlib>=3.7.5
9
+ Requires-Dist: pandas>=2.0.3
7
10
  Requires-Dist: twine>=6.1.0
8
11
  Description-Content-Type: text/markdown
9
12
 
@@ -0,0 +1,96 @@
1
+ # bm-preprocessing Usage Guide
2
+
3
+ ## Installation
4
+
5
+ ```bash
6
+ pip install bm-preprocessing
7
+ ```
8
+
9
+ ---
10
+
11
+ ## Usage in Python File
12
+
13
+ Create a file `example.py`:
14
+
15
+ ```python
16
+ # Import modules
17
+ from bm_preprocessing.IR import all
18
+ from bm_preprocessing.DM import apriori, hash, hunts, hunts_test, preprocessing
19
+
20
+ # Print the source code
21
+ print("=== IR All Module ===")
22
+ print(all)
23
+
24
+ print("\n=== DM Apriori Module ===")
25
+ print(apriori)
26
+
27
+ print("\n=== DM Hash Module ===")
28
+ print(hash)
29
+
30
+ print("\n=== DM Hunts Module ===")
31
+ print(hunts)
32
+
33
+ print("\n=== DM Hunts Test Module ===")
34
+ print(hunts_test)
35
+
36
+ print("\n=== DM Preprocessing Module ===")
37
+ print(preprocessing)
38
+ ```
39
+
40
+ Run it:
41
+ ```bash
42
+ python example.py
43
+ ```
44
+
45
+ ---
46
+
47
+ ## Usage in Terminal (Interactive Python)
48
+
49
+ ```bash
50
+ python
51
+ ```
52
+
53
+ Then in the Python REPL:
54
+
55
+ ```python
56
+ >>> from bm_preprocessing.IR import all
57
+ >>> print(all)
58
+ # Prints entire IR/all.py source code
59
+
60
+ >>> from bm_preprocessing.DM import apriori
61
+ >>> print(apriori)
62
+ # Prints entire DM/apriori.py source code
63
+
64
+ >>> from bm_preprocessing.DM import hunts, hunts_test
65
+ >>> print(hunts)
66
+ # Prints entire DM/hunts.py source code
67
+ >>> print(hunts_test)
68
+ # Prints entire DM/hunts_test.py source code
69
+ ```
70
+
71
+ ---
72
+
73
+ ## One-liner in Terminal
74
+
75
+ ```bash
76
+ python -c "from bm_preprocessing.IR import all; print(all)"
77
+ python -c "from bm_preprocessing.DM import apriori; print(apriori)"
78
+ python -c "from bm_preprocessing.DM import hash; print(hash)"
79
+ python -c "from bm_preprocessing.DM import hunts; print(hunts)"
80
+ python -c "from bm_preprocessing.DM import hunts_test; print(hunts_test)"
81
+ python -c "from bm_preprocessing.DM import preprocessing; print(preprocessing)"
82
+ ```
83
+
84
+ ---
85
+
86
+ ## Available Modules
87
+
88
+ | Import | Description |
89
+ |--------|-------------|
90
+ | `from bm_preprocessing.IR import all` | Information Retrieval (BM25, TF-IDF, Boolean) |
91
+ | `from bm_preprocessing.DM import all` | Data Mining algorithms |
92
+ | `from bm_preprocessing.DM import apriori` | Apriori algorithm |
93
+ | `from bm_preprocessing.DM import hash` | Hash-based mining |
94
+ | `from bm_preprocessing.DM import hunts` | Hunt's decision tree algorithm |
95
+ | `from bm_preprocessing.DM import hunts_test` | Hunt's decision tree with visualization |
96
+ | `from bm_preprocessing.DM import preprocessing` | Data preprocessing utilities |
@@ -4,12 +4,15 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "bm-preprocessing"
7
- version = "0.2.1"
7
+ version = "0.3.0"
8
8
  description = "A package to preprocess text data"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.8"
11
11
  dependencies = [
12
12
  "build>=1.2.2.post1",
13
+ "graphviz>=0.20.3",
14
+ "matplotlib>=3.7.5",
15
+ "pandas>=2.0.3",
13
16
  "twine>=6.1.0",
14
17
  ]
15
18
 
@@ -3,6 +3,8 @@
3
3
  from .all import all
4
4
  from .apriori import apriori
5
5
  from .hash import hash
6
+ from .hunts import hunts
7
+ from .hunts_test import hunts_test
6
8
  from .preprocessing import preprocessing
7
9
 
8
- __all__ = ["all", "apriori", "hash", "preprocessing"]
10
+ __all__ = ["all", "apriori", "hash", "hunts", "hunts_test", "preprocessing"]
@@ -5,22 +5,22 @@ from pathlib import Path
5
5
 
6
6
  class SourceCodeModule:
7
7
  """A class that displays source code when printed."""
8
-
8
+
9
9
  def __init__(self, name: str, source_path: Path):
10
10
  self.name = name
11
11
  self._source_path = source_path
12
12
  self._source_code = None
13
-
13
+
14
14
  @property
15
15
  def source_code(self) -> str:
16
16
  """Lazily load source code."""
17
17
  if self._source_code is None:
18
- self._source_code = self._source_path.read_text(encoding='utf-8')
18
+ self._source_code = self._source_path.read_text(encoding="utf-8")
19
19
  return self._source_code
20
-
20
+
21
21
  def __repr__(self) -> str:
22
22
  return self.source_code
23
-
23
+
24
24
  def __str__(self) -> str:
25
25
  return self.source_code
26
26
 
@@ -5,22 +5,22 @@ from pathlib import Path
5
5
 
6
6
  class SourceCodeModule:
7
7
  """A class that displays source code when printed."""
8
-
8
+
9
9
  def __init__(self, name: str, source_path: Path):
10
10
  self.name = name
11
11
  self._source_path = source_path
12
12
  self._source_code = None
13
-
13
+
14
14
  @property
15
15
  def source_code(self) -> str:
16
16
  """Lazily load source code."""
17
17
  if self._source_code is None:
18
- self._source_code = self._source_path.read_text(encoding='utf-8')
18
+ self._source_code = self._source_path.read_text(encoding="utf-8")
19
19
  return self._source_code
20
-
20
+
21
21
  def __repr__(self) -> str:
22
22
  return self.source_code
23
-
23
+
24
24
  def __str__(self) -> str:
25
25
  return self.source_code
26
26
 
@@ -5,22 +5,22 @@ from pathlib import Path
5
5
 
6
6
  class SourceCodeModule:
7
7
  """A class that displays source code when printed."""
8
-
8
+
9
9
  def __init__(self, name: str, source_path: Path):
10
10
  self.name = name
11
11
  self._source_path = source_path
12
12
  self._source_code = None
13
-
13
+
14
14
  @property
15
15
  def source_code(self) -> str:
16
16
  """Lazily load source code."""
17
17
  if self._source_code is None:
18
- self._source_code = self._source_path.read_text(encoding='utf-8')
18
+ self._source_code = self._source_path.read_text(encoding="utf-8")
19
19
  return self._source_code
20
-
20
+
21
21
  def __repr__(self) -> str:
22
22
  return self.source_code
23
-
23
+
24
24
  def __str__(self) -> str:
25
25
  return self.source_code
26
26
 
@@ -0,0 +1,30 @@
1
+ """Source code loader for DM/hunts.py"""
2
+
3
+ from pathlib import Path
4
+
5
+
6
+ class SourceCodeModule:
7
+ """A class that displays source code when printed."""
8
+
9
+ def __init__(self, name: str, source_path: Path):
10
+ self.name = name
11
+ self._source_path = source_path
12
+ self._source_code = None
13
+
14
+ @property
15
+ def source_code(self) -> str:
16
+ """Lazily load source code."""
17
+ if self._source_code is None:
18
+ self._source_code = self._source_path.read_text(encoding="utf-8")
19
+ return self._source_code
20
+
21
+ def __repr__(self) -> str:
22
+ return self.source_code
23
+
24
+ def __str__(self) -> str:
25
+ return self.source_code
26
+
27
+
28
+ # Get the path to the source file
29
+ _source_file = Path(__file__).parent / "sources" / "hunts.py"
30
+ hunts = SourceCodeModule("DM.hunts", _source_file)
@@ -0,0 +1,30 @@
1
+ """Source code loader for DM/hunts_test.py"""
2
+
3
+ from pathlib import Path
4
+
5
+
6
+ class SourceCodeModule:
7
+ """A class that displays source code when printed."""
8
+
9
+ def __init__(self, name: str, source_path: Path):
10
+ self.name = name
11
+ self._source_path = source_path
12
+ self._source_code = None
13
+
14
+ @property
15
+ def source_code(self) -> str:
16
+ """Lazily load source code."""
17
+ if self._source_code is None:
18
+ self._source_code = self._source_path.read_text(encoding="utf-8")
19
+ return self._source_code
20
+
21
+ def __repr__(self) -> str:
22
+ return self.source_code
23
+
24
+ def __str__(self) -> str:
25
+ return self.source_code
26
+
27
+
28
+ # Get the path to the source file
29
+ _source_file = Path(__file__).parent / "sources" / "hunts_test.py"
30
+ hunts_test = SourceCodeModule("DM.hunts_test", _source_file)
@@ -5,22 +5,22 @@ from pathlib import Path
5
5
 
6
6
  class SourceCodeModule:
7
7
  """A class that displays source code when printed."""
8
-
8
+
9
9
  def __init__(self, name: str, source_path: Path):
10
10
  self.name = name
11
11
  self._source_path = source_path
12
12
  self._source_code = None
13
-
13
+
14
14
  @property
15
15
  def source_code(self) -> str:
16
16
  """Lazily load source code."""
17
17
  if self._source_code is None:
18
- self._source_code = self._source_path.read_text(encoding='utf-8')
18
+ self._source_code = self._source_path.read_text(encoding="utf-8")
19
19
  return self._source_code
20
-
20
+
21
21
  def __repr__(self) -> str:
22
22
  return self.source_code
23
-
23
+
24
24
  def __str__(self) -> str:
25
25
  return self.source_code
26
26
 
@@ -1,21 +1,21 @@
1
- from itertools import combinations, chain
2
1
  from collections import defaultdict
2
+ from itertools import chain, combinations
3
3
 
4
4
  min_support = 2
5
5
  min_conf = 0.7
6
6
 
7
7
  transactions = {
8
- "T1": {"I1","I2","I4","I5","I6"},
9
- "T2": {"I2","I4","I6"},
10
- "T3": {"I2","I3"},
11
- "T4": {"I1","I2","I4"},
12
- "T5": {"I1","I2","I3"},
13
- "T6": {"I2","I3"},
14
- "T7": {"I1","I3"},
15
- "T8": {"I1","I2","I3","I5"},
16
- "T9": {"I1","I2","I3"},
17
- "T10": {"I1","I2","I4","I5"},
18
- "T11": {"I5","I6"}
8
+ "T1": {"I1", "I2", "I4", "I5", "I6"},
9
+ "T2": {"I2", "I4", "I6"},
10
+ "T3": {"I2", "I3"},
11
+ "T4": {"I1", "I2", "I4"},
12
+ "T5": {"I1", "I2", "I3"},
13
+ "T6": {"I2", "I3"},
14
+ "T7": {"I1", "I3"},
15
+ "T8": {"I1", "I2", "I3", "I5"},
16
+ "T9": {"I1", "I2", "I3"},
17
+ "T10": {"I1", "I2", "I4", "I5"},
18
+ "T11": {"I5", "I6"},
19
19
  }
20
20
 
21
21
  genL = lambda C: {k: v for k, v in C.items() if v >= min_support}
@@ -85,13 +85,15 @@ for k, v in C.items():
85
85
  "Empty" if not v else "\n".join(f"{set(x)} : {y}" for x, y in v.items()),
86
86
  )
87
87
  for k, v in L.items():
88
- print(f"\nL{k}:\n", "Empty" if not v else "\n".join(f"{set(x)} : {v[x]}" for x in v))
88
+ print(
89
+ f"\nL{k}:\n", "Empty" if not v else "\n".join(f"{set(x)} : {v[x]}" for x in v)
90
+ )
89
91
 
90
92
  print(f"\nAssociation Rules (conf >= {min_conf:.0%}):")
91
93
  for a, c, s, conf in rules:
92
94
  print(f"{set(a)} => {set(c)} | support: {s:.2f}, confidence: {conf:.2f}")
93
95
 
94
- from itertools import combinations, chain
96
+ from itertools import chain, combinations
95
97
 
96
98
  transactions = {
97
99
  "10": {"A", "C", "D"},
@@ -1,11 +1,13 @@
1
1
  from collections import defaultdict
2
2
  from itertools import combinations
3
3
 
4
+
4
5
  def print_table(data, title):
5
6
  print(f"\n--- {title} ---")
6
7
  for itemset, count in data.items():
7
8
  print(f"{itemset}: {count}")
8
9
 
10
+
9
11
  C = {}
10
12
  L = {}
11
13
 
@@ -18,26 +20,26 @@ def generate_candidates(prev_frequent_itemsets, k):
18
20
  union_set = set(itemset1).union(set(itemset2))
19
21
  if len(union_set) == k:
20
22
  candidates.add(tuple(sorted(union_set)))
21
-
23
+
22
24
  return sorted(list(candidates))
23
25
 
24
26
 
25
27
  def count_candidates(candidates, transactions):
26
-
28
+
27
29
  candidate_count = defaultdict(int)
28
-
30
+
29
31
  for candidate in candidates:
30
32
  for transaction in transactions.values():
31
33
  if all(item in transaction for item in candidate):
32
34
  candidate_count[candidate] += 1
33
-
35
+
34
36
  return candidate_count
35
37
 
36
38
 
37
39
  def prune_candidates(candidate_count, min_support, prev_freq_itemsets=None):
38
40
 
39
41
  filtered_candidates = {}
40
-
42
+
41
43
  for itemset, count in candidate_count.items():
42
44
 
43
45
  if count >= min_support:
@@ -45,7 +47,9 @@ def prune_candidates(candidate_count, min_support, prev_freq_itemsets=None):
45
47
  filtered_candidates[itemset] = count
46
48
  else:
47
49
  subsets = combinations(itemset, len(itemset) - 1)
48
- if all(tuple(sorted(subset)) in prev_freq_itemsets for subset in subsets):
50
+ if all(
51
+ tuple(sorted(subset)) in prev_freq_itemsets for subset in subsets
52
+ ):
49
53
  filtered_candidates[itemset] = count
50
54
 
51
55
  return filtered_candidates
@@ -53,12 +57,14 @@ def prune_candidates(candidate_count, min_support, prev_freq_itemsets=None):
53
57
 
54
58
  def apriori(transactions, min_support):
55
59
 
56
- items = sorted(set(item for transaction in transactions.values() for item in transaction))
60
+ items = sorted(
61
+ set(item for transaction in transactions.values() for item in transaction)
62
+ )
57
63
  c1_list = [(item,) for item in items]
58
-
64
+
59
65
  C[1] = count_candidates(c1_list, transactions)
60
66
  L[1] = prune_candidates(C[1], min_support)
61
-
67
+
62
68
  print_table(C[1], "Candidate 1-itemsets (C1)")
63
69
  print_table(L[1], "Frequent 1-itemsets (L1)")
64
70
 
@@ -66,12 +72,12 @@ def apriori(transactions, min_support):
66
72
 
67
73
  while True:
68
74
 
69
- candidates = generate_candidates(L[k-1].keys(), k)
75
+ candidates = generate_candidates(L[k - 1].keys(), k)
70
76
  if not candidates:
71
77
  break
72
78
 
73
79
  C[k] = count_candidates(candidates, transactions)
74
- L[k] = prune_candidates(C[k], min_support, L[k-1].keys())
80
+ L[k] = prune_candidates(C[k], min_support, L[k - 1].keys())
75
81
 
76
82
  if not L[k]:
77
83
  print_table(C[k], f"Candidate {k}-itemsets (C{k})")
@@ -99,7 +105,7 @@ def main():
99
105
  }
100
106
 
101
107
  min_support = 2
102
-
108
+
103
109
  apriori(transactions, min_support)
104
110
 
105
111
 
@@ -0,0 +1,11 @@
1
+ Tid,Home Owner,Marital Status,Annual Income,Default id
2
+ 1,Yes,Single,125K,No
3
+ 2,No,Married,100K,No
4
+ 3,No,Single,70K,No
5
+ 4,Yes,Married,120K,No
6
+ 5,No,Divorced,95K,Yes
7
+ 6,No,Married,60K,No
8
+ 7,Yes,Divorced,220K,No
9
+ 8,No,Single,85K,Yes
10
+ 9,No,Married,75K,No
11
+ 10,No,Single,90K,Yes
@@ -1,14 +1,17 @@
1
1
  from collections import defaultdict
2
2
  from itertools import combinations
3
3
 
4
+
4
5
  def print_table(data, title):
5
6
  print(f"\n--- {title} ---")
6
7
  for itemset, count in data.items():
7
8
  print(f"{itemset}: {count}")
8
9
 
10
+
9
11
  C = {}
10
12
  L = {}
11
13
 
14
+
12
15
  class Bucket:
13
16
  def __init__(self):
14
17
  self.address: int
@@ -24,26 +27,26 @@ def generate_candidates(prev_frequent_itemsets, k):
24
27
  union_set = set(itemset1).union(set(itemset2))
25
28
  if len(union_set) == k:
26
29
  candidates.add(tuple(sorted(union_set)))
27
-
30
+
28
31
  return sorted(list(candidates))
29
32
 
30
33
 
31
34
  def count_candidates(candidates, transactions):
32
-
35
+
33
36
  candidate_count = defaultdict(int)
34
-
37
+
35
38
  for candidate in candidates:
36
39
  for transaction in transactions.values():
37
40
  if all(item in transaction for item in candidate):
38
41
  candidate_count[candidate] += 1
39
-
42
+
40
43
  return candidate_count
41
44
 
42
45
 
43
46
  def prune_candidates(candidate_count, min_support, prev_freq_itemsets=None):
44
47
 
45
48
  filtered_candidates = {}
46
-
49
+
47
50
  for itemset, count in candidate_count.items():
48
51
 
49
52
  if count >= min_support:
@@ -51,7 +54,9 @@ def prune_candidates(candidate_count, min_support, prev_freq_itemsets=None):
51
54
  filtered_candidates[itemset] = count
52
55
  else:
53
56
  subsets = combinations(itemset, len(itemset) - 1)
54
- if all(tuple(sorted(subset)) in prev_freq_itemsets for subset in subsets):
57
+ if all(
58
+ tuple(sorted(subset)) in prev_freq_itemsets for subset in subsets
59
+ ):
55
60
  filtered_candidates[itemset] = count
56
61
 
57
62
  return filtered_candidates
@@ -59,12 +64,14 @@ def prune_candidates(candidate_count, min_support, prev_freq_itemsets=None):
59
64
 
60
65
  def apriori(transactions, min_support):
61
66
 
62
- items = sorted(set(item for transaction in transactions.values() for item in transaction))
67
+ items = sorted(
68
+ set(item for transaction in transactions.values() for item in transaction)
69
+ )
63
70
  c1_list = [(item,) for item in items]
64
-
71
+
65
72
  C[1] = count_candidates(c1_list, transactions)
66
73
  L[1] = prune_candidates(C[1], min_support)
67
-
74
+
68
75
  print_table(C[1], "Candidate 1-itemsets (C1)")
69
76
  print_table(L[1], "Frequent 1-itemsets (L1)")
70
77
 
@@ -77,7 +84,9 @@ def apriori(transactions, min_support):
77
84
 
78
85
  buckets = [Bucket(addr) for addr in range(7)]
79
86
 
80
- items_list = sorted(set(item for transaction in transactions.values() for item in transaction))
87
+ items_list = sorted(
88
+ set(item for transaction in transactions.values() for item in transaction)
89
+ )
81
90
  ranks = {item: idx + 1 for idx, item in enumerate(items_list)}
82
91
 
83
92
  hash_fn = lambda item1, item2: (ranks[item1] * 10 + ranks[item2]) % 7
@@ -91,25 +100,32 @@ def apriori(transactions, min_support):
91
100
 
92
101
  print("\n--- Hash Table Buckets ---")
93
102
  for bucket in buckets:
94
- print(f"Address: {bucket.address}, Count: {bucket.count}, Itemsets: {bucket.itemsets}")
95
-
103
+ print(
104
+ f"Address: {bucket.address}, Count: {bucket.count}, Itemsets: {bucket.itemsets}"
105
+ )
106
+
96
107
  # Filter
97
- L2 = {itemset: bucket.count for bucket in buckets for itemset in bucket.itemsets if bucket.count >= min_support}
108
+ L2 = {
109
+ itemset: bucket.count
110
+ for bucket in buckets
111
+ for itemset in bucket.itemsets
112
+ if bucket.count >= min_support
113
+ }
98
114
  print_table(L2, "Frequent 2-itemsets after Hashing (L2)")
99
115
 
100
- C["2"] = generate_candidates(L[k-1].keys(), k)
116
+ C["2"] = generate_candidates(L[k - 1].keys(), k)
101
117
  L["2"] = L2
102
118
 
103
119
  k = 3
104
120
 
105
121
  while True:
106
122
 
107
- candidates = generate_candidates(L[k-1].keys(), k)
123
+ candidates = generate_candidates(L[k - 1].keys(), k)
108
124
  if not candidates:
109
125
  break
110
126
 
111
127
  C[k] = count_candidates(candidates, transactions)
112
- L[k] = prune_candidates(C[k], min_support, L[k-1].keys())
128
+ L[k] = prune_candidates(C[k], min_support, L[k - 1].keys())
113
129
 
114
130
  if not L[k]:
115
131
  print_table(C[k], f"Candidate {k}-itemsets (C{k})")
@@ -137,7 +153,7 @@ def main():
137
153
  }
138
154
 
139
155
  min_support = 2
140
-
156
+
141
157
  apriori(transactions, min_support)
142
158
 
143
159