PyPI - bm-preprocessing - Versions diffs - 1.2.0__tar.gz → 1.3.0__tar.gz - Mend

bm-preprocessing 1.2.0 (tar.gz) → 1.3.0 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (49) hide show

{bm_preprocessing-1.2.0 → bm_preprocessing-1.3.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: bm-preprocessing
-Version: 1.2.0
+Version: 1.3.0
 Summary: A package to preprocess text data
 Requires-Python: >=3.8
 Requires-Dist: build>=1.2.2.post1

{bm_preprocessing-1.2.0 → bm_preprocessing-1.3.0}/USAGE.md RENAMED Viewed

@@ -8,52 +8,33 @@ pip install bm-preprocessing
 ---
-## Usage in Python File
+## Usage in Python Scripts
-Create a file `example.py`:
+You can directly import and utilize the predefined Data Mining (DM) and Information Retrieval (IR) configurations. Be sure to import from either `bm_preprocessing.DM` or `bm_preprocessing.IR` depending on the subject.
 ```python
-# Import modules
-from bm_preprocessing.IR import all, all_vis, eval_metrics, ndd, rel
-from bm_preprocessing.DM import adaboost, apriori, bagging, hash, hunts, hunts_test, id3, id3_test, lib_doc, metrics, preprocessing
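+# Import cohesive all-in-one modules
+# NOTE: IR and DM both export `all` and `all_vis`; the later import rebinds those names,
+# so with both imports present `all` refers to the DM module. Use `import ... as ...` to keep both.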
+from bm_preprocessing.IR import all, all_vis
 from bm_preprocessing.DM import all, all_vis
-# Print the source code
+# Import DM-specific algorithms
+from bm_preprocessing.DM import adaboost, apriori, bagging, hash, hunts, hunts_test, id3, id3_test, lib_doc, metrics, preprocessing, python_doc
+# Import IR-specific algorithms
+from bm_preprocessing.IR import eval_metrics, ndd, rel
+# Print the source code directly
 print("=== IR All Module ===")
 print(all)
-print("\n=== DM Apriori Module ===")
-print(apriori)
+print("\n=== IR Near Duplicate Documents ===")
+print(ndd)
 print("\n=== DM AdaBoost Module ===")
 print(adaboost)
-print("\n=== DM Bagging Module ===")
-print(bagging)
-print("\n=== DM Hash Module ===")
-print(hash)
-print("\n=== DM Hunts Module ===")
-print(hunts)
-print("\n=== DM Hunts Test Module ===")
-print(hunts_test)
-print("\n=== DM ID3 Module ===")
-print(id3)
-print("\n=== DM ID3 Test Module ===")
-print(id3_test)
-print("\n=== DM Metrics Module ===")
-print(metrics)
-print("\n=== DM Preprocessing Module ===")
-print(preprocessing)
 ```
-Run it:
+Run it locally:
 ```bash
 python example.py
 ```
@@ -62,56 +43,45 @@ python example.py
 ## Usage in Terminal (Interactive Python)
+If you just need quick access to the source code during an exam or practical, spin up the Python REPL:
 ```bash
 python
 ```
-Then in the Python REPL:
+Then drop into the REPL to retrieve the code:
 ```python
+# Prints the entire source code of the all-in-one IR module
 >>> from bm_preprocessing.IR import all
 >>> print(all)
-# Prints entire IR/all.py source code
->>> from bm_preprocessing.DM import apriori
->>> print(apriori)
-# Prints entire DM/apriori.py source code
+# Prints the Data Mining AdaBoost source code
 >>> from bm_preprocessing.DM import adaboost
 >>> print(adaboost)
-# Prints entire DM/adaboost.py source code
->>> from bm_preprocessing.DM import bagging
->>> print(bagging)
-# Prints entire DM/bagging.py source code
->>> from bm_preprocessing.DM import hunts, hunts_test
->>> print(hunts)
-# Prints entire DM/hunts.py source code
->>> print(hunts_test)
-# Prints entire DM/hunts_test.py source code
->>> from bm_preprocessing.DM import id3, id3_test
->>> print(id3)
-# Prints entire DM/id3.py source code
->>> print(id3_test)
-# Prints entire DM/id3_test.py source code
->>> from bm_preprocessing.DM import metrics
->>> print(metrics)
-# Prints entire DM/metrics.py source code
+# Prints the MinHash and LSH source code
+>>> from bm_preprocessing.IR import ndd
+>>> print(ndd)
 ```
 ---
 ## One-liner in Terminal
+If you want the terminal to print the file contents without entering the REPL, run these one-liners directly in Bash or PowerShell:
+### Information Retrieval (IR)
 ```bash
 python -c "from bm_preprocessing.IR import all; print(all)"
 python -c "from bm_preprocessing.IR import all_vis; print(all_vis)"
-python -c "from bm_preprocessing.IR import eval_metrics; print(eval_metrics)"
 python -c "from bm_preprocessing.IR import ndd; print(ndd)"
 python -c "from bm_preprocessing.IR import rel; print(rel)"
+python -c "from bm_preprocessing.IR import eval_metrics; print(eval_metrics)"
+```
+### Data Mining (DM)
+```bash
 python -c "from bm_preprocessing.DM import all; print(all)"
 python -c "from bm_preprocessing.DM import all_vis; print(all_vis)"
 python -c "from bm_preprocessing.DM import apriori; print(apriori)"
@@ -123,31 +93,35 @@ python -c "from bm_preprocessing.DM import hunts_test; print(hunts_test)"
 python -c "from bm_preprocessing.DM import id3; print(id3)"
 python -c "from bm_preprocessing.DM import id3_test; print(id3_test)"
 python -c "from bm_preprocessing.DM import metrics; print(metrics)"
-python -c "from bm_preprocessing.DM import lib_doc; print(lib_doc)"
 python -c "from bm_preprocessing.DM import preprocessing; print(preprocessing)"
+python -c "from bm_preprocessing.DM import lib_doc; print(lib_doc)"
+python -c "from bm_preprocessing.DM import python_doc; print(python_doc)"
 ```
 ---
-## Available Modules
+## Available Modules Reference
-| Import | Description |
-|--------|-------------|
-| `from bm_preprocessing.IR import all` | Information Retrieval (MinHash, LSH, Rocchio, Jaccard, VS) |
-| `from bm_preprocessing.IR import all_vis` | IR algorithms with Matplotlib visualizations |
-| `from bm_preprocessing.IR import eval_metrics` | Jaccard, PRF, Compression Ratio, MAP metrics & plots |
+| Import Path | Description |
+|-------------|-------------|
+| **Information Retrieval (IR)** | |
+| `from bm_preprocessing.IR import all` | Cohesive IR File: MinHash, LSH, Rocchio, Jaccard, VS |
+| `from bm_preprocessing.IR import all_vis` | Cohesive IR File + Matplotlib visualizations & Heatmaps |
 | `from bm_preprocessing.IR import ndd` | Near Duplicate Documents (MinHash & LSH) |
 | `from bm_preprocessing.IR import rel` | Relevance feedback & query expansion (Rocchio & LCA) |
-| `from bm_preprocessing.DM import all` | All DM algorithms (Hunt's, ID3, Bagging, AdaBoost, metrics) |
-| `from bm_preprocessing.DM import all_vis` | All DM algorithms + graphviz & full visualization |
+| `from bm_preprocessing.IR import eval_metrics` | Jaccard, PRF, Compression Ratios, MAP metrics & plots |
+| **Data Mining (DM)** | |
+| `from bm_preprocessing.DM import all` | Cohesive DM File: Hunt's, ID3, Bagging, AdaBoost, Metrics |
+| `from bm_preprocessing.DM import all_vis` | Cohesive DM File + Graphviz & Matplotlib visualizations |
 | `from bm_preprocessing.DM import apriori` | Apriori algorithm |
 | `from bm_preprocessing.DM import adaboost` | Bagging & AdaBoost ensemble classifiers |
 | `from bm_preprocessing.DM import bagging` | Bagging ensemble classifier |
 | `from bm_preprocessing.DM import hash` | Hash-based mining |
 | `from bm_preprocessing.DM import hunts` | Hunt's decision tree algorithm |
-| `from bm_preprocessing.DM import hunts_test` | Hunt's decision tree with visualization |
+| `from bm_preprocessing.DM import hunts_test` | Hunt's decision tree with dataset visualization |
 | `from bm_preprocessing.DM import id3` | ID3 decision tree algorithm |
-| `from bm_preprocessing.DM import id3_test` | ID3 decision tree with visualization |
+| `from bm_preprocessing.DM import id3_test` | ID3 decision tree with dataset visualization |
 | `from bm_preprocessing.DM import metrics` | Classification metrics & curves |
-| `from bm_preprocessing.DM import lib_doc` | Pandas/NumPy/Sklearn/DM/IR cheat sheet |
 | `from bm_preprocessing.DM import preprocessing` | Data preprocessing utilities |
+| `from bm_preprocessing.DM import lib_doc` | Pandas, NumPy, Sklearn cheat sheet (DM & IR logic) |
+| `from bm_preprocessing.DM import python_doc` | Python Basics cheat sheet (Sets, Dicts, Comprehensions, etc.) |

{bm_preprocessing-1.2.0 → bm_preprocessing-1.3.0}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 [project]
 name = "bm-preprocessing"
-version = "1.2.0"
+version = "1.3.0"
 description = "A package to preprocess text data"
 readme = "README.md"
 requires-python = ">=3.8"

{bm_preprocessing-1.2.0 → bm_preprocessing-1.3.0}/src/bm_preprocessing/DM/__init__.py RENAMED Viewed

@@ -13,6 +13,7 @@ from .id3_test import id3_test
 from .lib_doc import lib_doc
 from .metrics import metrics
 from .preprocessing import preprocessing
+from .python_doc import python_doc
-__all__ = ["adaboost", "all", "all_vis", "apriori", "bagging", "hash", "hunts", "hunts_test", "id3", "id3_test", "lib_doc", "metrics", "preprocessing"]
+__all__ = ["adaboost", "all", "all_vis", "apriori", "bagging", "hash", "hunts", "hunts_test", "id3", "id3_test", "lib_doc", "metrics", "preprocessing", "python_doc"]

bm_preprocessing-1.3.0/src/bm_preprocessing/DM/python_doc.py ADDED Viewed

@@ -0,0 +1,30 @@
+"""Source code loader for DM/python_doc.py"""
+from pathlib import Path
+class SourceCodeModule:
+    """A class that displays source code when printed."""
+    def __init__(self, name: str, source_path: Path):
+        self.name = name
+        self._source_path = source_path
+        self._source_code = None
+    @property
+    def source_code(self) -> str:
+        """Lazily load source code."""
+        if self._source_code is None:
+            self._source_code = self._source_path.read_text(encoding="utf-8")
+        return self._source_code
+    def __repr__(self) -> str:
+        return self.source_code
+    def __str__(self) -> str:
+        return self.source_code
+# Get the path to the source file
+_source_file = Path(__file__).parent / "sources" / "python_doc.py"
+python_doc = SourceCodeModule("DM.python_doc", _source_file)

bm_preprocessing-1.3.0/src/bm_preprocessing/DM/sources/python_doc.py ADDED Viewed

@@ -0,0 +1,174 @@
+"""
+=============================================================================
+PYTHON BASICS CHEAT SHEET
+=============================================================================
+A quick reference guide for core Python concepts, data structures, and features.
+"""
+# =============================================================================
+# 1. LISTS (Mutable, Ordered)
+# =============================================================================
+my_list = [1, 2, 3, 'a', 'b']
+# Operations
+my_list.append(4)           # Add to end: [1, 2, 3, 'a', 'b', 4]
+my_list.insert(0, 0)        # Insert at index: [0, 1, 2, 3, 'a', 'b', 4]
+my_list.extend([5, 6])      # Append multiple: [0, 1, 2, 3, 'a', 'b', 4, 5, 6]
+my_list.pop()               # Remove & return last item (6)
+my_list.pop(1)              # Remove & return item at index 1 (1)
+my_list.remove('a')         # Remove first occurrence of 'a'
+my_list.reverse()           # Reverse in place
+# my_list.sort()            # Sort in place (requires same types)
+# sorted(my_list)           # Return new sorted list
+my_list.clear()             # Empty the list
+count = my_list.count(2)    # Count occurrences (0 here: the list was just cleared)
+idx = my_list.index(3)      # Find index of first occurrence (raises ValueError if absent, as here after clear())
+# Slicing: list[start:stop:step]
+# my_list[1:4] (items at index 1 to 3), my_list[::-1] (returns a reversed copy; the original is unchanged)
+# =============================================================================
+# 2. SETS (Mutable, Unordered, Unique Elements)
+# =============================================================================
+my_set = {1, 2, 3}
+empty_set = set()           # Note: {} creates an empty dict, not a set
+# Operations
+my_set.add(4)               # Add element
+my_set.update([5, 6])       # Add multiple elements
+my_set.remove(6)            # Remove element (raises KeyError if not found)
+my_set.discard(10)          # Remove element (safe, no error if not found)
+my_set.pop()                # Remove & return arbitrary element
+my_set.clear()              # Empty the set
+set_a, set_b = {1, 2}, {2, 3}
+union = set_a | set_b               # {1, 2, 3} (or set_a.union(set_b))
+intersection = set_a & set_b        # {2} (or set_a.intersection(set_b))
+diff = set_a - set_b                # {1} (or set_a.difference(set_b))
+sym_diff = set_a ^ set_b            # {1, 3} (or set_a.symmetric_difference(set_b))
+# =============================================================================
+# 3. TUPLES (Immutable, Ordered)
+# =============================================================================
+my_tuple = (1, 2, 3, 2)
+single_tuple = (1,)         # Comma needed for single-element tuple
+# Operations (Very limited since immutable)
+count = my_tuple.count(2)   # Count occurrences (2)
+idx = my_tuple.index(3)     # Find index of first occurrence (2)
+# Tuples support unpacking: a, b, c, d = my_tuple
+# =============================================================================
+# 4. DICTIONARIES (Mutable, Key-Value Pairs, Unordered before Python 3.7)
+# =============================================================================
+my_dict = {'name': 'Alice', 'age': 25}
+# Operations
+my_dict['city'] = 'NYC'     # Add or update key
+val = my_dict.get('age')    # Safe get (returns None if not found, instead of KeyError)
+val = my_dict.get('x', 0)   # Safe get with default value
+keys = my_dict.keys()       # dict_keys(['name', 'age', 'city'])
+values = my_dict.values()   # dict_values(['Alice', 25, 'NYC'])
+items = my_dict.items()     # dict_items([('name', 'Alice'), ...])
+# Removal
+popped_val = my_dict.pop('age')         # Remove key 'age' and return value
+popped_item = my_dict.popitem()         # Remove & return last key-value pair as tuple
+# del my_dict['name']                   # Delete key
+my_dict.clear()                         # Empty dict
+my_dict.update({'a': 1, 'b': 2})        # Merge / Update with another dict
+# =============================================================================
+# 5. LIST / DICT / SET COMPREHENSIONS
+# =============================================================================
+# List Comprehension: [expression for item in iterable if condition]
+squares = [x**2 for x in range(10) if x % 2 == 0]     # [0, 4, 16, 36, 64]
+# Dict Comprehension: {key_expr: val_expr for item in iterable if condition}
+sq_dict = {x: x**2 for x in range(5)}                 # {0: 0, 1: 1, 2: 4, 3: 9, 4: 16}
+# Set Comprehension: {expression for item in iterable if condition}
+sq_set = {x**2 for x in [-1, 1, 2]}                   # {1, 4}
+# Generator Expression: (expression for item in iterable if condition)
+gen = (x**2 for x in range(10))                       # Lazy evaluation
+# =============================================================================
+# 6. LAMBDA FUNCTIONS, MAP, FILTER, REDUCE
+# =============================================================================
+# lambda arguments: expression
+add = lambda x, y: x + y
+print(add(2, 3))  # 5
+nums = [1, 2, 3, 4]
+# map: apply function to all items
+mapped = list(map(lambda x: x*2, nums))               # [2, 4, 6, 8]
+# filter: keep items where function returns True
+filtered = list(filter(lambda x: x % 2 == 0, nums))   # [2, 4]
+# reduce (requires functools): cumulative application
+from functools import reduce
+product = reduce(lambda x, y: x * y, nums)            # 24
+# Sort with lambda key
+words = ["apple", "banana", "cherry"]
+words.sort(key=lambda w: len(w))                      # Sort by length
+# =============================================================================
+# 7. CLASSES AND OBJECTS (OOP)
+# =============================================================================
+class Animal:
+    """Base class for animals."""
+    species_count = 0  # Class attribute: shared by all instances; __init__ increments it once per object created
+    def __init__(self, name):
+        self.name = name  # Instance attribute
+        Animal.species_count += 1
+    def speak(self):
+        """Instance method"""
+        return "Some sound"
+    @classmethod
+    def get_count(cls):
+        """Class method: takes class as first arg"""
+        return cls.species_count
+    @staticmethod
+    def is_alive():
+        """Static method: no implicit self or cls args"""
+        return True
+# Inheritance
+class Dog(Animal):
+    def __init__(self, name, breed):
+        super().__init__(name)  # Call parent constructor
+        self.breed = breed
+    def speak(self):            # Method Overriding
+        return "Woof!"
+dog = Dog("Buddy", "Golden Retriever")
+print(dog.speak())              # "Woof!"
+print(Animal.get_count())       # 1
+# =============================================================================
+# 8. FILE HANDLING
+# =============================================================================
+# Using 'with' is a best practice, as it automatically closes the file
+# Modes: 'r' (read), 'w' (write, truncates), 'a' (append), 'r+' (read & write); add 'b' for binary (e.g. 'rb', 'wb')
+# Write to file
+with open("example.txt", "w", encoding="utf-8") as file:
+    file.write("Hello World\nLine 2")
+# Read from file
+with open("example.txt", "r", encoding="utf-8") as file:
+    content = file.read()       # Read entire file
+    # file.seek(0)              # Reset cursor to start
+    # lines = file.readlines()  # Read lines into a list
+    # for line in file:         # Iterate line by line (memory efficient)
+    #     print(line.strip())
+# Note: file is automatically closed outside the 'with' block.