gsppy 2.1.0__py3-none-any.whl → 2.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gsppy/cli.py CHANGED
@@ -27,17 +27,37 @@ Key Features:
  This CLI empowers users to perform sequential pattern mining on transactional data efficiently through
  a simple command-line interface.
  """
- import argparse
+ import os
  import csv
+ import sys
  import json
  import logging
- import os
- from typing import List
+ import argparse
+ from typing import Dict, List, Tuple

  from gsppy.gsp import GSP

+ # Configure logging
+ logging.basicConfig(
+     level=logging.INFO,
+     format="%(message)s",  # Simplified to keep CLI output clean
+     handlers=[logging.StreamHandler(sys.stdout)],
+ )
+ logger = logging.getLogger(__name__)

- def read_transactions_from_json(file_path: str) -> List[List]:
+
+ def setup_logging(verbose: bool) -> None:
+     """
+     Set the logging level based on the verbosity of the CLI output.
+     :param verbose: Whether to enable verbose logging.
+     """
+     if verbose:
+         logger.setLevel(logging.DEBUG)
+     else:
+         logger.setLevel(logging.INFO)
+
+
+ def read_transactions_from_json(file_path: str) -> List[List[str]]:
      """
      Read transactions from a JSON file.

@@ -52,9 +72,7 @@ def read_transactions_from_json(file_path: str) -> List[List]:
      """
      try:
          with open(file_path, 'r', encoding='utf-8') as f:
-             transactions = json.load(f)
-             if not isinstance(transactions, list) or not all(isinstance(t, list) for t in transactions):
-                 raise ValueError("File should contain a JSON array of transaction lists.")
+             transactions: List[List[str]] = json.load(f)
          return transactions
      except Exception as e:
          msg = f"Error reading transaction data from JSON file '{file_path}': {e}"
@@ -62,7 +80,7 @@ def read_transactions_from_json(file_path: str) -> List[List]:
          raise ValueError(msg) from e


- def read_transactions_from_csv(file_path: str) -> List[List]:
+ def read_transactions_from_csv(file_path: str) -> List[List[str]]:
      """
      Read transactions from a CSV file.

@@ -76,7 +94,7 @@ def read_transactions_from_csv(file_path: str) -> List[List]:
          ValueError: If the file cannot be read or contains invalid data.
      """
      try:
-         transactions = []
+         transactions: List[List[str]] = []
          with open(file_path, newline='', encoding='utf-8') as csvfile:
              reader = csv.reader(csvfile)
              for row in reader:
@@ -92,7 +110,7 @@ def read_transactions_from_csv(file_path: str) -> List[List]:
          raise ValueError(msg) from e


- def detect_and_read_file(file_path: str) -> List[List]:
+ def detect_and_read_file(file_path: str) -> List[List[str]]:
      """
      Detect file format (CSV or JSON) and read transactions.

@@ -120,7 +138,7 @@ def detect_and_read_file(file_path: str) -> List[List]:
      raise ValueError("Unsupported file format. Please provide a JSON or CSV file.")


- def main():
+ def main() -> None:
      """
      Main function to handle CLI input and run the GSP algorithm.

@@ -150,32 +168,42 @@ def main():
          help="Minimum support threshold as a fraction of total transactions (default: 0.2)"
      )

+     # Verbose output argument
+     parser.add_argument(
+         '--verbose',
+         action='store_true',
+         help='Enable verbose output for debugging purposes.'
+     )
+
      # Parse arguments
      args = parser.parse_args()

+     # Setup logging verbosity
+     setup_logging(args.verbose)
+
      # Automatically detect and load transactions
      try:
          transactions = detect_and_read_file(args.file)
      except ValueError as e:
-         print(f"Error: {e}")
+         logger.error(f"Error: {e}")
          return

      # Check min_support
      if args.min_support <= 0.0 or args.min_support > 1.0:
-         print("Error: min_support must be in the range (0.0, 1.0].")
+         logger.error("Error: min_support must be in the range (0.0, 1.0].")
          return

      # Initialize and run GSP algorithm
      try:
          gsp = GSP(transactions)
-         patterns = gsp.search(min_support=args.min_support)
-         print("Frequent Patterns Found:")
+         patterns: List[Dict[Tuple[str, ...], int]] = gsp.search(min_support=args.min_support)
+         logger.info("Frequent Patterns Found:")
          for i, level in enumerate(patterns, start=1):
-             print(f"\n{i}-Sequence Patterns:")
+             logger.info(f"\n{i}-Sequence Patterns:")
              for pattern, support in level.items():
-                 print(f"Pattern: {pattern}, Support: {support}")
+                 logger.info(f"Pattern: {pattern}, Support: {support}")
      except Exception as e:
-         print(f"Error executing GSP algorithm: {e}")
+         logger.error(f"Error executing GSP algorithm: {e}")


  if __name__ == '__main__':
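Editor's note: the `cli.py` changes above route all output through a module-level logger writing to stdout and add a `--verbose` flag that `setup_logging()` maps to `DEBUG`. As a rough, hedged sketch (not part of the package), the snippet below drives the updated `main()` the same way the removed test suite did; the temporary file, the `argv[0]` label, and the argument values are illustrative only.

```python
import json
import os
import sys
import tempfile

from gsppy.cli import main

# Write a tiny transaction set to a temporary JSON file (example data only).
with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False) as tmp:
    json.dump([["Bread", "Milk"], ["Milk", "Diaper"], ["Bread", "Diaper", "Beer"]], tmp)

# Simulate a command line; --verbose lowers the logger to DEBUG via setup_logging().
sys.argv = ["gsppy-cli", "--file", tmp.name, "--min_support", "0.5", "--verbose"]
main()  # frequent patterns are now emitted through the logger instead of print()

os.unlink(tmp.name)  # clean up the temporary file
```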
gsppy/gsp.py CHANGED
@@ -86,9 +86,9 @@ Version:
  """
  import logging
  import multiprocessing as mp
- from collections import Counter
+ from typing import Any, Dict, List, Tuple
  from itertools import chain
- from typing import List, Dict, Tuple
+ from collections import Counter

  from gsppy.utils import split_into_batches, is_subsequence_in_list, generate_candidates_from_previous

@@ -114,7 +114,7 @@ class GSP:
          k-sequence for pattern generation.
      """

-     def __init__(self, raw_transactions: List[List]):
+     def __init__(self, raw_transactions: List[List[str]]):
          """
          Initialize the GSP algorithm with raw transactional data.

@@ -132,10 +132,10 @@ class GSP:
              ValueError: If the input transaction dataset is empty, contains
                  fewer than two transactions, or is not properly formatted.
          """
-         self.freq_patterns = []
+         self.freq_patterns: List[Dict[Tuple[str, ...], int]] = []
          self._pre_processing(raw_transactions)

-     def _pre_processing(self, raw_transactions: List[List]):
+     def _pre_processing(self, raw_transactions: List[List[str]]) -> None:
          """
          Validate and preprocess the input transactional dataset.

@@ -167,20 +167,19 @@ class GSP:
              logger.error(msg)
              raise ValueError(msg)

-         if not all(isinstance(item, list) for item in raw_transactions):
-             msg = "The dataset must be a list of transactions."
-             logger.error(msg)
-             raise ValueError(msg)
-
          logger.info("Pre-processing transactions...")
          self.max_size = max(len(item) for item in raw_transactions)
-         self.transactions = [tuple(transaction) for transaction in raw_transactions]
-         counts = Counter(chain.from_iterable(raw_transactions))
-         self.unique_candidates = [(item,) for item in counts.keys()]
+         self.transactions: List[Tuple[str, ...]] = [tuple(transaction) for transaction in raw_transactions]
+         counts: Counter[str] = Counter(chain.from_iterable(raw_transactions))
+         self.unique_candidates: list[tuple[str, Any]] = [(item,) for item in counts.keys()]
          logger.debug("Unique candidates: %s", self.unique_candidates)

      @staticmethod
-     def _worker_batch(batch: List[Tuple], transactions: List[Tuple], min_support: int) -> List[Tuple[Tuple, int]]:
+     def _worker_batch(
+         batch: List[Tuple[str, ...]],
+         transactions: List[Tuple[str, ...]],
+         min_support: int
+     ) -> List[Tuple[Tuple[str, ...], int]]:
          """
          Evaluate a batch of candidate sequences to compute their support.

@@ -198,14 +197,17 @@ class GSP:
              - A candidate sequence.
              - The candidate's support count.
          """
-         results = []
+         results: List[Tuple[Tuple[str, ...], int]] = []
          for item in batch:
              frequency = sum(1 for t in transactions if is_subsequence_in_list(item, t))
              if frequency >= min_support:
                  results.append((item, frequency))
          return results

-     def _support(self, items: List[Tuple], min_support: float = 0, batch_size: int = 100) -> Dict[Tuple, int]:
+     def _support(
+         self,
+         items: List[Tuple[str, ...]], min_support: float = 0, batch_size: int = 100
+     ) -> Dict[Tuple[str, ...], int]:
          """
          Calculate support counts for candidate sequences, using parallel processing.

@@ -235,7 +237,7 @@ class GSP:
          # Flatten the list of results and convert to a dictionary
          return {item: freq for batch in batch_results for item, freq in batch}

-     def _print_status(self, run: int, candidates: List[Tuple]):
+     def _print_status(self, run: int, candidates: List[Tuple[str, ...]]) -> None:
          """
          Log progress information for the current GSP iteration.

@@ -249,7 +251,7 @@ class GSP:
          logger.info("Run %d: %d candidates filtered to %d.",
                      run, len(candidates), len(self.freq_patterns[run - 1]))

-     def search(self, min_support: float = 0.2) -> List[Dict[Tuple, int]]:
+     def search(self, min_support: float = 0.2) -> List[Dict[Tuple[str, ...], int]]:
          """
          Execute the Generalized Sequential Pattern (GSP) mining algorithm.

@@ -263,8 +265,9 @@ class GSP:
              appears in at least 30% of all transactions.

          Returns:
-             List[Dict[Tuple, int]]: A list where each element corresponds to a k-sequence-level
-                 dictionary, mapping frequent patterns to their support counts.
+             List[Dict[Tuple[str, ...], int]]: A list of dictionaries containing frequent patterns
+                 at each k-sequence level, with patterns as keys
+                 and their support counts as values.

          Raises:
              ValueError: If the minimum support threshold is not in the range `(0.0, 1.0]`.
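Editor's note: the `gsp.py` changes are type-hint tightening plus the removal of the up-front "list of lists" validation, so malformed inputs now surface later rather than at construction time. A minimal sketch of the public API touched above; the transaction data and the expected output shape are taken from the `test_gsp.py` file removed in this release (shown further down in this diff).

```python
from typing import Dict, List, Tuple

from gsppy.gsp import GSP

transactions = [
    ['Bread', 'Milk'],
    ['Bread', 'Diaper', 'Beer', 'Eggs'],
    ['Milk', 'Diaper', 'Beer', 'Coke'],
    ['Bread', 'Milk', 'Diaper', 'Beer'],
    ['Bread', 'Milk', 'Diaper', 'Coke'],
]

gsp = GSP(transactions)
# One dict per k-sequence level, mapping pattern tuples to support counts,
# e.g. {('Bread',): 4, ...} at level 1 and {('Bread', 'Milk'): 3, ...} at level 2.
patterns: List[Dict[Tuple[str, ...], int]] = gsp.search(min_support=0.3)
for level, frequent in enumerate(patterns, start=1):
    print(f"{level}-sequence patterns: {frequent}")
```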
gsppy/utils.py CHANGED
@@ -20,34 +20,35 @@ Main functionalities:
  These utilities are designed to support sequence processing tasks and can be
  adapted to various domains, such as data mining, recommendation systems, and sequence analysis.
  """
+ from typing import Dict, List, Tuple, Sequence, Generator
  from functools import lru_cache
  from itertools import product
- from typing import List, Tuple, Generator, Dict


- def split_into_batches(items: List[Tuple], batch_size: int) -> Generator[List[Tuple], None, None]:
+ def split_into_batches(
+     items: Sequence[Tuple[str, ...]], batch_size: int
+ ) -> Generator[Sequence[Tuple[str, ...]], None, None]:
      """
      Split the list of items into smaller batches.

      Parameters:
-         items (List[Tuple]): The list of candidate items.
+         items (Sequence[Tuple]): A sequence of items to be batched.
          batch_size (int): The maximum size of each batch.

      Returns:
-         List[List[Tuple]]: A list of batches, where each batch contains a subset of candidate items.
+         Generator[Sequence[Tuple], None, None]: A generator yielding batches of items.
      """
      for i in range(0, len(items), batch_size):
          yield items[i:i + batch_size]


- # Cache the results of the slice comparison function to avoid redundant calculations
  @lru_cache(maxsize=None)
- def is_subsequence_in_list(subsequence: Tuple, sequence: Tuple) -> bool:
+ def is_subsequence_in_list(subsequence: Tuple[str, ...], sequence: Tuple[str, ...]) -> bool:
      """
      Check if a subsequence exists within a sequence as a contiguous subsequence.

      Parameters:
-         subsequence: Tuple (tuple): The sequence to search for.
+         subsequence: (tuple): The sequence to search for.
          sequence (tuple): The sequence to search within.

      Returns:
@@ -67,12 +68,14 @@ def is_subsequence_in_list(subsequence: Tuple, sequence: Tuple) -> bool:
      return any(sequence[i:i + len_sub] == subsequence for i in range(len_seq - len_sub + 1))


- def generate_candidates_from_previous(prev_patterns: Dict[Tuple, int]) -> List[Tuple]:
+ def generate_candidates_from_previous(
+     prev_patterns: Dict[Tuple[str, ...], int]
+ ) -> List[Tuple[str, ...]]:
      """
      Generate joined candidates from the previous level's frequent patterns.

      Parameters:
-         prev_patterns (Dict[Tuple, int]): Frequent patterns at the previous level.
+         prev_patterns (Dict[Tuple, int]): A dictionary of frequent patterns from the previous level.

      Returns:
          List[Tuple]: Candidate patterns for the next level.
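Editor's note: a small, hedged sketch of the two `utils` functions whose signatures changed above, mirroring the assertions in the removed `test_utils.py` (shown further down in this diff); the integer tuples are example data only.

```python
from gsppy.utils import is_subsequence_in_list, generate_candidates_from_previous

# Matching is contiguous: (1, 2) occurs in (0, 1, 2, 3), but (1, 3) does not.
assert is_subsequence_in_list((1, 2), (0, 1, 2, 3))
assert not is_subsequence_in_list((1, 3), (0, 1, 2, 3))

# Level-(k+1) candidates are built by joining overlapping level-k patterns,
# e.g. (1, 2) + (2, 3) -> (1, 2, 3) and (2, 3) + (3, 4) -> (2, 3, 4).
prev = {(1, 2): 3, (2, 3): 4, (3, 4): 5}
print(generate_candidates_from_previous(prev))
```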
gsppy-2.1.0.dist-info/METADATA → gsppy-2.2.0.dist-info/METADATA CHANGED
@@ -1,32 +1,59 @@
- Metadata-Version: 2.1
+ Metadata-Version: 2.4
  Name: gsppy
- Version: 2.1.0
+ Version: 2.2.0
  Summary: GSP (Generalized Sequence Pattern) algorithm in Python
- Home-page: https://github.com/jacksonpradolima/gsp-py
- Author: Jackson Antonio do Prado Lima
- Author-email: jacksonpradolima@gmail.com
- Maintainer: Jackson Antonio do Prado Lima
- Maintainer-email: jacksonpradolima@gmail.com
- License: MIT
- Keywords: GSP,sequential patterns,data analysis,sequence mining
+ Project-URL: Homepage, https://github.com/jacksonpradolima/gsp-py
+ Author-email: Jackson Antonio do Prado Lima <jacksonpradolima@gmail.com>
+ Maintainer-email: Jackson Antonio do Prado Lima <jacksonpradolima@gmail.com>
+ License: MIT License
+
+ Copyright (c) 2024 Jackson Antonio do Prado Lima
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
+ License-File: LICENSE
+ Keywords: GSP,data analysis,sequence mining,sequential patterns
+ Classifier: Intended Audience :: Science/Research
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Natural Language :: English
+ Classifier: Operating System :: OS Independent
  Classifier: Programming Language :: Python :: 3.8
  Classifier: Programming Language :: Python :: 3.9
  Classifier: Programming Language :: Python :: 3.10
  Classifier: Programming Language :: Python :: 3.11
- Classifier: Operating System :: OS Independent
- Classifier: License :: OSI Approved :: MIT License
- Classifier: Intended Audience :: Science/Research
+ Classifier: Programming Language :: Python :: 3.12
+ Classifier: Programming Language :: Python :: 3.13
  Classifier: Topic :: Scientific/Engineering :: Information Analysis
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
- Classifier: Natural Language :: English
  Requires-Python: >=3.8
- Description-Content-Type: text/markdown
- License-File: LICENSE
  Provides-Extra: dev
- Requires-Dist: pylint==3.3.3; extra == "dev"
- Requires-Dist: pytest==8.3.4; extra == "dev"
- Requires-Dist: pytest-benchmark==5.1.0; extra == "dev"
- Requires-Dist: pytest-cov==6.0.0; extra == "dev"
+ Requires-Dist: cython==3.0.11; extra == 'dev'
+ Requires-Dist: hatch==1.14.0; extra == 'dev'
+ Requires-Dist: hatchling==1.27.0; extra == 'dev'
+ Requires-Dist: mypy==1.14.0; extra == 'dev'
+ Requires-Dist: pylint==3.3.3; extra == 'dev'
+ Requires-Dist: pyright==1.1.391; extra == 'dev'
+ Requires-Dist: pytest-benchmark==5.1.0; extra == 'dev'
+ Requires-Dist: pytest-cov==6.0.0; extra == 'dev'
+ Requires-Dist: pytest==8.3.4; extra == 'dev'
+ Requires-Dist: ruff==0.8.4; extra == 'dev'
+ Requires-Dist: tox==4.23.2; extra == 'dev'
+ Description-Content-Type: text/markdown

  [![PyPI License](https://img.shields.io/pypi/l/gsppy.svg?style=flat-square)]()
  ![](https://img.shields.io/badge/python-3.8+-blue.svg)
@@ -123,16 +150,45 @@ pip install gsppy

  ## 🛠️ Developer Installation

- For contributors and developers, GSP-Py provides additional dependencies for development purposes (e.g., testing and
- linting).
+ This project uses [Rye](https://github.com/mitsuhiko/rye) for managing dependencies, running scripts, and setting up the environment. Follow these steps to install and set up Rye for this project:

- To install the package along with development dependencies, use:
+ #### 1. Install Rye
+ Run the following command to install Rye:

  ```bash
- pip install .[dev]
+ curl -sSf https://rye.astral.sh/get | bash
  ```

- The `dev` category includes tools such as `pytest`, `pylint`, and others to ensure code quality and maintainability.
+ If the `~/.rye/bin` directory is not in your PATH, add the following line to your shell configuration file (e.g., `~/.bashrc`, `~/.zshrc`, etc.):
+
+ ```bash
+ export PATH="$HOME/.rye/bin:$PATH"
+ ```
+
+ Reload your shell configuration file:
+
+ ```bash
+ source ~/.bashrc  # or `source ~/.zshrc`
+ ```
+
+ #### 2. Set Up the Project Environment
+ To configure the project environment and install its dependencies, run:
+
+ ```bash
+ rye sync
+ ```
+
+ #### 3. Use Rye Scripts
+ Once the environment is set up, you can run the following commands to simplify project tasks:
+
+ - Run tests: `rye run test`
+ - Format code: `rye run format`
+ - Lint code: `rye run lint`
+ - Type-check: `rye run typecheck`
+
+ #### Notes
+ - Rye automatically reads dependencies and scripts from the `pyproject.toml` file.
+ - No need for `requirements.txt`, as Rye manages all dependencies!

  ## 💡 Usage

gsppy-2.2.0.dist-info/RECORD ADDED
@@ -0,0 +1,9 @@
+ gsppy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ gsppy/cli.py,sha256=YxBL341LJzb6EN-RBkhW3o4ZCexOGiQXq_aRovKccA8,6790
+ gsppy/gsp.py,sha256=CUCC1W5GGlGbWkC_td0qDfnSJiuzbWoMapR0qciejw8,13800
+ gsppy/utils.py,sha256=gOT3USxmC0MrBnSHOQ8avxghWmjQe59hS4jNQ3eiENQ,3363
+ gsppy-2.2.0.dist-info/METADATA,sha256=1Y8LcuU7engLWoCWFIKRwRMNsgkAawnpvX6s1BoXP_8,12485
+ gsppy-2.2.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ gsppy-2.2.0.dist-info/entry_points.txt,sha256=smvmcIWk424ARIGKOC_BM42hpT_SptKPcIeqs-8u8lM,41
+ gsppy-2.2.0.dist-info/licenses/LICENSE,sha256=co1jy5VZd1wXOPdUC2uk1hn7zsBm6aJNgVmhPOZ47g8,1086
+ gsppy-2.2.0.dist-info/RECORD,,
gsppy-2.1.0.dist-info/WHEEL → gsppy-2.2.0.dist-info/WHEEL CHANGED
@@ -1,5 +1,4 @@
  Wheel-Version: 1.0
- Generator: setuptools (75.6.0)
+ Generator: hatchling 1.27.0
  Root-Is-Purelib: true
  Tag: py3-none-any
-
gsppy/tests/__init__.py DELETED
File without changes
gsppy/tests/test_cli.py DELETED
@@ -1,326 +0,0 @@
- """
- This module contains unit tests for the CLI-related functionality of the `gsppy` package
- and the Generalized Sequential Pattern (GSP) mining algorithm. The tests ensure correctness,
- robustness, and error handling for both file handling and the GSP algorithm implementation.
-
- The tests include:
- 1. Validating file input handling for both JSON and CSV formats.
- 2. Ensuring proper error handling for invalid or malformed files (JSON, CSV) and unsupported formats.
- 3. Testing exceptions for non-existent files.
- 4. Verifying the behavior of the GSP algorithm when given valid inputs and configurations.
- 5. Checking for appropriate error handling when invalid parameters (e.g., `min_support`)
-    are provided to the GSP algorithm.
-
- Key components tested:
- - `detect_and_read_file`: A method to detect the file type (JSON/CSV) and read transactions from it.
- - `GSP.search`: Validates the sequential pattern mining functionality for valid and invalid `min_support` parameters.
-
- Fixtures are used to create temporary files (valid/invalid JSON and CSV) for reliable testing
- without affecting the file system.
- Pytest is utilized for parametrized testing to improve coverage and reduce redundancy in test cases.
- """
- import json
- import os
- import runpy
- import sys
- import tempfile
- from unittest.mock import patch
-
- import pytest
-
- from gsppy.cli import detect_and_read_file, main
- from gsppy.gsp import GSP
-
-
- def test_invalid_json_structure():
-     """
-     Test if a JSON file with an invalid structure raises an error.
-     """
-     # Create an invalid JSON structure that does not adhere to the expected format
-     with tempfile.NamedTemporaryFile(delete=False, suffix=".json", mode="w") as temp_file:
-         temp_file.write(json.dumps({"invalid": "data"}))
-         temp_file_name = temp_file.name
-
-     # Attempt to read the invalid JSON file
-     with pytest.raises(ValueError, match="File should contain a JSON array of transaction lists."):
-         detect_and_read_file(temp_file_name)
-
-     # Cleanup
-     os.unlink(temp_file_name)
-
-
- @pytest.fixture
- def valid_json_file():
-     """Fixture to create a valid JSON file."""
-     with tempfile.NamedTemporaryFile(delete=False, suffix=".json", mode="w") as temp_file:
-         json.dump([["Bread", "Milk"], ["Milk", "Diaper"], ["Bread", "Diaper", "Beer"]], temp_file)
-         temp_file_name = temp_file.name
-     yield temp_file_name
-     os.unlink(temp_file_name)
-
-
- @pytest.fixture
- def valid_csv_file():
-     """Fixture to create a valid CSV file."""
-     with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as temp_file:
-         temp_file.write(b"Bread,Milk\nMilk,Diaper\nBread,Diaper,Beer\n")
-         temp_file_name = temp_file.name
-     yield temp_file_name
-     os.unlink(temp_file_name)
-
-
- @pytest.fixture
- def invalid_json_file():
-     """Fixture to create an invalid JSON file."""
-     with tempfile.NamedTemporaryFile(delete=False, suffix=".json") as temp_file:
-         temp_file.write(b"{invalid_json: true")  # Malformed JSON
-         temp_file_name = temp_file.name
-     yield temp_file_name
-     os.unlink(temp_file_name)
-
-
- @pytest.fixture
- def invalid_csv_file():
-     """Fixture to create an invalid CSV file."""
-     with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as temp_file:
-         temp_file.write(b",,\nBread,,Milk\n")  # Broken format
-         temp_file_name = temp_file.name
-     yield temp_file_name
-     os.unlink(temp_file_name)
-
-
- @pytest.fixture
- def unsupported_file():
-     """Fixture to create an unsupported file."""
-     with tempfile.NamedTemporaryFile(delete=False, suffix=".txt") as temp_file:
-         temp_file.write(b"This is a plain text file.")
-         temp_file_name = temp_file.name
-     yield temp_file_name
-     os.unlink(temp_file_name)
-
-
- def test_valid_json_file(valid_json_file):
-     """Test if a valid JSON file is correctly read."""
-     transactions = detect_and_read_file(valid_json_file)
-     assert transactions == [["Bread", "Milk"], ["Milk", "Diaper"], ["Bread", "Diaper", "Beer"]]
-
-
- def test_valid_csv_file(valid_csv_file):
-     """Test if a valid CSV file is correctly read."""
-     transactions = detect_and_read_file(valid_csv_file)
-     assert transactions == [["Bread", "Milk"], ["Milk", "Diaper"], ["Bread", "Diaper", "Beer"]]
-
-
- def test_invalid_json_file(invalid_json_file):
-     """Test if an invalid JSON file raises an error."""
-     with pytest.raises(ValueError, match="Error reading transaction data from JSON file"):
-         detect_and_read_file(invalid_json_file)
-
-
- def test_invalid_csv_file(invalid_csv_file):
-     """Test if an invalid CSV file raises an error."""
-     with pytest.raises(ValueError, match="Error reading transaction data from CSV file"):
-         detect_and_read_file(invalid_csv_file)
-
-
- def test_unsupported_file_format(unsupported_file):
-     """Test if an unsupported file format raises an error."""
-     with pytest.raises(ValueError, match="Unsupported file format"):
-         detect_and_read_file(unsupported_file)
-
-
- def test_non_existent_file():
-     """Test if a non-existent file raises an error."""
-     with pytest.raises(ValueError, match="File 'non_existent_file.json' does not exist."):
-         detect_and_read_file("non_existent_file.json")
-
-
- @pytest.mark.parametrize("min_support", [-0.1, 1.1])
- def test_invalid_min_support_gsp(min_support):
-     """Test if invalid min_support values raise an error."""
-     transactions = [["Bread", "Milk"], ["Milk", "Diaper"], ["Bread", "Diaper", "Beer"]]
-     gsp = GSP(transactions)
-     with pytest.raises(ValueError):
-         gsp.search(min_support=min_support)
-
-
- @pytest.mark.parametrize("min_support", [0.5])
- def test_valid_min_support_gsp(min_support):
-     """Test if valid min_support values work with the GSP algorithm."""
-     transactions = [["Bread", "Milk"], ["Milk", "Diaper"], ["Bread", "Diaper", "Beer"]]
-     gsp = GSP(transactions)
-     patterns = gsp.search(min_support=min_support)
-     assert len(patterns) > 0  # Ensure at least some patterns are found
-     assert patterns[0]  # Ensure frequent patterns are not empty
-
-
- def test_main_invalid_json_file(monkeypatch, capfd):
-     """
-     Test `main()` with a JSON file that has an invalid structure.
-     """
-     # Create an invalid JSON file
-     with tempfile.NamedTemporaryFile(delete=False, suffix=".json", mode="w") as temp_file:
-         temp_file.write(json.dumps({"invalid": "data"}))
-         temp_file_name = temp_file.name
-
-     # Mock CLI arguments
-     monkeypatch.setattr(
-         'sys.argv', ['main', '--file', temp_file_name, '--min_support', '0.2']
-     )
-
-     main()
-
-     # Capture output
-     captured = capfd.readouterr()
-     assert "File should contain a JSON array of transaction lists." in captured.out
-
-     # Cleanup
-     os.unlink(temp_file_name)
-
-
- def test_main_non_existent_file(monkeypatch, capfd):
-     """
-     Test `main()` with a file that does not exist.
-     """
-     # Mock CLI arguments
-     monkeypatch.setattr(
-         'sys.argv', ['main', '--file', 'non_existent.json', '--min_support', '0.2']
-     )
-
-     main()
-
-     # Capture output
-     captured = capfd.readouterr()
-     assert "File 'non_existent.json' does not exist." in captured.out
-
-
- def test_main_valid_json_file(monkeypatch, capfd):
-     """
-     Test `main()` with a valid JSON file.
-     """
-     # Create a valid JSON file
-     with tempfile.NamedTemporaryFile(delete=False, suffix=".json", mode="w") as temp_file:
-         json.dump([["Bread", "Milk"], ["Milk", "Diaper"], ["Bread", "Diaper", "Beer"]], temp_file)
-         temp_file_name = temp_file.name
-
-     # Mock CLI arguments
-     monkeypatch.setattr(
-         'sys.argv', ['main', '--file', temp_file_name, '--min_support', '0.2']
-     )
-
-     main()
-
-     # Capture output
-     captured = capfd.readouterr()
-     assert "Frequent Patterns Found:" in captured.out
-
-     # Cleanup
-     os.unlink(temp_file_name)
-
-
- def test_main_invalid_min_support(monkeypatch, capfd):
-     """
-     Test `main()` with an invalid `min_support` value.
-     """
-     # Create a valid JSON file
-     with tempfile.NamedTemporaryFile(delete=False, suffix=".json", mode="w") as temp_file:
-         json.dump([["Bread", "Milk"], ["Milk", "Diaper"], ["Bread", "Diaper", "Beer"]], temp_file)
-         temp_file_name = temp_file.name
-
-     # Mock CLI arguments
-     monkeypatch.setattr(
-         'sys.argv', ['main', '--file', temp_file_name, '--min_support', '-1.0']  # Invalid min_support
-     )
-
-     main()
-
-     # Capture output
-     captured = capfd.readouterr()
-     assert "Error: min_support must be in the range (0.0, 1.0]." in captured.out
-
-     # Cleanup
-     os.unlink(temp_file_name)
-
-
- def test_main_entry_point(monkeypatch, capfd):
-     """
-     Test the script entry point (`if __name__ == '__main__': main()`).
-     """
-     # Create a valid JSON file
-     with tempfile.NamedTemporaryFile(delete=False, suffix=".json", mode="w") as temp_file:
-         json.dump([["Bread", "Milk"], ["Milk", "Diaper"], ["Bread", "Diaper", "Beer"]], temp_file)
-         temp_file_name = temp_file.name
-
-     # Mock CLI arguments - Simulating script call
-     monkeypatch.setattr(
-         'sys.argv', ['gsppy.cli', '--file', temp_file_name, '--min_support', '0.2']
-     )
-
-     # Remove the module from sys.modules before running it
-     if 'gsppy.cli' in sys.modules:
-         del sys.modules['gsppy.cli']
-
-     # Use `runpy` to execute the script as if it were run from the command line
-     runpy.run_module('gsppy.cli', run_name='__main__')
-
-     # Capture the output
-     captured = capfd.readouterr()
-     assert "Frequent Patterns Found:" in captured.out
-
-     # Cleanup
-     os.unlink(temp_file_name)
-
-
- def test_main_edge_case_min_support(monkeypatch, capfd):
-     """
-     Test `main()` with edge-case values for `min_support` (valid and invalid).
-     """
-     # Create a valid JSON
-     with tempfile.NamedTemporaryFile(delete=False, suffix=".json", mode="w") as temp_file:
-         json.dump([["Bread", "Milk"], ["Milk", "Diaper"], ["Bread", "Diaper", "Beer"]], temp_file)
-         temp_file_name = temp_file.name
-
-     # Case 1: `min_support` = 1.0 (Valid Edge Case)
-     monkeypatch.setattr(
-         'sys.argv', ['main', '--file', temp_file_name, '--min_support', '1.0']
-     )
-     main()
-     captured = capfd.readouterr()
-     assert "Frequent Patterns Found:" in captured.out
-
-     # Case 2: `min_support` = -1.0 (Invalid Edge Case)
-     monkeypatch.setattr(
-         'sys.argv', ['main', '--file', temp_file_name, '--min_support', '-1.0']
-     )
-     main()
-     captured = capfd.readouterr()
-     assert "Error: min_support must be in the range (0.0, 1.0]." in captured.out
-
-     # Cleanup
-     os.unlink(temp_file_name)
-
-
- def test_main_gsp_exception(monkeypatch, capfd):
-     """
-     Test `main()` when the GSP algorithm raises an exception.
-     """
-     # Step 1: Create a valid JSON file
-     with tempfile.NamedTemporaryFile(delete=False, suffix=".json", mode="w") as temp_file:
-         json.dump([["Bread", "Milk"], ["Milk", "Diaper"], ["Bread", "Diaper", "Beer"]], temp_file)
-         temp_file_name = temp_file.name
-
-     # Step 2: Mock CLI arguments
-     monkeypatch.setattr(
-         'sys.argv', ['main', '--file', temp_file_name, '--min_support', '0.2']
-     )
-
-     # Step 3: Mock GSP.search to raise an exception
-     with patch('gsppy.gsp.GSP.search', side_effect=Exception("Simulated GSP failure")):
-         main()
-
-     # Step 4: Capture output and assert the error message
-     captured = capfd.readouterr()
-     assert "Error executing GSP algorithm: Simulated GSP failure" in captured.out
-
-     # Step 5: Cleanup
-     os.unlink(temp_file_name)
gsppy/tests/test_gsp.py DELETED
@@ -1,250 +0,0 @@
- """
- Unit tests for the GSP (Generalized Sequential Pattern) algorithm.
-
- This module contains tests for various scenarios including edge cases,
- benchmarking, and normal use cases of the GSP algorithm. The tests use
- `pytest` for assertions and include fixtures for reusable data.
-
- Tests include:
- - Empty transactions.
- - Single transaction.
- - High minimum support filtering.
- - Typical supermarket transactions with known frequent patterns.
- - Randomly generated transactions for flexibility.
- - Large transactions with repetitive items.
- - Partial matches and benchmarking.
-
- Author: Jackson Antonio do Prado Lima
- Email: jacksonpradolima@gmail.com
- """
- import random
- import re
-
- import pytest
-
- from gsppy.gsp import GSP
-
-
- @pytest.fixture
- def supermarket_transactions():
-     """
-     Fixture to provide a dataset representing supermarket transactions.
-
-     Returns:
-         list: A list of transactions, where each transaction is a list of items.
-     """
-     return [
-         ['Bread', 'Milk'],
-         ['Bread', 'Diaper', 'Beer', 'Eggs'],
-         ['Milk', 'Diaper', 'Beer', 'Coke'],
-         ['Bread', 'Milk', 'Diaper', 'Beer'],
-         ['Bread', 'Milk', 'Diaper', 'Coke']
-     ]
-
-
- @pytest.fixture
- def random_transactions():
-     """
-     Fixture to generate a random dataset of transactions.
-
-     Returns:
-         list: A list of transactions with random items and varying lengths.
-     """
-     return [[random.choice(['A', 'B', 'C', 'D', 'E']) for _ in range(random.randint(2, 10))] for _ in range(100)]
-
-
- def test_empty_transactions():
-     """
-     Test the GSP algorithm with an empty dataset.
-
-     Asserts:
-         - A ValueError is raised indicating that the dataset is empty.
-     """
-     transactions = []
-     with pytest.raises(ValueError, match="Input transactions are empty"):
-         GSP(transactions)
-
-
- def test_single_transaction():
-     """
-     Test the GSP algorithm with a single transaction.
-
-     Asserts:
-         - A ValueError is raised indicating that GSP requires multiple transactions.
-     """
-     transactions = [['A', 'B', 'C']]
-     with pytest.raises(ValueError, match="GSP requires multiple transactions"):
-         GSP(transactions)
-
-
- def test_invalid_transaction_format():
-     """
-     Test the GSP algorithm with invalid transaction formats.
-
-     Asserts:
-         - A ValueError is raised indicating that the transactions must be lists of lists.
-     """
-     invalid_data = ["A", "B"]  # Invalid format: not a list of lists
-     with pytest.raises(ValueError, match="The dataset must be a list of transactions."):
-         GSP(invalid_data)
-
-
- @pytest.mark.parametrize(
-     "min_support, expected_error",
-     [
-         (-0.1, re.escape("Minimum support must be in the range (0.0, 1.0]")),
-         (0.0, re.escape("Minimum support must be in the range (0.0, 1.0]")),
-         (1.1, re.escape("Minimum support must be in the range (0.0, 1.0]")),
-     ]
- )
- def test_invalid_min_support(supermarket_transactions, min_support, expected_error):
-     """
-     Test the GSP algorithm with invalid minimum support values.
-
-     Asserts:
-         - A ValueError is raised if the min_support is outside the valid range.
-     """
-     gsp = GSP(supermarket_transactions)
-     with pytest.raises(ValueError, match=expected_error):
-         gsp.search(min_support=min_support)
-
-
- def test_valid_min_support_edge(supermarket_transactions):
-     """
-     Test the GSP algorithm with a valid edge value for min_support.
-
-     Asserts:
-         - The algorithm runs successfully when min_support is set to 1.0.
-     """
-     gsp = GSP(supermarket_transactions)
-     result = gsp.search(min_support=1.0)  # Only patterns supported by ALL transactions should remain
-     assert not result, "Expected no frequent patterns with min_support = 1.0"
-
-
- def test_min_support_valid(supermarket_transactions):
-     """
-     Test the GSP algorithm with a minimum support set just above 0.0.
-
-     Asserts:
-         - Frequent patterns are generated correctly for a low min_support threshold.
-     """
-     gsp = GSP(supermarket_transactions)
-     result = gsp.search(min_support=0.2)  # At least 1 transaction should support the pattern
-
-     # All items should appear as 1-item patterns
-     level_1_patterns = {('Bread',), ('Milk',), ('Diaper',), ('Beer',), ('Coke',), ('Eggs',)}
-     result_level_1 = set(result[0].keys())  # Extract patterns from Level 1
-
-     assert result_level_1 == level_1_patterns, f"Level 1 patterns mismatch. Got {result_level_1}"
-
-
- def test_no_frequent_items(supermarket_transactions):
-     """
-     Test the GSP algorithm with a high minimum support value.
-
-     Asserts:
-         - The result should be an empty list due to filtering out all items.
-     """
-     gsp = GSP(supermarket_transactions)
-     result = gsp.search(min_support=0.9)  # High minimum support
-     assert not result, "High minimum support should filter out all items."
-
-
- def test_worker_batch_static_method(supermarket_transactions):
-     """
-     Test the _worker_batch method directly for checkpoint validation.
-
-     Asserts:
-         - Candidates below the minimum support are filtered out.
-         - Candidates meeting the minimum support are returned with correct counts.
-     """
-     batch = [('Bread',), ('Milk',), ('Diaper',), ('Eggs',)]  # 1-sequence candidates
-     transactions = [tuple(t) for t in supermarket_transactions]
-     min_support = 3  # Absolute support count
-     expected = [(('Bread',), 4), (('Milk',), 4), (('Diaper',), 4)]
-
-     # Call the '_worker_batch' method
-     # This test accesses `_worker_batch` to test internal functionality
-     results = GSP._worker_batch(batch, transactions, min_support)  # pylint: disable=protected-access
-     assert results == expected, f"Expected results {expected}, but got {results}"
-
-
- def test_frequent_patterns(supermarket_transactions):
-     """
-     Test the GSP algorithm with supermarket transactions and a realistic minimum support.
-
-     Asserts:
-         - The frequent patterns should match the expected result.
-     """
-     gsp = GSP(supermarket_transactions)
-     result = gsp.search(min_support=0.3)
-     expected = [
-         {('Bread',): 4, ('Milk',): 4, ('Diaper',): 4, ('Beer',): 3, ('Coke',): 2},
-         {('Bread', 'Milk'): 3, ('Milk', 'Diaper'): 3, ('Diaper', 'Beer'): 3},
-         {('Bread', 'Milk', 'Diaper'): 2, ('Milk', 'Diaper', 'Beer'): 2}
-     ]
-     assert result == expected, "Frequent patterns do not match expected results."
-
-
- def test_random_transactions(random_transactions):
-     """
-     Test the GSP algorithm with a random dataset.
-
-     Asserts:
-         - The result should contain some frequent patterns with a low minimum support.
-     """
-     gsp = GSP(random_transactions)
-     result = gsp.search(min_support=0.1)  # Low support to ensure some patterns emerge
-     assert len(result) > 0, "Random transactions should yield some frequent patterns with low min_support."
-
-
- def test_large_transactions():
-     """
-     Test the GSP algorithm with a large single transaction.
-
-     Asserts:
-         - A ValueError is raised indicating that GSP requires multiple transactions.
-     """
-     transactions = [['A'] * 1000]  # Single transaction with 1000 identical items
-     with pytest.raises(ValueError, match="GSP requires multiple transactions to find meaningful patterns."):
-         GSP(transactions)
-
-
- def test_partial_match(supermarket_transactions):
-     """
-     Test the GSP algorithm with additional partial matches.
-
-     Asserts:
-         - Frequent patterns are generated correctly for the given transactions.
-     """
-     transactions = supermarket_transactions + [['Diaper', 'Milk']]
-     gsp = GSP(transactions)
-     result = gsp.search(min_support=0.3)  # Adjusted minimum support to match more patterns
-
-     # Debug output to inspect generated frequent patterns
-     print("Generated frequent patterns:", result)
-
-     # Check for the presence of valid frequent patterns
-     expected_patterns_level_1 = {('Bread',), ('Milk',), ('Diaper',), ('Beer',)}
-     expected_patterns_level_2 = {('Bread', 'Milk'), ('Milk', 'Diaper'), ('Diaper', 'Beer')}
-
-     # Convert results to sets for easier comparison
-     result_level_1 = set(result[0].keys())
-     assert result_level_1 >= expected_patterns_level_1, f"Level 1 patterns mismatch. Got {result_level_1}"
-
-     # Add a condition to avoid IndexError for empty results
-     if len(result) > 1:
-         result_level_2 = set(result[1].keys())
-         assert result_level_2 >= expected_patterns_level_2, f"Level 2 patterns mismatch. Got {result_level_2}"
-
-
- @pytest.mark.parametrize("min_support", [0.1, 0.2, 0.3, 0.4, 0.5])
- def test_benchmark(benchmark, supermarket_transactions, min_support):
-     """
-     Benchmark the GSP algorithm's performance using the supermarket dataset.
-
-     Uses:
-         pytest-benchmark: To measure execution time.
-     """
-     gsp = GSP(supermarket_transactions)
-     benchmark(gsp.search, min_support=min_support)
gsppy/tests/test_utils.py DELETED
@@ -1,91 +0,0 @@
- """
- Test suite for utility functions in the utils module.
-
- This module tests the following functions:
- 1. `split_into_batches`: Ensures a list of items is properly split into smaller batches for efficient processing.
- 2. `is_subsequence_in_list`: Validates the detection of subsequences within a given list.
- 3. `generate_joined_candidates`: Tests the logic for generating candidate sequences by joining frequent patterns.
-
- Each function is tested for standard cases, edge cases, and error handling to ensure robustness.
- """
- from gsppy.utils import split_into_batches, is_subsequence_in_list, generate_candidates_from_previous
-
-
- def test_split_into_batches():
-     """
-     Test the `split_into_batches` utility function.
-     """
-     # Test with exact batches
-     items = [(1,), (2,), (3,), (4,), (5,)]
-     batch_size = 2
-     result = list(split_into_batches(items, batch_size))
-     assert result == [[(1,), (2,)], [(3,), (4,)], [(5,)]], "Failed exact batch split"
-
-     # Test with a batch size greater than the number of items
-     batch_size = 10
-     result = list(split_into_batches(items, batch_size))
-     assert result == [items], "Failed large batch size handling"
-
-     # Test with batch size of 1
-     batch_size = 1
-     result = list(split_into_batches(items, batch_size))
-     assert result == [[(1,)], [(2,)], [(3,)], [(4,)], [(5,)]], "Failed batch size of 1"
-
-     # Test empty input
-     items = []
-     batch_size = 3
-     result = list(split_into_batches(items, batch_size))
-     assert not result, "Failed empty input"
-
-
- def test_is_subsequence_in_list():
-     """
-     Test the `is_subsequence_in_list` utility function.
-     """
-     # Test when the subsequence is present
-     assert is_subsequence_in_list((1, 2), (0, 1, 2, 3)), "Failed to find subsequence"
-     assert is_subsequence_in_list((3,), (0, 1, 2, 3)), "Failed single-element subsequence"
-
-     # Test when the subsequence is not present
-     assert not is_subsequence_in_list((1, 3), (0, 1, 2, 3)), "Incorrectly found non-contiguous subsequence"
-     assert not is_subsequence_in_list((4,), (0, 1, 2, 3)), "Incorrectly found non-existent subsequence"
-
-     # Test when input sequence or subsequence is empty
-     assert not is_subsequence_in_list((), (0, 1, 2, 3)), "Incorrect positive result for empty subsequence"
-     assert not is_subsequence_in_list((1,), ()), "Incorrect positive result for empty sequence"
-
-     # Test when subsequence length exceeds sequence
-     assert not is_subsequence_in_list((1, 2, 3, 4), (1, 2, 3)), "Failed to reject long subsequence"
-
-
- def test_generate_candidates_from_previous():
-     """
-     Test the `generate_candidates_from_previous` utility function.
-     """
-     # Test if candidates are generated correctly
-     prev_patterns = {
-         (1, 2): 3,
-         (2, 3): 4,
-         (3, 4): 5,
-         (1, 3): 2  # Non-joinable with others as a k-1 match
-     }
-     result = set(generate_candidates_from_previous(prev_patterns))
-
-     # Expected candidates: joining (1, 2) with (2, 3) and (2, 3) with (3, 4)
-     expected = {(1, 2, 3), (2, 3, 4)}
-     assert expected.issubset(result), f"Missing expected candidates. Got {result}, expected at least {expected}"
-
-     # Test with no joinable patterns
-     prev_patterns = {
-         (1,): 3,
-         (2,): 4
-     }
-     result = set(generate_candidates_from_previous(prev_patterns))
-
-     # For single-element disjoint patterns, candidates may still be generated but GSP will filter later
-     assert result == {(1, 2), (2, 1)}, f"Unexpected disjoint candidates. Got {result}"
-
-     # Test with empty patterns
-     prev_patterns = {}
-     result = set(generate_candidates_from_previous(prev_patterns))
-     assert result == set(), f"Failed empty input handling. Got {result}"
gsppy-2.1.0.dist-info/RECORD DELETED
@@ -1,14 +0,0 @@
- gsppy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- gsppy/cli.py,sha256=3G20xK79LeIq3jA_UnajEjcuVAebBuZwzefNRnW1o9Q,6090
- gsppy/gsp.py,sha256=wTsVPziYqYuGP0tkeUBwV-nIo1OF83tzv7WF_VVAbZM,13641
- gsppy/utils.py,sha256=K-oIwE6XDi-dsIlFzBlVPa1f2A_2DrUZW4hMiThRPAo,3350
- gsppy/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- gsppy/tests/test_cli.py,sha256=HpzCiESyIA_wsCQh7NMlHbT4k3GQ72TU9J7rEdllP5I,11425
- gsppy/tests/test_gsp.py,sha256=SnWw4hlp-F453zJGnWUHW3A9iqhPyUTYHrmGPH5fTm4,8794
- gsppy/tests/test_utils.py,sha256=Z27IefPYSVKg-dGOmnUO9tvAcd5OQMDyKVq3HAy3XtQ,3697
- gsppy-2.1.0.dist-info/LICENSE,sha256=co1jy5VZd1wXOPdUC2uk1hn7zsBm6aJNgVmhPOZ47g8,1086
- gsppy-2.1.0.dist-info/METADATA,sha256=icNi2oENwovji0hXCbYq0jMc-FmgzQGGGipOw1LncnY,10028
- gsppy-2.1.0.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
- gsppy-2.1.0.dist-info/entry_points.txt,sha256=smvmcIWk424ARIGKOC_BM42hpT_SptKPcIeqs-8u8lM,41
- gsppy-2.1.0.dist-info/top_level.txt,sha256=sovAgdiFF0V3Dz2pPAwAdIkHeR-ShUchyrH3q8qU120,6
- gsppy-2.1.0.dist-info/RECORD,,
gsppy-2.1.0.dist-info/top_level.txt DELETED
@@ -1 +0,0 @@
- gsppy