gsppy 2.1.0__py3-none-any.whl → 2.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gsppy/cli.py CHANGED
@@ -27,17 +27,37 @@ Key Features:
  This CLI empowers users to perform sequential pattern mining on transactional data efficiently through
  a simple command-line interface.
  """
- import argparse
+ import os
  import csv
+ import sys
  import json
  import logging
- import os
- from typing import List
+ import argparse
+ from typing import Dict, List, Tuple

  from gsppy.gsp import GSP

+ # Configure logging
+ logging.basicConfig(
+     level=logging.INFO,
+     format="%(message)s",  # Simplified to keep CLI output clean
+     handlers=[logging.StreamHandler(sys.stdout)],
+ )
+ logger = logging.getLogger(__name__)

- def read_transactions_from_json(file_path: str) -> List[List]:
+
+ def setup_logging(verbose: bool) -> None:
+     """
+     Set the logging level based on the verbosity of the CLI output.
+     :param verbose: Whether to enable verbose logging.
+     """
+     if verbose:
+         logger.setLevel(logging.DEBUG)
+     else:
+         logger.setLevel(logging.INFO)
+
+
+ def read_transactions_from_json(file_path: str) -> List[List[str]]:
      """
      Read transactions from a JSON file.

@@ -52,9 +72,7 @@ def read_transactions_from_json(file_path: str) -> List[List]:
      """
      try:
          with open(file_path, 'r', encoding='utf-8') as f:
-             transactions = json.load(f)
-             if not isinstance(transactions, list) or not all(isinstance(t, list) for t in transactions):
-                 raise ValueError("File should contain a JSON array of transaction lists.")
+             transactions: List[List[str]] = json.load(f)
          return transactions
      except Exception as e:
          msg = f"Error reading transaction data from JSON file '{file_path}': {e}"
@@ -62,7 +80,7 @@ def read_transactions_from_json(file_path: str) -> List[List]:
          raise ValueError(msg) from e


- def read_transactions_from_csv(file_path: str) -> List[List]:
+ def read_transactions_from_csv(file_path: str) -> List[List[str]]:
      """
      Read transactions from a CSV file.

@@ -76,7 +94,7 @@ def read_transactions_from_csv(file_path: str) -> List[List]:
          ValueError: If the file cannot be read or contains invalid data.
      """
      try:
-         transactions = []
+         transactions: List[List[str]] = []
          with open(file_path, newline='', encoding='utf-8') as csvfile:
              reader = csv.reader(csvfile)
              for row in reader:
@@ -92,7 +110,7 @@ def read_transactions_from_csv(file_path: str) -> List[List]:
          raise ValueError(msg) from e


- def detect_and_read_file(file_path: str) -> List[List]:
+ def detect_and_read_file(file_path: str) -> List[List[str]]:
      """
      Detect file format (CSV or JSON) and read transactions.

@@ -120,7 +138,7 @@ def detect_and_read_file(file_path: str) -> List[List]:
      raise ValueError("Unsupported file format. Please provide a JSON or CSV file.")


- def main():
+ def main() -> None:
      """
      Main function to handle CLI input and run the GSP algorithm.

@@ -150,32 +168,42 @@ def main():
          help="Minimum support threshold as a fraction of total transactions (default: 0.2)"
      )

+     # Verbose output argument
+     parser.add_argument(
+         '--verbose',
+         action='store_true',
+         help='Enable verbose output for debugging purposes.'
+     )
+
      # Parse arguments
      args = parser.parse_args()

+     # Setup logging verbosity
+     setup_logging(args.verbose)
+
      # Automatically detect and load transactions
      try:
          transactions = detect_and_read_file(args.file)
      except ValueError as e:
-         print(f"Error: {e}")
+         logger.error(f"Error: {e}")
          return

      # Check min_support
      if args.min_support <= 0.0 or args.min_support > 1.0:
-         print("Error: min_support must be in the range (0.0, 1.0].")
+         logger.error("Error: min_support must be in the range (0.0, 1.0].")
          return

      # Initialize and run GSP algorithm
      try:
          gsp = GSP(transactions)
-         patterns = gsp.search(min_support=args.min_support)
-         print("Frequent Patterns Found:")
+         patterns: List[Dict[Tuple[str, ...], int]] = gsp.search(min_support=args.min_support)
+         logger.info("Frequent Patterns Found:")
          for i, level in enumerate(patterns, start=1):
-             print(f"\n{i}-Sequence Patterns:")
+             logger.info(f"\n{i}-Sequence Patterns:")
              for pattern, support in level.items():
-                 print(f"Pattern: {pattern}, Support: {support}")
+                 logger.info(f"Pattern: {pattern}, Support: {support}")
      except Exception as e:
-         print(f"Error executing GSP algorithm: {e}")
+         logger.error(f"Error executing GSP algorithm: {e}")


  if __name__ == '__main__':
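Editor's note: the `cli.py` changes above route all output through a module-level logger writing to stdout and add a `--verbose` flag that `setup_logging()` maps to `DEBUG`. As a rough, hedged sketch (not part of the package), the snippet below drives the updated `main()` the same way the removed test suite did; the temporary file, the `argv[0]` label, and the argument values are illustrative only.

```python
import json
import os
import sys
import tempfile

from gsppy.cli import main

# Write a tiny transaction set to a temporary JSON file (example data only).
with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False) as tmp:
    json.dump([["Bread", "Milk"], ["Milk", "Diaper"], ["Bread", "Diaper", "Beer"]], tmp)

# Simulate a command line; --verbose lowers the logger to DEBUG via setup_logging().
sys.argv = ["gsppy-cli", "--file", tmp.name, "--min_support", "0.5", "--verbose"]
main()  # frequent patterns are now emitted through the logger instead of print()

os.unlink(tmp.name)  # clean up the temporary file
```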
gsppy/gsp.py CHANGED
@@ -86,9 +86,9 @@ Version:
  """
  import logging
  import multiprocessing as mp
- from collections import Counter
+ from typing import Any, Dict, List, Tuple
  from itertools import chain
- from typing import List, Dict, Tuple
+ from collections import Counter

  from gsppy.utils import split_into_batches, is_subsequence_in_list, generate_candidates_from_previous

@@ -114,7 +114,7 @@ class GSP:
          k-sequence for pattern generation.
      """

-     def __init__(self, raw_transactions: List[List]):
+     def __init__(self, raw_transactions: List[List[str]]):
          """
          Initialize the GSP algorithm with raw transactional data.

@@ -132,10 +132,10 @@ class GSP:
              ValueError: If the input transaction dataset is empty, contains
                  fewer than two transactions, or is not properly formatted.
          """
-         self.freq_patterns = []
+         self.freq_patterns: List[Dict[Tuple[str, ...], int]] = []
          self._pre_processing(raw_transactions)

-     def _pre_processing(self, raw_transactions: List[List]):
+     def _pre_processing(self, raw_transactions: List[List[str]]) -> None:
          """
          Validate and preprocess the input transactional dataset.

@@ -167,20 +167,19 @@ class GSP:
              logger.error(msg)
              raise ValueError(msg)

-         if not all(isinstance(item, list) for item in raw_transactions):
-             msg = "The dataset must be a list of transactions."
-             logger.error(msg)
-             raise ValueError(msg)
-
          logger.info("Pre-processing transactions...")
          self.max_size = max(len(item) for item in raw_transactions)
-         self.transactions = [tuple(transaction) for transaction in raw_transactions]
-         counts = Counter(chain.from_iterable(raw_transactions))
-         self.unique_candidates = [(item,) for item in counts.keys()]
+         self.transactions: List[Tuple[str, ...]] = [tuple(transaction) for transaction in raw_transactions]
+         counts: Counter[str] = Counter(chain.from_iterable(raw_transactions))
+         self.unique_candidates: list[tuple[str, Any]] = [(item,) for item in counts.keys()]
          logger.debug("Unique candidates: %s", self.unique_candidates)

      @staticmethod
-     def _worker_batch(batch: List[Tuple], transactions: List[Tuple], min_support: int) -> List[Tuple[Tuple, int]]:
+     def _worker_batch(
+         batch: List[Tuple[str, ...]],
+         transactions: List[Tuple[str, ...]],
+         min_support: int
+     ) -> List[Tuple[Tuple[str, ...], int]]:
          """
          Evaluate a batch of candidate sequences to compute their support.

@@ -198,14 +197,17 @@ class GSP:
              - A candidate sequence.
              - The candidate's support count.
          """
-         results = []
+         results: List[Tuple[Tuple[str, ...], int]] = []
          for item in batch:
              frequency = sum(1 for t in transactions if is_subsequence_in_list(item, t))
              if frequency >= min_support:
                  results.append((item, frequency))
          return results

-     def _support(self, items: List[Tuple], min_support: float = 0, batch_size: int = 100) -> Dict[Tuple, int]:
+     def _support(
+         self,
+         items: List[Tuple[str, ...]], min_support: float = 0, batch_size: int = 100
+     ) -> Dict[Tuple[str, ...], int]:
          """
          Calculate support counts for candidate sequences, using parallel processing.

@@ -235,7 +237,7 @@ class GSP:
          # Flatten the list of results and convert to a dictionary
          return {item: freq for batch in batch_results for item, freq in batch}

-     def _print_status(self, run: int, candidates: List[Tuple]):
+     def _print_status(self, run: int, candidates: List[Tuple[str, ...]]) -> None:
          """
          Log progress information for the current GSP iteration.

@@ -249,7 +251,7 @@ class GSP:
          logger.info("Run %d: %d candidates filtered to %d.",
                      run, len(candidates), len(self.freq_patterns[run - 1]))

-     def search(self, min_support: float = 0.2) -> List[Dict[Tuple, int]]:
+     def search(self, min_support: float = 0.2) -> List[Dict[Tuple[str, ...], int]]:
          """
          Execute the Generalized Sequential Pattern (GSP) mining algorithm.

@@ -263,8 +265,9 @@ class GSP:
              appears in at least 30% of all transactions.

          Returns:
-             List[Dict[Tuple, int]]: A list where each element corresponds to a k-sequence-level
-                 dictionary, mapping frequent patterns to their support counts.
+             List[Dict[Tuple[str, ...], int]]: A list of dictionaries containing frequent patterns
+                 at each k-sequence level, with patterns as keys
+                 and their support counts as values.

          Raises:
              ValueError: If the minimum support threshold is not in the range `(0.0, 1.0]`.
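Editor's note: the `gsp.py` changes are type-hint tightening plus the removal of the up-front "list of lists" validation, so malformed inputs now surface later rather than at construction time. A minimal sketch of the public API touched above; the transaction data and the expected output shape are taken from the `test_gsp.py` file removed in this release (shown further down in this diff).

```python
from typing import Dict, List, Tuple

from gsppy.gsp import GSP

transactions = [
    ['Bread', 'Milk'],
    ['Bread', 'Diaper', 'Beer', 'Eggs'],
    ['Milk', 'Diaper', 'Beer', 'Coke'],
    ['Bread', 'Milk', 'Diaper', 'Beer'],
    ['Bread', 'Milk', 'Diaper', 'Coke'],
]

gsp = GSP(transactions)
# One dict per k-sequence level, mapping pattern tuples to support counts,
# e.g. {('Bread',): 4, ...} at level 1 and {('Bread', 'Milk'): 3, ...} at level 2.
patterns: List[Dict[Tuple[str, ...], int]] = gsp.search(min_support=0.3)
for level, frequent in enumerate(patterns, start=1):
    print(f"{level}-sequence patterns: {frequent}")
```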
gsppy/utils.py CHANGED
@@ -20,34 +20,35 @@ Main functionalities:
  These utilities are designed to support sequence processing tasks and can be
  adapted to various domains, such as data mining, recommendation systems, and sequence analysis.
  """
+ from typing import Dict, List, Tuple, Sequence, Generator
  from functools import lru_cache
  from itertools import product
- from typing import List, Tuple, Generator, Dict


- def split_into_batches(items: List[Tuple], batch_size: int) -> Generator[List[Tuple], None, None]:
+ def split_into_batches(
+     items: Sequence[Tuple[str, ...]], batch_size: int
+ ) -> Generator[Sequence[Tuple[str, ...]], None, None]:
      """
      Split the list of items into smaller batches.

      Parameters:
-         items (List[Tuple]): The list of candidate items.
+         items (Sequence[Tuple]): A sequence of items to be batched.
          batch_size (int): The maximum size of each batch.

      Returns:
-         List[List[Tuple]]: A list of batches, where each batch contains a subset of candidate items.
+         Generator[Sequence[Tuple], None, None]: A generator yielding batches of items.
      """
      for i in range(0, len(items), batch_size):
          yield items[i:i + batch_size]


- # Cache the results of the slice comparison function to avoid redundant calculations
  @lru_cache(maxsize=None)
- def is_subsequence_in_list(subsequence: Tuple, sequence: Tuple) -> bool:
+ def is_subsequence_in_list(subsequence: Tuple[str, ...], sequence: Tuple[str, ...]) -> bool:
      """
      Check if a subsequence exists within a sequence as a contiguous subsequence.

      Parameters:
-         subsequence: Tuple (tuple): The sequence to search for.
+         subsequence: (tuple): The sequence to search for.
          sequence (tuple): The sequence to search within.

      Returns:
@@ -67,12 +68,14 @@ def is_subsequence_in_list(subsequence: Tuple, sequence: Tuple) -> bool:
      return any(sequence[i:i + len_sub] == subsequence for i in range(len_seq - len_sub + 1))


- def generate_candidates_from_previous(prev_patterns: Dict[Tuple, int]) -> List[Tuple]:
+ def generate_candidates_from_previous(
+     prev_patterns: Dict[Tuple[str, ...], int]
+ ) -> List[Tuple[str, ...]]:
      """
      Generate joined candidates from the previous level's frequent patterns.

      Parameters:
-         prev_patterns (Dict[Tuple, int]): Frequent patterns at the previous level.
+         prev_patterns (Dict[Tuple, int]): A dictionary of frequent patterns from the previous level.

      Returns:
          List[Tuple]: Candidate patterns for the next level.
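Editor's note: a small, hedged sketch of the two `utils` functions whose signatures changed above, mirroring the assertions in the removed `test_utils.py` (shown further down in this diff); the integer tuples are example data only.

```python
from gsppy.utils import is_subsequence_in_list, generate_candidates_from_previous

# Matching is contiguous: (1, 2) occurs in (0, 1, 2, 3), but (1, 3) does not.
assert is_subsequence_in_list((1, 2), (0, 1, 2, 3))
assert not is_subsequence_in_list((1, 3), (0, 1, 2, 3))

# Level-(k+1) candidates are built by joining overlapping level-k patterns,
# e.g. (1, 2) + (2, 3) -> (1, 2, 3) and (2, 3) + (3, 4) -> (2, 3, 4).
prev = {(1, 2): 3, (2, 3): 4, (3, 4): 5}
print(generate_candidates_from_previous(prev))
```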
gsppy-2.1.0.dist-info/METADATA → gsppy-2.2.0.dist-info/METADATA CHANGED
@@ -1,32 +1,59 @@
- Metadata-Version: 2.1
+ Metadata-Version: 2.4
  Name: gsppy
- Version: 2.1.0
+ Version: 2.2.0
  Summary: GSP (Generalized Sequence Pattern) algorithm in Python
- Home-page: https://github.com/jacksonpradolima/gsp-py
- Author: Jackson Antonio do Prado Lima
- Author-email: jacksonpradolima@gmail.com
- Maintainer: Jackson Antonio do Prado Lima
- Maintainer-email: jacksonpradolima@gmail.com
- License: MIT
- Keywords: GSP,sequential patterns,data analysis,sequence mining
+ Project-URL: Homepage, https://github.com/jacksonpradolima/gsp-py
+ Author-email: Jackson Antonio do Prado Lima <jacksonpradolima@gmail.com>
+ Maintainer-email: Jackson Antonio do Prado Lima <jacksonpradolima@gmail.com>
+ License: MIT License
+
+ Copyright (c) 2024 Jackson Antonio do Prado Lima
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
+ License-File: LICENSE
+ Keywords: GSP,data analysis,sequence mining,sequential patterns
+ Classifier: Intended Audience :: Science/Research
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Natural Language :: English
+ Classifier: Operating System :: OS Independent
  Classifier: Programming Language :: Python :: 3.8
  Classifier: Programming Language :: Python :: 3.9
  Classifier: Programming Language :: Python :: 3.10
  Classifier: Programming Language :: Python :: 3.11
- Classifier: Operating System :: OS Independent
- Classifier: License :: OSI Approved :: MIT License
- Classifier: Intended Audience :: Science/Research
+ Classifier: Programming Language :: Python :: 3.12
+ Classifier: Programming Language :: Python :: 3.13
  Classifier: Topic :: Scientific/Engineering :: Information Analysis
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
- Classifier: Natural Language :: English
  Requires-Python: >=3.8
- Description-Content-Type: text/markdown
- License-File: LICENSE
  Provides-Extra: dev
- Requires-Dist: pylint==3.3.3; extra == "dev"
- Requires-Dist: pytest==8.3.4; extra == "dev"
- Requires-Dist: pytest-benchmark==5.1.0; extra == "dev"
- Requires-Dist: pytest-cov==6.0.0; extra == "dev"
+ Requires-Dist: cython==3.0.11; extra == 'dev'
+ Requires-Dist: hatch==1.14.0; extra == 'dev'
+ Requires-Dist: hatchling==1.27.0; extra == 'dev'
+ Requires-Dist: mypy==1.14.0; extra == 'dev'
+ Requires-Dist: pylint==3.3.3; extra == 'dev'
+ Requires-Dist: pyright==1.1.391; extra == 'dev'
+ Requires-Dist: pytest-benchmark==5.1.0; extra == 'dev'
+ Requires-Dist: pytest-cov==6.0.0; extra == 'dev'
+ Requires-Dist: pytest==8.3.4; extra == 'dev'
+ Requires-Dist: ruff==0.8.4; extra == 'dev'
+ Requires-Dist: tox==4.23.2; extra == 'dev'
+ Description-Content-Type: text/markdown

  [![PyPI License](https://img.shields.io/pypi/l/gsppy.svg?style=flat-square)]()
  ![](https://img.shields.io/badge/python-3.8+-blue.svg)
@@ -123,16 +150,45 @@ pip install gsppy

  ## 🛠️ Developer Installation

- For contributors and developers, GSP-Py provides additional dependencies for development purposes (e.g., testing and
- linting).
+ This project uses [Rye](https://github.com/mitsuhiko/rye) for managing dependencies, running scripts, and setting up the environment. Follow these steps to install and set up Rye for this project:

- To install the package along with development dependencies, use:
+ #### 1. Install Rye
+ Run the following command to install Rye:

  ```bash
- pip install .[dev]
+ curl -sSf https://rye.astral.sh/get | bash
  ```

- The `dev` category includes tools such as `pytest`, `pylint`, and others to ensure code quality and maintainability.
+ If the `~/.rye/bin` directory is not in your PATH, add the following line to your shell configuration file (e.g., `~/.bashrc`, `~/.zshrc`, etc.):
+
+ ```bash
+ export PATH="$HOME/.rye/bin:$PATH"
+ ```
+
+ Reload your shell configuration file:
+
+ ```bash
+ source ~/.bashrc  # or `source ~/.zshrc`
+ ```
+
+ #### 2. Set Up the Project Environment
+ To configure the project environment and install its dependencies, run:
+
+ ```bash
+ rye sync
+ ```
+
+ #### 3. Use Rye Scripts
+ Once the environment is set up, you can run the following commands to simplify project tasks:
+
+ - Run tests: `rye run test`
+ - Format code: `rye run format`
+ - Lint code: `rye run lint`
+ - Type-check: `rye run typecheck`
+
+ #### Notes
+ - Rye automatically reads dependencies and scripts from the `pyproject.toml` file.
+ - No need for `requirements.txt`, as Rye manages all dependencies!

  ## 💡 Usage

gsppy-2.2.0.dist-info/RECORD ADDED
@@ -0,0 +1,9 @@
+ gsppy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ gsppy/cli.py,sha256=YxBL341LJzb6EN-RBkhW3o4ZCexOGiQXq_aRovKccA8,6790
+ gsppy/gsp.py,sha256=CUCC1W5GGlGbWkC_td0qDfnSJiuzbWoMapR0qciejw8,13800
+ gsppy/utils.py,sha256=gOT3USxmC0MrBnSHOQ8avxghWmjQe59hS4jNQ3eiENQ,3363
+ gsppy-2.2.0.dist-info/METADATA,sha256=1Y8LcuU7engLWoCWFIKRwRMNsgkAawnpvX6s1BoXP_8,12485
+ gsppy-2.2.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ gsppy-2.2.0.dist-info/entry_points.txt,sha256=smvmcIWk424ARIGKOC_BM42hpT_SptKPcIeqs-8u8lM,41
+ gsppy-2.2.0.dist-info/licenses/LICENSE,sha256=co1jy5VZd1wXOPdUC2uk1hn7zsBm6aJNgVmhPOZ47g8,1086
+ gsppy-2.2.0.dist-info/RECORD,,
gsppy-2.1.0.dist-info/WHEEL → gsppy-2.2.0.dist-info/WHEEL CHANGED
@@ -1,5 +1,4 @@
  Wheel-Version: 1.0
- Generator: setuptools (75.6.0)
+ Generator: hatchling 1.27.0
  Root-Is-Purelib: true
  Tag: py3-none-any
-
gsppy/tests/__init__.py DELETED
File without changes
gsppy/tests/test_cli.py DELETED
@@ -1,326 +0,0 @@
- """
- This module contains unit tests for the CLI-related functionality of the `gsppy` package
- and the Generalized Sequential Pattern (GSP) mining algorithm. The tests ensure correctness,
- robustness, and error handling for both file handling and the GSP algorithm implementation.
-
- The tests include:
- 1. Validating file input handling for both JSON and CSV formats.
- 2. Ensuring proper error handling for invalid or malformed files (JSON, CSV) and unsupported formats.
- 3. Testing exceptions for non-existent files.
- 4. Verifying the behavior of the GSP algorithm when given valid inputs and configurations.
- 5. Checking for appropriate error handling when invalid parameters (e.g., `min_support`)
-    are provided to the GSP algorithm.
-
- Key components tested:
- - `detect_and_read_file`: A method to detect the file type (JSON/CSV) and read transactions from it.
- - `GSP.search`: Validates the sequential pattern mining functionality for valid and invalid `min_support` parameters.
-
- Fixtures are used to create temporary files (valid/invalid JSON and CSV) for reliable testing
- without affecting the file system.
- Pytest is utilized for parametrized testing to improve coverage and reduce redundancy in test cases.
- """
- import json
- import os
- import runpy
- import sys
- import tempfile
- from unittest.mock import patch
-
- import pytest
-
- from gsppy.cli import detect_and_read_file, main
- from gsppy.gsp import GSP
-
-
- def test_invalid_json_structure():
-     """
-     Test if a JSON file with an invalid structure raises an error.
-     """
-     # Create an invalid JSON structure that does not adhere to the expected format
-     with tempfile.NamedTemporaryFile(delete=False, suffix=".json", mode="w") as temp_file:
-         temp_file.write(json.dumps({"invalid": "data"}))
-         temp_file_name = temp_file.name
-
-     # Attempt to read the invalid JSON file
-     with pytest.raises(ValueError, match="File should contain a JSON array of transaction lists."):
-         detect_and_read_file(temp_file_name)
-
-     # Cleanup
-     os.unlink(temp_file_name)
-
-
- @pytest.fixture
- def valid_json_file():
-     """Fixture to create a valid JSON file."""
-     with tempfile.NamedTemporaryFile(delete=False, suffix=".json", mode="w") as temp_file:
-         json.dump([["Bread", "Milk"], ["Milk", "Diaper"], ["Bread", "Diaper", "Beer"]], temp_file)
-         temp_file_name = temp_file.name
-     yield temp_file_name
-     os.unlink(temp_file_name)
-
-
- @pytest.fixture
- def valid_csv_file():
-     """Fixture to create a valid CSV file."""
-     with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as temp_file:
-         temp_file.write(b"Bread,Milk\nMilk,Diaper\nBread,Diaper,Beer\n")
-         temp_file_name = temp_file.name
-     yield temp_file_name
-     os.unlink(temp_file_name)
-
-
- @pytest.fixture
- def invalid_json_file():
-     """Fixture to create an invalid JSON file."""
-     with tempfile.NamedTemporaryFile(delete=False, suffix=".json") as temp_file:
-         temp_file.write(b"{invalid_json: true")  # Malformed JSON
-         temp_file_name = temp_file.name
-     yield temp_file_name
-     os.unlink(temp_file_name)
-
-
- @pytest.fixture
- def invalid_csv_file():
-     """Fixture to create an invalid CSV file."""
-     with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as temp_file:
-         temp_file.write(b",,\nBread,,Milk\n")  # Broken format
-         temp_file_name = temp_file.name
-     yield temp_file_name
-     os.unlink(temp_file_name)
-
-
- @pytest.fixture
- def unsupported_file():
-     """Fixture to create an unsupported file."""
-     with tempfile.NamedTemporaryFile(delete=False, suffix=".txt") as temp_file:
-         temp_file.write(b"This is a plain text file.")
-         temp_file_name = temp_file.name
-     yield temp_file_name
-     os.unlink(temp_file_name)
-
-
- def test_valid_json_file(valid_json_file):
-     """Test if a valid JSON file is correctly read."""
-     transactions = detect_and_read_file(valid_json_file)
-     assert transactions == [["Bread", "Milk"], ["Milk", "Diaper"], ["Bread", "Diaper", "Beer"]]
-
-
- def test_valid_csv_file(valid_csv_file):
-     """Test if a valid CSV file is correctly read."""
-     transactions = detect_and_read_file(valid_csv_file)
-     assert transactions == [["Bread", "Milk"], ["Milk", "Diaper"], ["Bread", "Diaper", "Beer"]]
-
-
- def test_invalid_json_file(invalid_json_file):
-     """Test if an invalid JSON file raises an error."""
-     with pytest.raises(ValueError, match="Error reading transaction data from JSON file"):
-         detect_and_read_file(invalid_json_file)
-
-
- def test_invalid_csv_file(invalid_csv_file):
-     """Test if an invalid CSV file raises an error."""
-     with pytest.raises(ValueError, match="Error reading transaction data from CSV file"):
-         detect_and_read_file(invalid_csv_file)
-
-
- def test_unsupported_file_format(unsupported_file):
-     """Test if an unsupported file format raises an error."""
-     with pytest.raises(ValueError, match="Unsupported file format"):
-         detect_and_read_file(unsupported_file)
-
-
- def test_non_existent_file():
-     """Test if a non-existent file raises an error."""
-     with pytest.raises(ValueError, match="File 'non_existent_file.json' does not exist."):
-         detect_and_read_file("non_existent_file.json")
-
-
- @pytest.mark.parametrize("min_support", [-0.1, 1.1])
- def test_invalid_min_support_gsp(min_support):
-     """Test if invalid min_support values raise an error."""
-     transactions = [["Bread", "Milk"], ["Milk", "Diaper"], ["Bread", "Diaper", "Beer"]]
-     gsp = GSP(transactions)
-     with pytest.raises(ValueError):
-         gsp.search(min_support=min_support)
-
-
- @pytest.mark.parametrize("min_support", [0.5])
- def test_valid_min_support_gsp(min_support):
-     """Test if valid min_support values work with the GSP algorithm."""
-     transactions = [["Bread", "Milk"], ["Milk", "Diaper"], ["Bread", "Diaper", "Beer"]]
-     gsp = GSP(transactions)
-     patterns = gsp.search(min_support=min_support)
-     assert len(patterns) > 0  # Ensure at least some patterns are found
-     assert patterns[0]  # Ensure frequent patterns are not empty
-
-
- def test_main_invalid_json_file(monkeypatch, capfd):
-     """
-     Test `main()` with a JSON file that has an invalid structure.
-     """
-     # Create an invalid JSON file
-     with tempfile.NamedTemporaryFile(delete=False, suffix=".json", mode="w") as temp_file:
-         temp_file.write(json.dumps({"invalid": "data"}))
-         temp_file_name = temp_file.name
-
-     # Mock CLI arguments
-     monkeypatch.setattr(
-         'sys.argv', ['main', '--file', temp_file_name, '--min_support', '0.2']
-     )
-
-     main()
-
-     # Capture output
-     captured = capfd.readouterr()
-     assert "File should contain a JSON array of transaction lists." in captured.out
-
-     # Cleanup
-     os.unlink(temp_file_name)
-
-
- def test_main_non_existent_file(monkeypatch, capfd):
-     """
-     Test `main()` with a file that does not exist.
-     """
-     # Mock CLI arguments
-     monkeypatch.setattr(
-         'sys.argv', ['main', '--file', 'non_existent.json', '--min_support', '0.2']
-     )
-
-     main()
-
-     # Capture output
-     captured = capfd.readouterr()
-     assert "File 'non_existent.json' does not exist." in captured.out
-
-
- def test_main_valid_json_file(monkeypatch, capfd):
-     """
-     Test `main()` with a valid JSON file.
-     """
-     # Create a valid JSON file
-     with tempfile.NamedTemporaryFile(delete=False, suffix=".json", mode="w") as temp_file:
-         json.dump([["Bread", "Milk"], ["Milk", "Diaper"], ["Bread", "Diaper", "Beer"]], temp_file)
-         temp_file_name = temp_file.name
-
-     # Mock CLI arguments
-     monkeypatch.setattr(
-         'sys.argv', ['main', '--file', temp_file_name, '--min_support', '0.2']
-     )
-
-     main()
-
-     # Capture output
-     captured = capfd.readouterr()
-     assert "Frequent Patterns Found:" in captured.out
-
-     # Cleanup
-     os.unlink(temp_file_name)
-
-
- def test_main_invalid_min_support(monkeypatch, capfd):
-     """
-     Test `main()` with an invalid `min_support` value.
-     """
-     # Create a valid JSON file
-     with tempfile.NamedTemporaryFile(delete=False, suffix=".json", mode="w") as temp_file:
-         json.dump([["Bread", "Milk"], ["Milk", "Diaper"], ["Bread", "Diaper", "Beer"]], temp_file)
-         temp_file_name = temp_file.name
-
-     # Mock CLI arguments
-     monkeypatch.setattr(
-         'sys.argv', ['main', '--file', temp_file_name, '--min_support', '-1.0']  # Invalid min_support
-     )
-
-     main()
-
-     # Capture output
-     captured = capfd.readouterr()
-     assert "Error: min_support must be in the range (0.0, 1.0]." in captured.out
-
-     # Cleanup
-     os.unlink(temp_file_name)
-
-
- def test_main_entry_point(monkeypatch, capfd):
-     """
-     Test the script entry point (`if __name__ == '__main__': main()`).
-     """
-     # Create a valid JSON file
-     with tempfile.NamedTemporaryFile(delete=False, suffix=".json", mode="w") as temp_file:
-         json.dump([["Bread", "Milk"], ["Milk", "Diaper"], ["Bread", "Diaper", "Beer"]], temp_file)
-         temp_file_name = temp_file.name
-
-     # Mock CLI arguments - Simulating script call
-     monkeypatch.setattr(
-         'sys.argv', ['gsppy.cli', '--file', temp_file_name, '--min_support', '0.2']
-     )
-
-     # Remove the module from sys.modules before running it
-     if 'gsppy.cli' in sys.modules:
-         del sys.modules['gsppy.cli']
-
-     # Use `runpy` to execute the script as if it were run from the command line
-     runpy.run_module('gsppy.cli', run_name='__main__')
-
-     # Capture the output
-     captured = capfd.readouterr()
-     assert "Frequent Patterns Found:" in captured.out
-
-     # Cleanup
-     os.unlink(temp_file_name)
-
-
- def test_main_edge_case_min_support(monkeypatch, capfd):
-     """
-     Test `main()` with edge-case values for `min_support` (valid and invalid).
-     """
-     # Create a valid JSON
-     with tempfile.NamedTemporaryFile(delete=False, suffix=".json", mode="w") as temp_file:
-         json.dump([["Bread", "Milk"], ["Milk", "Diaper"], ["Bread", "Diaper", "Beer"]], temp_file)
-         temp_file_name = temp_file.name
-
-     # Case 1: `min_support` = 1.0 (Valid Edge Case)
-     monkeypatch.setattr(
-         'sys.argv', ['main', '--file', temp_file_name, '--min_support', '1.0']
-     )
-     main()
-     captured = capfd.readouterr()
-     assert "Frequent Patterns Found:" in captured.out
-
-     # Case 2: `min_support` = -1.0 (Invalid Edge Case)
-     monkeypatch.setattr(
-         'sys.argv', ['main', '--file', temp_file_name, '--min_support', '-1.0']
-     )
-     main()
-     captured = capfd.readouterr()
-     assert "Error: min_support must be in the range (0.0, 1.0]." in captured.out
-
-     # Cleanup
-     os.unlink(temp_file_name)
-
-
- def test_main_gsp_exception(monkeypatch, capfd):
-     """
-     Test `main()` when the GSP algorithm raises an exception.
-     """
-     # Step 1: Create a valid JSON file
-     with tempfile.NamedTemporaryFile(delete=False, suffix=".json", mode="w") as temp_file:
-         json.dump([["Bread", "Milk"], ["Milk", "Diaper"], ["Bread", "Diaper", "Beer"]], temp_file)
-         temp_file_name = temp_file.name
-
-     # Step 2: Mock CLI arguments
-     monkeypatch.setattr(
-         'sys.argv', ['main', '--file', temp_file_name, '--min_support', '0.2']
-     )
-
-     # Step 3: Mock GSP.search to raise an exception
-     with patch('gsppy.gsp.GSP.search', side_effect=Exception("Simulated GSP failure")):
-         main()
-
-     # Step 4: Capture output and assert the error message
-     captured = capfd.readouterr()
-     assert "Error executing GSP algorithm: Simulated GSP failure" in captured.out
-
-     # Step 5: Cleanup
-     os.unlink(temp_file_name)
gsppy/tests/test_gsp.py DELETED
@@ -1,250 +0,0 @@
- """
- Unit tests for the GSP (Generalized Sequential Pattern) algorithm.
-
- This module contains tests for various scenarios including edge cases,
- benchmarking, and normal use cases of the GSP algorithm. The tests use
- `pytest` for assertions and include fixtures for reusable data.
-
- Tests include:
- - Empty transactions.
- - Single transaction.
- - High minimum support filtering.
- - Typical supermarket transactions with known frequent patterns.
- - Randomly generated transactions for flexibility.
- - Large transactions with repetitive items.
- - Partial matches and benchmarking.
-
- Author: Jackson Antonio do Prado Lima
- Email: jacksonpradolima@gmail.com
- """
- import random
- import re
-
- import pytest
-
- from gsppy.gsp import GSP
-
-
- @pytest.fixture
- def supermarket_transactions():
-     """
-     Fixture to provide a dataset representing supermarket transactions.
-
-     Returns:
-         list: A list of transactions, where each transaction is a list of items.
-     """
-     return [
-         ['Bread', 'Milk'],
-         ['Bread', 'Diaper', 'Beer', 'Eggs'],
-         ['Milk', 'Diaper', 'Beer', 'Coke'],
-         ['Bread', 'Milk', 'Diaper', 'Beer'],
-         ['Bread', 'Milk', 'Diaper', 'Coke']
-     ]
-
-
- @pytest.fixture
- def random_transactions():
-     """
-     Fixture to generate a random dataset of transactions.
-
-     Returns:
-         list: A list of transactions with random items and varying lengths.
-     """
-     return [[random.choice(['A', 'B', 'C', 'D', 'E']) for _ in range(random.randint(2, 10))] for _ in range(100)]
-
-
- def test_empty_transactions():
-     """
-     Test the GSP algorithm with an empty dataset.
-
-     Asserts:
-         - A ValueError is raised indicating that the dataset is empty.
-     """
-     transactions = []
-     with pytest.raises(ValueError, match="Input transactions are empty"):
-         GSP(transactions)
-
-
- def test_single_transaction():
-     """
-     Test the GSP algorithm with a single transaction.
-
-     Asserts:
-         - A ValueError is raised indicating that GSP requires multiple transactions.
-     """
-     transactions = [['A', 'B', 'C']]
-     with pytest.raises(ValueError, match="GSP requires multiple transactions"):
-         GSP(transactions)
-
-
- def test_invalid_transaction_format():
-     """
-     Test the GSP algorithm with invalid transaction formats.
-
-     Asserts:
-         - A ValueError is raised indicating that the transactions must be lists of lists.
-     """
-     invalid_data = ["A", "B"]  # Invalid format: not a list of lists
-     with pytest.raises(ValueError, match="The dataset must be a list of transactions."):
-         GSP(invalid_data)
-
-
- @pytest.mark.parametrize(
-     "min_support, expected_error",
-     [
-         (-0.1, re.escape("Minimum support must be in the range (0.0, 1.0]")),
-         (0.0, re.escape("Minimum support must be in the range (0.0, 1.0]")),
-         (1.1, re.escape("Minimum support must be in the range (0.0, 1.0]")),
-     ]
- )
- def test_invalid_min_support(supermarket_transactions, min_support, expected_error):
-     """
-     Test the GSP algorithm with invalid minimum support values.
-
-     Asserts:
-         - A ValueError is raised if the min_support is outside the valid range.
-     """
-     gsp = GSP(supermarket_transactions)
-     with pytest.raises(ValueError, match=expected_error):
-         gsp.search(min_support=min_support)
-
-
- def test_valid_min_support_edge(supermarket_transactions):
-     """
-     Test the GSP algorithm with a valid edge value for min_support.
-
-     Asserts:
-         - The algorithm runs successfully when min_support is set to 1.0.
-     """
-     gsp = GSP(supermarket_transactions)
-     result = gsp.search(min_support=1.0)  # Only patterns supported by ALL transactions should remain
-     assert not result, "Expected no frequent patterns with min_support = 1.0"
-
-
- def test_min_support_valid(supermarket_transactions):
-     """
-     Test the GSP algorithm with a minimum support set just above 0.0.
-
-     Asserts:
-         - Frequent patterns are generated correctly for a low min_support threshold.
-     """
-     gsp = GSP(supermarket_transactions)
-     result = gsp.search(min_support=0.2)  # At least 1 transaction should support the pattern
-
-     # All items should appear as 1-item patterns
-     level_1_patterns = {('Bread',), ('Milk',), ('Diaper',), ('Beer',), ('Coke',), ('Eggs',)}
-     result_level_1 = set(result[0].keys())  # Extract patterns from Level 1
-
-     assert result_level_1 == level_1_patterns, f"Level 1 patterns mismatch. Got {result_level_1}"
-
-
- def test_no_frequent_items(supermarket_transactions):
-     """
-     Test the GSP algorithm with a high minimum support value.
-
-     Asserts:
-         - The result should be an empty list due to filtering out all items.
-     """
-     gsp = GSP(supermarket_transactions)
-     result = gsp.search(min_support=0.9)  # High minimum support
-     assert not result, "High minimum support should filter out all items."
-
-
- def test_worker_batch_static_method(supermarket_transactions):
-     """
-     Test the _worker_batch method directly for checkpoint validation.
-
-     Asserts:
-         - Candidates below the minimum support are filtered out.
-         - Candidates meeting the minimum support are returned with correct counts.
-     """
-     batch = [('Bread',), ('Milk',), ('Diaper',), ('Eggs',)]  # 1-sequence candidates
-     transactions = [tuple(t) for t in supermarket_transactions]
-     min_support = 3  # Absolute support count
-     expected = [(('Bread',), 4), (('Milk',), 4), (('Diaper',), 4)]
-
-     # Call the '_worker_batch' method
-     # This test accesses `_worker_batch` to test internal functionality
-     results = GSP._worker_batch(batch, transactions, min_support)  # pylint: disable=protected-access
-     assert results == expected, f"Expected results {expected}, but got {results}"
-
-
- def test_frequent_patterns(supermarket_transactions):
-     """
-     Test the GSP algorithm with supermarket transactions and a realistic minimum support.
-
-     Asserts:
-         - The frequent patterns should match the expected result.
-     """
-     gsp = GSP(supermarket_transactions)
-     result = gsp.search(min_support=0.3)
-     expected = [
-         {('Bread',): 4, ('Milk',): 4, ('Diaper',): 4, ('Beer',): 3, ('Coke',): 2},
-         {('Bread', 'Milk'): 3, ('Milk', 'Diaper'): 3, ('Diaper', 'Beer'): 3},
-         {('Bread', 'Milk', 'Diaper'): 2, ('Milk', 'Diaper', 'Beer'): 2}
-     ]
-     assert result == expected, "Frequent patterns do not match expected results."
-
-
- def test_random_transactions(random_transactions):
-     """
-     Test the GSP algorithm with a random dataset.
-
-     Asserts:
-         - The result should contain some frequent patterns with a low minimum support.
-     """
-     gsp = GSP(random_transactions)
-     result = gsp.search(min_support=0.1)  # Low support to ensure some patterns emerge
-     assert len(result) > 0, "Random transactions should yield some frequent patterns with low min_support."
-
-
- def test_large_transactions():
-     """
-     Test the GSP algorithm with a large single transaction.
-
-     Asserts:
-         - A ValueError is raised indicating that GSP requires multiple transactions.
-     """
-     transactions = [['A'] * 1000]  # Single transaction with 1000 identical items
-     with pytest.raises(ValueError, match="GSP requires multiple transactions to find meaningful patterns."):
-         GSP(transactions)
-
-
- def test_partial_match(supermarket_transactions):
-     """
-     Test the GSP algorithm with additional partial matches.
-
-     Asserts:
-         - Frequent patterns are generated correctly for the given transactions.
-     """
-     transactions = supermarket_transactions + [['Diaper', 'Milk']]
-     gsp = GSP(transactions)
-     result = gsp.search(min_support=0.3)  # Adjusted minimum support to match more patterns
-
-     # Debug output to inspect generated frequent patterns
-     print("Generated frequent patterns:", result)
-
-     # Check for the presence of valid frequent patterns
-     expected_patterns_level_1 = {('Bread',), ('Milk',), ('Diaper',), ('Beer',)}
-     expected_patterns_level_2 = {('Bread', 'Milk'), ('Milk', 'Diaper'), ('Diaper', 'Beer')}
-
-     # Convert results to sets for easier comparison
-     result_level_1 = set(result[0].keys())
-     assert result_level_1 >= expected_patterns_level_1, f"Level 1 patterns mismatch. Got {result_level_1}"
-
-     # Add a condition to avoid IndexError for empty results
-     if len(result) > 1:
-         result_level_2 = set(result[1].keys())
-         assert result_level_2 >= expected_patterns_level_2, f"Level 2 patterns mismatch. Got {result_level_2}"
-
-
- @pytest.mark.parametrize("min_support", [0.1, 0.2, 0.3, 0.4, 0.5])
- def test_benchmark(benchmark, supermarket_transactions, min_support):
-     """
-     Benchmark the GSP algorithm's performance using the supermarket dataset.
-
-     Uses:
-         pytest-benchmark: To measure execution time.
-     """
-     gsp = GSP(supermarket_transactions)
-     benchmark(gsp.search, min_support=min_support)
gsppy/tests/test_utils.py DELETED
@@ -1,91 +0,0 @@
- """
- Test suite for utility functions in the utils module.
-
- This module tests the following functions:
- 1. `split_into_batches`: Ensures a list of items is properly split into smaller batches for efficient processing.
- 2. `is_subsequence_in_list`: Validates the detection of subsequences within a given list.
- 3. `generate_joined_candidates`: Tests the logic for generating candidate sequences by joining frequent patterns.
-
- Each function is tested for standard cases, edge cases, and error handling to ensure robustness.
- """
- from gsppy.utils import split_into_batches, is_subsequence_in_list, generate_candidates_from_previous
-
-
- def test_split_into_batches():
-     """
-     Test the `split_into_batches` utility function.
-     """
-     # Test with exact batches
-     items = [(1,), (2,), (3,), (4,), (5,)]
-     batch_size = 2
-     result = list(split_into_batches(items, batch_size))
-     assert result == [[(1,), (2,)], [(3,), (4,)], [(5,)]], "Failed exact batch split"
-
-     # Test with a batch size greater than the number of items
-     batch_size = 10
-     result = list(split_into_batches(items, batch_size))
-     assert result == [items], "Failed large batch size handling"
-
-     # Test with batch size of 1
-     batch_size = 1
-     result = list(split_into_batches(items, batch_size))
-     assert result == [[(1,)], [(2,)], [(3,)], [(4,)], [(5,)]], "Failed batch size of 1"
-
-     # Test empty input
-     items = []
-     batch_size = 3
-     result = list(split_into_batches(items, batch_size))
-     assert not result, "Failed empty input"
-
-
- def test_is_subsequence_in_list():
-     """
-     Test the `is_subsequence_in_list` utility function.
-     """
-     # Test when the subsequence is present
-     assert is_subsequence_in_list((1, 2), (0, 1, 2, 3)), "Failed to find subsequence"
-     assert is_subsequence_in_list((3,), (0, 1, 2, 3)), "Failed single-element subsequence"
-
-     # Test when the subsequence is not present
-     assert not is_subsequence_in_list((1, 3), (0, 1, 2, 3)), "Incorrectly found non-contiguous subsequence"
-     assert not is_subsequence_in_list((4,), (0, 1, 2, 3)), "Incorrectly found non-existent subsequence"
-
-     # Test when input sequence or subsequence is empty
-     assert not is_subsequence_in_list((), (0, 1, 2, 3)), "Incorrect positive result for empty subsequence"
-     assert not is_subsequence_in_list((1,), ()), "Incorrect positive result for empty sequence"
-
-     # Test when subsequence length exceeds sequence
-     assert not is_subsequence_in_list((1, 2, 3, 4), (1, 2, 3)), "Failed to reject long subsequence"
-
-
- def test_generate_candidates_from_previous():
-     """
-     Test the `generate_candidates_from_previous` utility function.
-     """
-     # Test if candidates are generated correctly
-     prev_patterns = {
-         (1, 2): 3,
-         (2, 3): 4,
-         (3, 4): 5,
-         (1, 3): 2  # Non-joinable with others as a k-1 match
-     }
-     result = set(generate_candidates_from_previous(prev_patterns))
-
-     # Expected candidates: joining (1, 2) with (2, 3) and (2, 3) with (3, 4)
-     expected = {(1, 2, 3), (2, 3, 4)}
-     assert expected.issubset(result), f"Missing expected candidates. Got {result}, expected at least {expected}"
-
-     # Test with no joinable patterns
-     prev_patterns = {
-         (1,): 3,
-         (2,): 4
-     }
-     result = set(generate_candidates_from_previous(prev_patterns))
-
-     # For single-element disjoint patterns, candidates may still be generated but GSP will filter later
-     assert result == {(1, 2), (2, 1)}, f"Unexpected disjoint candidates. Got {result}"
-
-     # Test with empty patterns
-     prev_patterns = {}
-     result = set(generate_candidates_from_previous(prev_patterns))
-     assert result == set(), f"Failed empty input handling. Got {result}"
gsppy-2.1.0.dist-info/RECORD DELETED
@@ -1,14 +0,0 @@
- gsppy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- gsppy/cli.py,sha256=3G20xK79LeIq3jA_UnajEjcuVAebBuZwzefNRnW1o9Q,6090
- gsppy/gsp.py,sha256=wTsVPziYqYuGP0tkeUBwV-nIo1OF83tzv7WF_VVAbZM,13641
- gsppy/utils.py,sha256=K-oIwE6XDi-dsIlFzBlVPa1f2A_2DrUZW4hMiThRPAo,3350
- gsppy/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- gsppy/tests/test_cli.py,sha256=HpzCiESyIA_wsCQh7NMlHbT4k3GQ72TU9J7rEdllP5I,11425
- gsppy/tests/test_gsp.py,sha256=SnWw4hlp-F453zJGnWUHW3A9iqhPyUTYHrmGPH5fTm4,8794
- gsppy/tests/test_utils.py,sha256=Z27IefPYSVKg-dGOmnUO9tvAcd5OQMDyKVq3HAy3XtQ,3697
- gsppy-2.1.0.dist-info/LICENSE,sha256=co1jy5VZd1wXOPdUC2uk1hn7zsBm6aJNgVmhPOZ47g8,1086
- gsppy-2.1.0.dist-info/METADATA,sha256=icNi2oENwovji0hXCbYq0jMc-FmgzQGGGipOw1LncnY,10028
- gsppy-2.1.0.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
- gsppy-2.1.0.dist-info/entry_points.txt,sha256=smvmcIWk424ARIGKOC_BM42hpT_SptKPcIeqs-8u8lM,41
- gsppy-2.1.0.dist-info/top_level.txt,sha256=sovAgdiFF0V3Dz2pPAwAdIkHeR-ShUchyrH3q8qU120,6
- gsppy-2.1.0.dist-info/RECORD,,
gsppy-2.1.0.dist-info/top_level.txt DELETED
@@ -1 +0,0 @@
- gsppy