gsppy 3.3.0__py3-none-any.whl → 3.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gsppy/__init__.py CHANGED
@@ -6,7 +6,12 @@ implementation, CLI helpers for loading transactional data, and the package vers
6
6
 
7
7
  from importlib import metadata as importlib_metadata
8
8
 
9
- from gsppy.cli import detect_and_read_file, read_transactions_from_csv, read_transactions_from_json, setup_logging
9
+ from gsppy.cli import (
10
+ setup_logging,
11
+ detect_and_read_file,
12
+ read_transactions_from_csv,
13
+ read_transactions_from_json,
14
+ )
10
15
  from gsppy.gsp import GSP
11
16
 
12
17
  try:
gsppy/accelerate.py CHANGED
@@ -28,11 +28,14 @@ try: # pragma: no cover - optional dependency path
28
28
  cp = cast(Any, _cp_mod)
29
29
 
30
30
  try:
31
- _gpu_available = cp.cuda.runtime.getDeviceCount() > 0 # type: ignore[attr-defined]
31
+ if cp is not None:
32
+ _gpu_available = cp.cuda.runtime.getDeviceCount() > 0
33
+ else:
34
+ _gpu_available = False
32
35
  except Exception:
33
36
  _gpu_available = False
34
37
  except Exception: # pragma: no cover - optional dependency path
35
- cp = None # type: ignore[assignment]
38
+ cp = None
36
39
  _gpu_available = False
37
40
 
38
41
  # Simple per-process cache for encoded transactions keyed by the list object's id
gsppy/cli.py CHANGED
@@ -33,49 +33,123 @@ import csv
33
33
  import sys
34
34
  import json
35
35
  import logging
36
- from typing import Dict, List, Tuple
36
+ from typing import Any, Dict, List, Tuple, Union, Optional, cast
37
37
 
38
38
  import click
39
39
 
40
40
  from gsppy.gsp import GSP
41
-
42
- # Configure logging
43
- logging.basicConfig(
44
- level=logging.INFO,
45
- format="%(message)s", # Simplified to keep CLI output clean
46
- handlers=[logging.StreamHandler(sys.stdout)],
47
- )
48
- logger: logging.Logger = logging.getLogger(__name__)
41
+ from gsppy.utils import has_timestamps
49
42
 
50
43
 
51
44
  def setup_logging(verbose: bool) -> None:
52
45
  """
53
- Set the logging level based on the verbosity of the CLI output.
54
- :param verbose: Whether to enable verbose logging.
46
+ Configure logging with standardized format based on verbosity level.
47
+
48
+ When verbose is enabled, provides detailed structured logging with:
49
+ - Timestamps (ISO 8601 format)
50
+ - Log levels
51
+ - Process ID for traceability
52
+ - Module context
53
+
54
+ When verbose is disabled, uses simple format with just the message.
55
+
56
+ Parameters:
57
+ verbose: Whether to enable verbose logging with detailed formatting.
55
58
  """
59
+ # Remove any existing handlers
60
+ root_logger = logging.getLogger()
61
+ for handler in root_logger.handlers[:]:
62
+ root_logger.removeHandler(handler)
63
+
56
64
  if verbose:
57
- logger.setLevel(logging.DEBUG)
65
+ # Detailed format with timestamps, levels, PID, and context for verbose mode
66
+ log_format = "%(asctime)s | %(levelname)-8s | PID:%(process)d | %(name)s | %(message)s"
67
+ date_format = "%Y-%m-%dT%H:%M:%S"
68
+ log_level = logging.DEBUG
58
69
  else:
59
- logger.setLevel(logging.INFO)
70
+ # Simple format for default mode - just the message
71
+ log_format = "%(message)s"
72
+ date_format = None
73
+ log_level = logging.INFO
74
+
75
+ # Configure logging with the appropriate format
76
+ logging.basicConfig(
77
+ level=log_level,
78
+ format=log_format,
79
+ datefmt=date_format,
80
+ handlers=[logging.StreamHandler(sys.stdout)],
81
+ force=True, # Force reconfiguration even if already configured
82
+ )
83
+
84
+
85
+ logger: logging.Logger = logging.getLogger(__name__)
60
86
 
61
87
 
62
- def read_transactions_from_json(file_path: str) -> List[List[str]]:
88
+ def read_transactions_from_json(file_path: str) -> Union[List[List[str]], List[List[Tuple[str, float]]]]:
63
89
  """
64
90
  Read transactions from a JSON file.
65
91
 
92
+ Supports both simple transactions and timestamped transactions:
93
+ - Simple: [["A", "B", "C"], ["D", "E"]]
94
+ - Timestamped: [[["A", 1], ["B", 3]], [["D", 2], ["E", 5]]]
95
+ where the first element is the item and the second element is the timestamp
96
+
66
97
  Parameters:
67
98
  file_path (str): Path to the file containing transactions.
68
99
 
69
100
  Returns:
70
- List[List]: Parsed transactions from the file.
101
+ Union[List[List[str]], List[List[Tuple[str, float]]]]:
102
+ Parsed transactions from the file. For timestamped data,
103
+ inner lists are converted to tuples (item, timestamp).
71
104
 
72
105
  Raises:
73
106
  ValueError: If the file cannot be read or does not contain valid JSON.
74
107
  """
75
108
  try:
76
109
  with open(file_path, "r", encoding="utf-8") as f:
77
- transactions: List[List[str]] = json.load(f)
78
- return transactions
110
+ raw_data: Any = json.load(f)
111
+
112
+ if not isinstance(raw_data, list):
113
+ raise ValueError("JSON must contain a top-level list of transactions.")
114
+
115
+ raw_transactions: List[List[Union[str, Tuple[str, float]]]] = cast(
116
+ List[List[Union[str, Tuple[str, float]]]], raw_data
117
+ )
118
+
119
+ # Check if this is timestamped data using the helper function.
120
+ # Use defensive checks to avoid errors on malformed data:
121
+ # - Find the first non-empty transaction instead of assuming index 0 is non-empty.
122
+ # - Normalize inner list pairs (from json.load) to tuples before calling has_timestamps.
123
+ first_non_empty_transaction: Optional[List[Union[str, Tuple[str, float]]]] = next(
124
+ (transaction for transaction in raw_transactions if transaction),
125
+ None,
126
+ )
127
+
128
+ is_timestamped = False
129
+ if first_non_empty_transaction is not None:
130
+ # Normalize to the exact input type expected by has_timestamps
131
+ normalized_first: List[Union[str, Tuple[str, float]]] = []
132
+ for item in first_non_empty_transaction:
133
+ if isinstance(item, list) and len(item) == 2:
134
+ normalized_first.append((str(item[0]), float(item[1])))
135
+ elif isinstance(item, tuple):
136
+ normalized_first.append(cast(Tuple[str, float], item))
137
+ else:
138
+ normalized_first.append(str(item))
139
+
140
+ is_timestamped = has_timestamps(normalized_first)
141
+
142
+ if is_timestamped:
143
+ # Convert timestamped data: [[["A", 1], ["B", 2]]] -> [[("A", 1), ("B", 2)]]
144
+ transactions: List[List[Tuple[str, float]]] = [
145
+ [cast(Tuple[str, float], tuple(item) if isinstance(item, list) else item) for item in transaction]
146
+ for transaction in raw_transactions
147
+ ]
148
+ return transactions
149
+
150
+ # Simple transactions remain as-is (or invalid data passed through for GSP to validate)
151
+ simple_transactions: List[List[str]] = [[str(item) for item in transaction] for transaction in raw_transactions]
152
+ return simple_transactions
79
153
  except Exception as e:
80
154
  msg = f"Error reading transaction data from JSON file '{file_path}': {e}"
81
155
  logging.error(msg)
@@ -112,7 +186,7 @@ def read_transactions_from_csv(file_path: str) -> List[List[str]]:
112
186
  raise ValueError(msg) from e
113
187
 
114
188
 
115
- def detect_and_read_file(file_path: str) -> List[List[str]]:
189
+ def detect_and_read_file(file_path: str) -> Union[List[List[str]], List[List[Tuple[str, float]]]]:
116
190
  """
117
191
  Detect file format (CSV or JSON) and read transactions.
118
192
 
@@ -120,7 +194,8 @@ def detect_and_read_file(file_path: str) -> List[List[str]]:
120
194
  file_path (str): Path to the file containing transactions.
121
195
 
122
196
  Returns:
123
- List[List]: Parsed transactions from the file.
197
+ Union[List[List[str]], List[List[Tuple[str, float]]]]:
198
+ Parsed transactions from the file.
124
199
 
125
200
  Raises:
126
201
  ValueError: If the file format is unsupported or reading fails.
@@ -163,10 +238,53 @@ def detect_and_read_file(file_path: str) -> List[List[str]]:
163
238
  show_default=True,
164
239
  help="Backend to use for support counting.",
165
240
  )
241
+ @click.option(
242
+ "--mingap",
243
+ type=float,
244
+ default=None,
245
+ help="Minimum time gap required between consecutive items in patterns (requires timestamped transactions).",
246
+ )
247
+ @click.option(
248
+ "--maxgap",
249
+ type=float,
250
+ default=None,
251
+ help="Maximum time gap allowed between consecutive items in patterns (requires timestamped transactions).",
252
+ )
253
+ @click.option(
254
+ "--maxspan",
255
+ type=float,
256
+ default=None,
257
+ help="Maximum time span from first to last item in patterns (requires timestamped transactions).",
258
+ )
166
259
  @click.option("--verbose", is_flag=True, help="Enable verbose output for debugging purposes.")
167
- def main(file_path: str, min_support: float, backend: str, verbose: bool) -> None:
260
+ def main(
261
+ file_path: str,
262
+ min_support: float,
263
+ backend: str,
264
+ mingap: Optional[float],
265
+ maxgap: Optional[float],
266
+ maxspan: Optional[float],
267
+ verbose: bool,
268
+ ) -> None:
168
269
  """
169
270
  Run the GSP algorithm on transactional data from a file.
271
+
272
+ Supports both simple transactions (items only) and timestamped transactions
273
+ (item-timestamp pairs) for temporal pattern mining.
274
+
275
+ Examples:
276
+ Basic usage without temporal constraints:
277
+
278
+ ```bash
279
+ gsppy --file transactions.json --min_support 0.3
280
+ ```
281
+
282
+ With temporal constraints:
283
+
284
+ ```bash
285
+ gsppy --file temporal_data.json --min_support 0.3 --maxgap 10
286
+ gsppy --file events.json --min_support 0.5 --mingap 2 --maxgap 10 --maxspan 20
287
+ ```
170
288
  """
171
289
  setup_logging(verbose)
172
290
 
@@ -177,10 +295,8 @@ def main(file_path: str, min_support: float, backend: str, verbose: bool) -> Non
177
295
  logger.error(f"Error: {e}")
178
296
  sys.exit(1)
179
297
 
180
- # Check min_support
181
- if min_support <= 0.0 or min_support > 1.0:
182
- logger.error("Error: min_support must be in the range (0.0, 1.0].")
183
- sys.exit(1)
298
+ # Validate parameters
299
+ _validate_parameters(min_support, mingap, maxgap, maxspan)
184
300
 
185
301
  # Select backend for acceleration layer
186
302
  if backend and backend.lower() != "auto":
@@ -188,7 +304,7 @@ def main(file_path: str, min_support: float, backend: str, verbose: bool) -> Non
188
304
 
189
305
  # Initialize and run GSP algorithm
190
306
  try:
191
- gsp = GSP(transactions)
307
+ gsp = GSP(transactions, mingap=mingap, maxgap=maxgap, maxspan=maxspan, verbose=verbose)
192
308
  patterns: List[Dict[Tuple[str, ...], int]] = gsp.search(min_support=min_support)
193
309
  logger.info("Frequent Patterns Found:")
194
310
  for i, level in enumerate(patterns, start=1):
@@ -200,5 +316,43 @@ def main(file_path: str, min_support: float, backend: str, verbose: bool) -> Non
200
316
  sys.exit(1)
201
317
 
202
318
 
319
+ def _validate_parameters(
320
+ min_support: float,
321
+ mingap: Optional[float],
322
+ maxgap: Optional[float],
323
+ maxspan: Optional[float],
324
+ ) -> None:
325
+ """
326
+ Validate input parameters for GSP algorithm.
327
+
328
+ Args:
329
+ min_support: Minimum support threshold
330
+ mingap: Minimum time gap constraint
331
+ maxgap: Maximum time gap constraint
332
+ maxspan: Maximum time span constraint
333
+
334
+ Raises:
335
+ SystemExit: If validation fails
336
+ """
337
+ # Check min_support
338
+ if min_support <= 0.0 or min_support > 1.0:
339
+ logger.error("Error: min_support must be in the range (0.0, 1.0].")
340
+ sys.exit(1)
341
+
342
+ # Validate temporal constraints
343
+ if mingap is not None and mingap < 0:
344
+ logger.error("Error: mingap must be non-negative.")
345
+ sys.exit(1)
346
+ if maxgap is not None and maxgap < 0:
347
+ logger.error("Error: maxgap must be non-negative.")
348
+ sys.exit(1)
349
+ if maxspan is not None and maxspan < 0:
350
+ logger.error("Error: maxspan must be non-negative.")
351
+ sys.exit(1)
352
+ if mingap is not None and maxgap is not None and mingap > maxgap:
353
+ logger.error("Error: mingap cannot be greater than maxgap.")
354
+ sys.exit(1)
355
+
356
+
203
357
  if __name__ == "__main__":
204
358
  main()
gsppy/gsp.py CHANGED
@@ -88,11 +88,17 @@ Version:
88
88
  import math
89
89
  import logging
90
90
  import multiprocessing as mp
91
- from typing import Dict, List, Tuple, Optional
91
+ from typing import Dict, List, Tuple, Union, Optional, cast
92
92
  from itertools import chain
93
93
  from collections import Counter
94
94
 
95
- from gsppy.utils import split_into_batches, is_subsequence_in_list, generate_candidates_from_previous
95
+ from gsppy.utils import (
96
+ has_timestamps,
97
+ split_into_batches,
98
+ is_subsequence_in_list,
99
+ generate_candidates_from_previous,
100
+ is_subsequence_in_list_with_time_constraints,
101
+ )
96
102
  from gsppy.accelerate import support_counts as support_counts_accel
97
103
 
98
104
  logger: logging.Logger = logging.getLogger(__name__)
@@ -117,41 +123,98 @@ class GSP:
117
123
  k-sequence for pattern generation.
118
124
  """
119
125
 
120
- def __init__(self, raw_transactions: List[List[str]]):
126
+ def __init__(
127
+ self,
128
+ raw_transactions: Union[List[List[str]], List[List[Tuple[str, float]]]],
129
+ mingap: Optional[float] = None,
130
+ maxgap: Optional[float] = None,
131
+ maxspan: Optional[float] = None,
132
+ verbose: bool = False,
133
+ ):
121
134
  """
122
135
  Initialize the GSP algorithm with raw transactional data.
123
136
 
124
137
  Parameters:
125
- raw_transactions (List[List]): Input transaction dataset where each transaction
126
- is a list of items (e.g., [['A', 'B'], ['B', 'C', 'D']]).
138
+ raw_transactions (Union[List[List[str]], List[List[Tuple[str, float]]]]):
139
+ Input transaction dataset where each transaction is either:
140
+ - A list of items (e.g., [['A', 'B'], ['B', 'C', 'D']])
141
+ - A list of (item, timestamp) tuples (e.g., [[('A', 1.0), ('B', 2.0)]])
142
+ mingap (Optional[float]): Minimum time gap required between consecutive items in patterns.
143
+ maxgap (Optional[float]): Maximum time gap allowed between consecutive items in patterns.
144
+ maxspan (Optional[float]): Maximum time span from first to last item in patterns.
145
+ verbose (bool): Enable verbose logging output with detailed progress information.
146
+ Default is False (minimal output).
127
147
 
128
148
  Attributes Initialized:
129
149
  - Processes the input raw transaction dataset.
130
150
  - Computes unique singleton candidates (`unique_candidates`).
131
151
  - Extracts the maximum transaction size (`max_size`) from the dataset for limiting
132
152
  the search space.
153
+ - Stores temporal constraints for use during pattern mining.
133
154
 
134
155
  Raises:
135
156
  ValueError: If the input transaction dataset is empty, contains
136
157
  fewer than two transactions, or is not properly formatted.
158
+ Also raised if temporal constraints are invalid.
137
159
  """
138
160
  self.freq_patterns: List[Dict[Tuple[str, ...], int]] = []
161
+ self.mingap = mingap
162
+ self.maxgap = maxgap
163
+ self.maxspan = maxspan
164
+ self.verbose = verbose
165
+ self._configure_logging()
166
+ self._validate_temporal_constraints()
139
167
  self._pre_processing(raw_transactions)
140
168
 
141
- def _pre_processing(self, raw_transactions: List[List[str]]) -> None:
169
+ def _configure_logging(self) -> None:
170
+ """
171
+ Configure logging for the GSP instance based on verbosity setting.
172
+
173
+ When verbose is True, sets the module logger to DEBUG level for detailed output.
174
+ When verbose is False, sets the module logger to WARNING level for minimal output.
175
+
176
+ This method intentionally avoids modifying the root logger to prevent
177
+ unexpected global logging side effects, especially in multiprocessing
178
+ environments.
179
+ """
180
+ if self.verbose:
181
+ logger.setLevel(logging.DEBUG)
182
+ else:
183
+ logger.setLevel(logging.WARNING)
184
+
185
+ def _validate_temporal_constraints(self) -> None:
186
+ """
187
+ Validate temporal constraint parameters.
188
+
189
+ Raises:
190
+ ValueError: If any temporal constraint is negative or if mingap > maxgap.
191
+ """
192
+ if self.mingap is not None and self.mingap < 0:
193
+ raise ValueError("mingap must be non-negative")
194
+ if self.maxgap is not None and self.maxgap < 0:
195
+ raise ValueError("maxgap must be non-negative")
196
+ if self.maxspan is not None and self.maxspan < 0:
197
+ raise ValueError("maxspan must be non-negative")
198
+ if self.mingap is not None and self.maxgap is not None and self.mingap > self.maxgap:
199
+ raise ValueError("mingap cannot be greater than maxgap")
200
+
201
+ def _pre_processing(self, raw_transactions: Union[List[List[str]], List[List[Tuple[str, float]]]]) -> None:
142
202
  """
143
203
  Validate and preprocess the input transactional dataset.
144
204
 
145
205
  This method ensures that the dataset is formatted correctly and converts the transactions
146
206
  into tuples while counting unique singleton candidates for initial support computation steps.
207
+ It handles both simple transactions (items only) and timestamped transactions.
147
208
 
148
209
  Parameters:
149
- raw_transactions (List[List]): Input transactional data.
210
+ raw_transactions (Union[List[List[str]], List[List[Tuple[str, float]]]]):
211
+ Input transactional data (with or without timestamps).
150
212
 
151
213
  Attributes Set:
152
214
  - `transactions`: The preprocessed transactions converted to tuples.
153
215
  - `unique_candidates`: A list of unique singleton candidates derived from the dataset.
154
216
  - `max_size`: The length of the largest transaction in the data.
217
+ - `has_timestamps`: Boolean indicating if transactions include timestamps.
155
218
 
156
219
  Raises:
157
220
  ValueError: If the dataset is empty, improperly formatted, or contains fewer than 2 transactions.
@@ -171,28 +234,71 @@ class GSP:
171
234
  raise ValueError(msg)
172
235
 
173
236
  logger.info("Pre-processing transactions...")
237
+
238
+ # Detect if transactions have timestamps by checking non-empty transactions
239
+ self.has_timestamps = False
240
+ for tx in raw_transactions:
241
+ if tx: # Check non-empty transactions
242
+ tx_sequence = cast(List[Union[str, Tuple[str, float]]], tx)
243
+ self.has_timestamps = has_timestamps(tx_sequence)
244
+ if self.has_timestamps:
245
+ logger.debug("Detected timestamped transactions")
246
+ break
247
+
248
+ # Validate temporal constraints are only used with timestamps
249
+ if (self.mingap is not None or self.maxgap is not None or self.maxspan is not None) and not self.has_timestamps:
250
+ logger.warning(
251
+ "Temporal constraints specified but transactions do not have timestamps. "
252
+ "Constraints will be ignored."
253
+ )
254
+ # Clear temporal constraints since they cannot be applied
255
+ self.mingap = None
256
+ self.maxgap = None
257
+ self.maxspan = None
258
+
174
259
  self.max_size: int = max(len(item) for item in raw_transactions)
175
- self.transactions: List[Tuple[str, ...]] = [tuple(transaction) for transaction in raw_transactions]
176
- counts: Counter[str] = Counter(chain.from_iterable(raw_transactions))
260
+
261
+ if self.has_timestamps:
262
+ # For timestamped transactions, convert to tuples and extract items for counting
263
+ timestamped_txs = cast(List[List[Tuple[str, float]]], raw_transactions)
264
+ self.transactions = [tuple(transaction) for transaction in timestamped_txs]
265
+ # Extract just the items for counting unique candidates
266
+ all_items = chain.from_iterable([[item for item, _ in tx] for tx in timestamped_txs])
267
+ counts: Counter[str] = Counter(all_items)
268
+ else:
269
+ # For non-timestamped transactions, process as before
270
+ simple_txs = cast(List[List[str]], raw_transactions)
271
+ self.transactions = [tuple(transaction) for transaction in simple_txs]
272
+ counts: Counter[str] = Counter(chain.from_iterable(simple_txs))
273
+
177
274
  # Start with singleton candidates (1-sequences)
178
275
  self.unique_candidates: List[Tuple[str, ...]] = [(item,) for item in counts.keys()]
179
276
  logger.debug("Unique candidates: %s", self.unique_candidates)
180
277
 
181
278
  @staticmethod
182
279
  def _worker_batch(
183
- batch: List[Tuple[str, ...]], transactions: List[Tuple[str, ...]], min_support: int
280
+ batch: List[Tuple[str, ...]],
281
+ transactions: List[Union[Tuple[str, ...], Tuple[Tuple[str, float], ...]]],
282
+ min_support: int,
283
+ mingap: Optional[float] = None,
284
+ maxgap: Optional[float] = None,
285
+ maxspan: Optional[float] = None,
184
286
  ) -> List[Tuple[Tuple[str, ...], int]]:
185
287
  """
186
288
  Evaluate a batch of candidate sequences to compute their support.
187
289
 
188
290
  This method iterates over the candidates in the given batch and checks their frequency
189
291
  of appearance across all transactions. Candidates meeting the user-defined minimum
190
- support threshold are returned.
292
+ support threshold are returned. Supports temporal constraints when timestamps are present.
191
293
 
192
294
  Parameters:
193
295
  batch (List[Tuple]): A batch of candidate sequences, where each sequence is represented as a tuple.
194
- transactions (List[Tuple]): Preprocessed transactions as tuples.
296
+ transactions (List[Union[Tuple[str, ...], Tuple[Tuple[str, float], ...]]]):
297
+ Preprocessed transactions as tuples (with or without timestamps).
195
298
  min_support (int): Absolute minimum support count required for a candidate to be considered frequent.
299
+ mingap (Optional[float]): Minimum time gap between consecutive items.
300
+ maxgap (Optional[float]): Maximum time gap between consecutive items.
301
+ maxspan (Optional[float]): Maximum time span from first to last item.
196
302
 
197
303
  Returns:
198
304
  List[Tuple[Tuple, int]]: A list of tuples where each tuple contains:
@@ -200,8 +306,27 @@ class GSP:
200
306
  - The candidate's support count.
201
307
  """
202
308
  results: List[Tuple[Tuple[str, ...], int]] = []
309
+ has_temporal = mingap is not None or maxgap is not None or maxspan is not None
310
+
311
+ # Detect if transactions have timestamps using the helper function,
312
+ # based on the first non-empty transaction in the batch.
313
+ first_non_empty_tx = next((t for t in transactions if t), None)
314
+ has_timestamps_flag = bool(first_non_empty_tx and has_timestamps(first_non_empty_tx))
315
+
203
316
  for item in batch:
204
- frequency = sum(1 for t in transactions if is_subsequence_in_list(item, t))
317
+ if has_timestamps_flag or has_temporal:
318
+ # Use temporal-aware checking for timestamped transactions
319
+ frequency = sum(
320
+ 1
321
+ for t in transactions
322
+ if is_subsequence_in_list_with_time_constraints(
323
+ item, t, mingap=mingap, maxgap=maxgap, maxspan=maxspan
324
+ )
325
+ )
326
+ else:
327
+ # Use standard non-temporal checking for simple transactions
328
+ frequency = sum(1 for t in transactions if is_subsequence_in_list(item, t))
329
+
205
330
  if frequency >= min_support:
206
331
  results.append((item, frequency))
207
332
  return results
@@ -228,7 +353,7 @@ class GSP:
228
353
  with mp.Pool(processes=mp.cpu_count()) as pool:
229
354
  batch_results = pool.starmap(
230
355
  self._worker_batch, # Process a batch at a time
231
- [(batch, self.transactions, min_support) for batch in batches],
356
+ [(batch, self.transactions, min_support, self.mingap, self.maxgap, self.maxspan) for batch in batches],
232
357
  )
233
358
 
234
359
  # Flatten the list of results and convert to a dictionary
@@ -245,9 +370,21 @@ class GSP:
245
370
  Calculate support counts for candidate sequences using the fastest available backend.
246
371
  This will try the Rust extension if available (and configured), otherwise fall back to
247
372
  the Python multiprocessing implementation.
373
+
374
+ Note: When temporal constraints are active or transactions have timestamps,
375
+ the Python implementation is always used as the accelerated backends do not yet
376
+ support temporal constraints or timestamped transactions.
248
377
  """
378
+ # Use Python implementation when temporal constraints are active or timestamps present
379
+ has_temporal = self.mingap is not None or self.maxgap is not None or self.maxspan is not None
380
+ if has_temporal or self.has_timestamps:
381
+ return self._support_python(items, min_support, batch_size)
382
+
383
+ # For non-timestamped transactions, we can use accelerated support counting
384
+ # Cast is safe here because we've confirmed no timestamps above
385
+ non_timestamped_transactions = cast(List[Tuple[str, ...]], self.transactions)
249
386
  try:
250
- return support_counts_accel(self.transactions, items, min_support, batch_size, backend=backend)
387
+ return support_counts_accel(non_timestamped_transactions, items, min_support, batch_size, backend=backend)
251
388
  except Exception:
252
389
  # Fallback to Python implementation on any acceleration failure
253
390
  return self._support_python(items, min_support, batch_size)
@@ -270,6 +407,7 @@ class GSP:
270
407
  min_support: float = 0.2,
271
408
  max_k: Optional[int] = None,
272
409
  backend: Optional[str] = None,
410
+ verbose: Optional[bool] = None,
273
411
  ) -> List[Dict[Tuple[str, ...], int]]:
274
412
  """
275
413
  Execute the Generalized Sequential Pattern (GSP) mining algorithm.
@@ -278,10 +416,19 @@ class GSP:
278
416
  in the input transaction dataset. Patterns are extracted iteratively at each k-sequence level,
279
417
  starting from singleton sequences, until no further frequent patterns can be found.
280
418
 
419
+ When temporal constraints (mingap, maxgap, maxspan) are specified during initialization,
420
+ the algorithm enforces these constraints during pattern matching, allowing for time-aware
421
+ sequential pattern mining.
422
+
281
423
  Parameters:
282
424
  min_support (float): Minimum support threshold as a fraction of total transactions.
283
425
  For example, `0.3` means that a sequence is frequent if it
284
426
  appears in at least 30% of all transactions.
427
+ max_k (Optional[int]): Maximum length of patterns to mine. If None, mines up to max transaction length.
428
+ backend (Optional[str]): Backend to use for support counting ('auto', 'python', 'rust', 'gpu').
429
+ Note: temporal constraints always use Python backend.
430
+ verbose (Optional[bool]): Override instance verbosity setting for this search.
431
+ If None, uses the instance's verbose setting.
285
432
 
286
433
  Returns:
287
434
  List[Dict[Tuple[str, ...], int]]: A list of dictionaries containing frequent patterns
@@ -296,8 +443,8 @@ class GSP:
296
443
  and completion.
297
444
  - Status updates for each iteration until the algorithm terminates.
298
445
 
299
- Example:
300
- Basic usage with the default backend:
446
+ Examples:
447
+ Basic usage without temporal constraints:
301
448
 
302
449
  ```python
303
450
  from gsppy.gsp import GSP
@@ -311,11 +458,41 @@ class GSP:
311
458
  gsp = GSP(transactions)
312
459
  patterns = gsp.search(min_support=0.3)
313
460
  ```
461
+
462
+ Usage with temporal constraints (requires timestamped transactions):
463
+
464
+ ```python
465
+ from gsppy.gsp import GSP
466
+
467
+ # Transactions with timestamps (item, timestamp) pairs
468
+ # where timestamps can be in any unit (seconds, minutes, hours, days, etc.)
469
+ timestamped_transactions = [
470
+ [("A", 1), ("B", 3), ("C", 5)], # timestamps: 1, 3, 5
471
+ [("A", 2), ("B", 10), ("C", 12)], # timestamps: 2, 10, 12
472
+ [("A", 1), ("C", 4)], # timestamps: 1, 4
473
+ ]
474
+
475
+ # Find patterns with maxgap of 5 time units between consecutive items
476
+ gsp = GSP(timestamped_transactions, maxgap=5)
477
+ patterns = gsp.search(min_support=0.5)
478
+ # Pattern ("A", "B", "C") won't be found in transaction 2
479
+ # because gap between A and B is 8 (exceeds maxgap=5)
480
+ ```
314
481
  """
482
+ # Override verbosity if specified for this search
483
+ original_verbose = self.verbose
484
+ if verbose is not None:
485
+ self.verbose = verbose
486
+ self._configure_logging()
487
+
315
488
  if not 0.0 < min_support <= 1.0:
316
489
  raise ValueError("Minimum support must be in the range (0.0, 1.0]")
317
490
 
318
491
  logger.info(f"Starting GSP algorithm with min_support={min_support}...")
492
+ if self.mingap is not None or self.maxgap is not None or self.maxspan is not None:
493
+ logger.info(
494
+ f"Using temporal constraints: mingap={self.mingap}, maxgap={self.maxgap}, maxspan={self.maxspan}"
495
+ )
319
496
 
320
497
  # Convert fractional support to absolute count (ceil to preserve threshold semantics)
321
498
  abs_min_support = int(math.ceil(len(self.transactions) * float(min_support)))
@@ -352,4 +529,10 @@ class GSP:
352
529
 
353
530
  self._print_status(k_items, candidates)
354
531
  logger.info("GSP algorithm completed.")
532
+
533
+ # Restore original verbosity if it was overridden
534
+ if verbose is not None:
535
+ self.verbose = original_verbose
536
+ self._configure_logging()
537
+
355
538
  return self.freq_patterns[:-1]
gsppy/utils.py CHANGED
@@ -21,11 +21,48 @@ These utilities are designed to support sequence processing tasks and can be
21
21
  adapted to various domains, such as data mining, recommendation systems, and sequence analysis.
22
22
  """
23
23
 
24
- from typing import Dict, List, Tuple, Sequence, Generator
24
+ from typing import Dict, List, Tuple, Union, Optional, Sequence, Generator, cast
25
25
  from functools import lru_cache
26
26
  from itertools import product
27
27
 
28
28
 
29
+ def has_timestamps(
30
+ sequence: Union[
31
+ Tuple[Union[str, Tuple[str, Union[int, float]]], ...], List[Union[str, Tuple[str, Union[int, float]]]]
32
+ ],
33
+ ) -> bool:
34
+ """
35
+ Check if a sequence contains timestamped data (item-timestamp pairs).
36
+
37
+ Parameters:
38
+ sequence: A sequence that may contain timestamped data
39
+
40
+ Returns:
41
+ bool: True if the sequence contains timestamped data, False otherwise
42
+
43
+ Examples:
44
+ >>> has_timestamps((("A", 1), ("B", 2)))
45
+ True
46
+ >>> has_timestamps(("A", "B", "C"))
47
+ False
48
+ """
49
+ if not sequence or len(sequence) == 0:
50
+ return False
51
+
52
+ first_item = sequence[0]
53
+
54
+ # Check if first item is a tuple or list with 2 elements where second is numeric
55
+ if isinstance(first_item, (tuple, list)) and len(first_item) == 2:
56
+ try:
57
+ # Try to interpret second element as a number
58
+ float(first_item[1])
59
+ return True
60
+ except (TypeError, ValueError):
61
+ return False
62
+
63
+ return False
64
+
65
+
29
66
  def split_into_batches(
30
67
  items: Sequence[Tuple[str, ...]], batch_size: int
31
68
  ) -> Generator[Sequence[Tuple[str, ...]], None, None]:
@@ -59,11 +96,11 @@ def is_subsequence_in_list(subsequence: Tuple[str, ...], sequence: Tuple[str, ..
59
96
  bool: True if the subsequence is found, False otherwise.
60
97
 
61
98
  Examples:
62
- >>> is_subsequence_in_list(('a', 'c'), ('a', 'b', 'c'))
99
+ >>> is_subsequence_in_list(("a", "c"), ("a", "b", "c"))
63
100
  True
64
- >>> is_subsequence_in_list(('a', 'c'), ('c', 'a'))
101
+ >>> is_subsequence_in_list(("a", "c"), ("c", "a"))
65
102
  False
66
- >>> is_subsequence_in_list(('a', 'b'), ('a', 'b', 'c'))
103
+ >>> is_subsequence_in_list(("a", "b"), ("a", "b", "c"))
67
104
  True
68
105
  """
69
106
  # Handle the case where the subsequence is empty - it should not exist in any sequence
@@ -86,6 +123,249 @@ def is_subsequence_in_list(subsequence: Tuple[str, ...], sequence: Tuple[str, ..
86
123
  return False
87
124
 
88
125
 
126
def is_subsequence_in_list_with_time_constraints(
    subsequence: Tuple[str, ...],
    sequence: Union[Tuple[str, ...], Tuple[Tuple[str, float], ...]],
    mingap: Optional[float] = None,
    maxgap: Optional[float] = None,
    maxspan: Optional[float] = None,
) -> bool:
    """
    Check if a subsequence exists within a sequence with optional temporal constraints.

    This function extends the standard subsequence check to support temporal constraints
    for time-constrained sequential pattern mining. It handles both simple sequences
    (items only) and timestamped sequences (item-timestamp pairs).

    Temporal Constraints:
        - mingap: Minimum time gap required between consecutive items in the pattern.
        - maxgap: Maximum time gap allowed between consecutive items in the pattern.
        - maxspan: Maximum time span from the first to last item in the pattern.

    Parameters:
        subsequence (Tuple[str, ...]): The pattern to search for (items only, no timestamps).
        sequence (Union[Tuple[str, ...], Tuple[Tuple[str, float], ...]]):
            The sequence to search within. Can be:
            - Simple: Tuple of items (e.g., ('A', 'B', 'C'))
            - Timestamped: Tuple of (item, timestamp) pairs (e.g., (('A', 1.0), ('B', 3.0)))
        mingap (Optional[float]): Minimum time between consecutive pattern elements.
        maxgap (Optional[float]): Maximum time between consecutive pattern elements.
        maxspan (Optional[float]): Maximum time from first to last pattern element.

    Returns:
        bool: True if the subsequence is found respecting temporal constraints, False otherwise.
        An empty subsequence, or one longer than the sequence, is never found.

    Examples:
        >>> # Without timestamps (backward compatible)
        >>> is_subsequence_in_list_with_time_constraints(("A", "C"), ("A", "B", "C"))
        True

        >>> # With timestamps and maxgap: the gap between A and C is 9, exceeds maxgap=5
        >>> seq = (("A", 1), ("B", 3), ("C", 10))
        >>> is_subsequence_in_list_with_time_constraints(("A", "C"), seq, maxgap=5)
        False

        >>> # With timestamps and mingap: the gap between A and C is 2, less than mingap=3
        >>> seq = (("A", 1), ("B", 2), ("C", 3))
        >>> is_subsequence_in_list_with_time_constraints(("A", "C"), seq, mingap=3)
        False

        >>> # With timestamps and maxspan: the span from A to C is 11, exceeds maxspan=10
        >>> seq = (("A", 1), ("B", 5), ("C", 12))
        >>> is_subsequence_in_list_with_time_constraints(("A", "C"), seq, maxspan=10)
        False
    """
    # An empty pattern is never considered present.
    if not subsequence:
        return False

    # A pattern longer than the sequence cannot fit.
    if len(subsequence) > len(sequence):
        return False

    # Determine if sequence has timestamps
    has_timestamps_flag = has_timestamps(sequence)

    # If no temporal constraints and no timestamps, use the optimized cached version
    no_constraints = mingap is None and maxgap is None and maxspan is None
    if not has_timestamps_flag and no_constraints:
        return is_subsequence_in_list(subsequence, sequence)

    # Split the sequence into parallel item / timestamp tuples (timestamps may be None)
    seq_items, seq_times = _extract_items_and_timestamps(sequence, has_timestamps_flag)

    # Try to find a match starting from each position
    return _find_temporal_match(subsequence, seq_items, seq_times, mingap, maxgap, maxspan)
198
+
199
+
200
+ def _extract_items_and_timestamps(
201
+ sequence: Union[Tuple[str, ...], Tuple[Tuple[str, float], ...]],
202
+ has_timestamps_flag: bool,
203
+ ) -> Tuple[Tuple[str, ...], Optional[Tuple[float, ...]]]:
204
+ """
205
+ Extract items and timestamps from a sequence.
206
+
207
+ Args:
208
+ sequence: The sequence to extract from
209
+ has_timestamps_flag: Whether the sequence has timestamps
210
+
211
+ Returns:
212
+ Tuple of (items, timestamps) where timestamps is None if not present
213
+ """
214
+ if has_timestamps_flag:
215
+ # For timestamped sequences, extract items and timestamps separately
216
+ timestamped_seq = cast(Tuple[Tuple[str, float], ...], sequence)
217
+ seq_items = tuple(item for item, _ in timestamped_seq)
218
+ seq_times = tuple(time for _, time in timestamped_seq)
219
+ return seq_items, seq_times
220
+ else:
221
+ # For non-timestamped sequences, return items directly with None for timestamps
222
+ simple_seq = cast(Tuple[str, ...], sequence)
223
+ return simple_seq, None
224
+
225
+
226
def _find_temporal_match(
    subsequence: Tuple[str, ...],
    seq_items: Tuple[str, ...],
    seq_times: Optional[Tuple[float, ...]],
    mingap: Optional[float],
    maxgap: Optional[float],
    maxspan: Optional[float],
) -> bool:
    """
    Report whether the pattern can be matched from any start position.

    Args:
        subsequence: Pattern to search for
        seq_items: Items in the sequence
        seq_times: Timestamps (None if not present)
        mingap: Minimum gap constraint
        maxgap: Maximum gap constraint
        maxspan: Maximum span constraint

    Returns:
        True if match found, False otherwise
    """
    # Start positions beyond this leave too few items for the pattern to fit.
    final_start = len(seq_items) - len(subsequence)

    # any() stops at the first start position that yields a match.
    return any(
        _try_match_from_position(position, subsequence, seq_items, seq_times, mingap, maxgap, maxspan)
        for position in range(final_start + 1)
    )
257
+
258
+
259
+ def _try_match_from_position(
260
+ start_idx: int,
261
+ subsequence: Tuple[str, ...],
262
+ seq_items: Tuple[str, ...],
263
+ seq_times: Optional[Tuple[float, ...]],
264
+ mingap: Optional[float],
265
+ maxgap: Optional[float],
266
+ maxspan: Optional[float],
267
+ ) -> bool:
268
+ """
269
+ Try to match subsequence starting from a given position.
270
+
271
+ Args:
272
+ start_idx: Starting position in sequence
273
+ subsequence: Pattern to match
274
+ seq_items: Items in sequence
275
+ seq_times: Timestamps (None if not present)
276
+ mingap: Minimum gap constraint
277
+ maxgap: Maximum gap constraint
278
+ maxspan: Maximum span constraint
279
+
280
+ Returns:
281
+ True if match found, False otherwise
282
+ """
283
+ sub_idx = 0
284
+ matched_indices: List[int] = []
285
+ len_sub = len(subsequence)
286
+ len_seq = len(seq_items)
287
+
288
+ for seq_idx in range(start_idx, len_seq):
289
+ if seq_items[seq_idx] == subsequence[sub_idx]:
290
+ # Check temporal constraints if we have timestamps and have previous matches
291
+ if (
292
+ seq_times is not None
293
+ and matched_indices
294
+ and not _check_temporal_constraints(seq_idx, matched_indices, seq_times, mingap, maxgap)
295
+ ):
296
+ # Skip this occurrence and continue searching for a valid one
297
+ continue
298
+
299
+ matched_indices.append(seq_idx)
300
+ sub_idx += 1
301
+
302
+ # If we've matched the entire subsequence, check maxspan
303
+ if sub_idx == len_sub:
304
+ return _check_maxspan(matched_indices, seq_times, maxspan)
305
+
306
+ return False
307
+
308
+
309
+ def _check_temporal_constraints(
310
+ seq_idx: int,
311
+ matched_indices: List[int],
312
+ seq_times: Tuple[float, ...],
313
+ mingap: Optional[float],
314
+ maxgap: Optional[float],
315
+ ) -> bool:
316
+ """
317
+ Check if temporal constraints are satisfied for a new match.
318
+
319
+ Args:
320
+ seq_idx: Current sequence index
321
+ matched_indices: Previously matched indices
322
+ seq_times: Timestamps
323
+ mingap: Minimum gap constraint
324
+ maxgap: Maximum gap constraint
325
+
326
+ Returns:
327
+ True if constraints satisfied, False otherwise
328
+ """
329
+ prev_idx = matched_indices[-1]
330
+ time_gap = seq_times[seq_idx] - seq_times[prev_idx]
331
+
332
+ # Check mingap constraint
333
+ if mingap is not None and time_gap < mingap:
334
+ return False
335
+
336
+ # Check maxgap constraint
337
+ if maxgap is not None and time_gap > maxgap:
338
+ return False
339
+
340
+ return True
341
+
342
+
343
+ def _check_maxspan(
344
+ matched_indices: List[int],
345
+ seq_times: Optional[Tuple[float, ...]],
346
+ maxspan: Optional[float],
347
+ ) -> bool:
348
+ """
349
+ Check if maxspan constraint is satisfied.
350
+
351
+ Args:
352
+ matched_indices: Matched sequence indices
353
+ seq_times: Timestamps (None if not present)
354
+ maxspan: Maximum span constraint
355
+
356
+ Returns:
357
+ True if constraint satisfied or not applicable, False otherwise
358
+ """
359
+ if seq_times is not None and maxspan is not None:
360
+ first_idx = matched_indices[0]
361
+ last_idx = matched_indices[-1]
362
+ span = seq_times[last_idx] - seq_times[first_idx]
363
+ if span > maxspan:
364
+ return False
365
+
366
+ return True
367
+
368
+
89
369
  def generate_candidates_from_previous(prev_patterns: Dict[Tuple[str, ...], int]) -> List[Tuple[str, ...]]:
90
370
  """
91
371
  Generate joined candidates from the previous level's frequent patterns.
@@ -96,7 +376,7 @@ def generate_candidates_from_previous(prev_patterns: Dict[Tuple[str, ...], int])
96
376
  Returns:
97
377
  List[Tuple]: Candidate patterns for the next level.
98
378
  """
99
- keys = list(prev_patterns.keys())
379
+ keys: List[Tuple[str, ...]] = list(prev_patterns.keys())
100
380
  return [
101
381
  pattern1 + (pattern2[-1],)
102
382
  for pattern1, pattern2 in product(keys, repeat=2)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: gsppy
3
- Version: 3.3.0
3
+ Version: 3.5.0
4
4
  Summary: GSP (Generalized Sequence Pattern) algorithm in Python
5
5
  Project-URL: Homepage, https://github.com/jacksonpradolima/gsp-py
6
6
  Author-email: Jackson Antonio do Prado Lima <jacksonpradolima@gmail.com>
@@ -41,27 +41,28 @@ Classifier: Topic :: Software Development :: Libraries :: Python Modules
41
41
  Requires-Python: >=3.10
42
42
  Requires-Dist: click>=8.0.0
43
43
  Provides-Extra: dev
44
- Requires-Dist: cython==3.2.3; extra == 'dev'
45
- Requires-Dist: hatch==1.16.2; extra == 'dev'
44
+ Requires-Dist: cython==3.2.4; extra == 'dev'
45
+ Requires-Dist: hatch==1.16.3; extra == 'dev'
46
46
  Requires-Dist: hatchling==1.28.0; extra == 'dev'
47
+ Requires-Dist: hypothesis<7.0.0,>=6.0.0; extra == 'dev'
47
48
  Requires-Dist: pylint==4.0.4; extra == 'dev'
48
- Requires-Dist: pyright==1.1.407; extra == 'dev'
49
+ Requires-Dist: pyright==1.1.408; extra == 'dev'
49
50
  Requires-Dist: pytest-benchmark==5.2.3; extra == 'dev'
50
51
  Requires-Dist: pytest-cov==7.0.0; extra == 'dev'
51
52
  Requires-Dist: pytest==9.0.2; extra == 'dev'
52
- Requires-Dist: ruff==0.14.10; extra == 'dev'
53
- Requires-Dist: tox==4.32.0; extra == 'dev'
54
- Requires-Dist: ty==0.0.8; extra == 'dev'
53
+ Requires-Dist: ruff==0.14.13; extra == 'dev'
54
+ Requires-Dist: tox==4.34.1; extra == 'dev'
55
+ Requires-Dist: ty==0.0.12; extra == 'dev'
55
56
  Provides-Extra: docs
56
57
  Requires-Dist: mkdocs-gen-files<1,>=0.5; extra == 'docs'
57
58
  Requires-Dist: mkdocs-literate-nav<1,>=0.6; extra == 'docs'
58
59
  Requires-Dist: mkdocs-material<10,>=9.5; extra == 'docs'
59
60
  Requires-Dist: mkdocs<2,>=1.6; extra == 'docs'
60
- Requires-Dist: mkdocstrings[python]<0.27,>=0.26; extra == 'docs'
61
+ Requires-Dist: mkdocstrings[python]<1.1,>=0.26; extra == 'docs'
61
62
  Provides-Extra: gpu
62
63
  Requires-Dist: cupy<14,>=11; extra == 'gpu'
63
64
  Provides-Extra: rust
64
- Requires-Dist: maturin==1.10.2; extra == 'rust'
65
+ Requires-Dist: maturin==1.11.5; extra == 'rust'
65
66
  Description-Content-Type: text/markdown
66
67
 
67
68
  [![Docs](https://img.shields.io/badge/Docs-GSP--Py%20Site-3D9970?style=flat-square)](https://jacksonpradolima.github.io/gsp-py/)
@@ -104,6 +105,7 @@ Sequence Pattern (GSP)** algorithm. Ideal for market basket analysis, temporal m
104
105
  6. [💡 Usage](#usage)
105
106
  - [✅ Example: Analyzing Sales Data](#example-analyzing-sales-data)
106
107
  - [📊 Explanation: Support and Results](#explanation-support-and-results)
108
+ - [⏱️ Temporal Constraints](#temporal-constraints)
107
109
  7. [⌨️ Typing](#typing)
108
110
  8. [🌟 Planned Features](#planned-features)
109
111
  9. [🤝 Contributing](#contributing)
@@ -122,6 +124,7 @@ principles**. Using support thresholds, GSP identifies frequent sequences of ite
122
124
  - **Ordered (non-contiguous) matching**: Detects patterns where items appear in order but not necessarily adjacent, following standard GSP semantics. For example, the pattern `('A', 'C')` is found in the sequence `['A', 'B', 'C']`.
123
125
  - **Support-based pruning**: Only retains sequences that meet the minimum support threshold.
124
126
  - **Candidate generation**: Iteratively generates candidate sequences of increasing length.
127
+ - **Temporal constraints**: Support for time-constrained pattern mining with `mingap`, `maxgap`, and `maxspan` parameters to find patterns within specific time windows.
125
128
  - **General-purpose**: Useful in retail, web analytics, social networks, temporal sequence mining, and more.
126
129
 
127
130
  For example:
@@ -372,7 +375,28 @@ gsppy --file path/to/transactions.csv --min_support 0.3 --backend rust
372
375
  - `--file`: Path to your input file (JSON or CSV). **Required**.
373
376
  - `--min_support`: Minimum support threshold as a fraction (e.g., `0.3` for 30%). Default is `0.2`.
374
377
  - `--backend`: Backend to use for support counting. One of `auto` (default), `python`, `rust`, or `gpu`.
375
- - `--verbose`: (Optional) Enable detailed output for debugging.
378
+ - `--verbose`: Enable detailed logging with timestamps, log levels, and process IDs for debugging and traceability.
379
+ - `--mingap`, `--maxgap`, `--maxspan`: Temporal constraints for time-aware pattern mining (requires timestamped transactions).
380
+
381
+ #### Verbose Mode
382
+
383
+ For debugging or to track execution in CI/CD pipelines, use the `--verbose` flag:
384
+
385
+ ```bash
386
+ gsppy --file transactions.json --min_support 0.3 --verbose
387
+ ```
388
+
389
+ This produces structured logging output with timestamps, log levels, and process information:
390
+
391
+ ```
392
+ YYYY-MM-DDTHH:MM:SS | INFO | PID:4179 | gsppy.gsp | Pre-processing transactions...
393
+ YYYY-MM-DDTHH:MM:SS | DEBUG | PID:4179 | gsppy.gsp | Unique candidates: [('Bread',), ('Milk',), ...]
394
+ YYYY-MM-DDTHH:MM:SS | INFO | PID:4179 | gsppy.gsp | Starting GSP algorithm with min_support=0.3...
395
+ YYYY-MM-DDTHH:MM:SS | INFO | PID:4179 | gsppy.gsp | Run 1: 6 candidates filtered to 5.
396
+ ...
397
+ ```
398
+
399
+ For complete logging documentation, see [docs/logging.md](docs/logging.md).
376
400
 
377
401
  #### Example
378
402
 
@@ -469,6 +493,30 @@ result = GSP(transactions).search(min_support)
469
493
  print(result)
470
494
  ```
471
495
 
496
+ ### Verbose Mode for Debugging
497
+
498
+ Enable detailed logging to track algorithm progress and debug issues:
499
+
500
+ ```python
501
+ from gsppy.gsp import GSP
502
+
503
+ # Enable verbose logging for the entire instance
504
+ gsp = GSP(transactions, verbose=True)
505
+ result = gsp.search(min_support=0.3)
506
+
507
+ # Or enable verbose for a specific search only
508
+ gsp = GSP(transactions)
509
+ result = gsp.search(min_support=0.3, verbose=True)
510
+ ```
511
+
512
+ Verbose mode provides:
513
+ - Detailed progress information during execution
514
+ - Candidate generation and filtering statistics
515
+ - Preprocessing and validation details
516
+ - Useful for debugging, research, and CI/CD integration
517
+
518
+ For complete documentation on logging, see [docs/logging.md](docs/logging.md).
519
+
472
520
  ### Output
473
521
 
474
522
  The algorithm will return a list of patterns with their corresponding support.
@@ -535,6 +583,128 @@ result = gsp.search(min_support=0.5) # Need at least 2/4 sequences
535
583
 
536
584
  ---
537
585
 
586
+ ## ⏱️ Temporal Constraints
587
+
588
+ GSP-Py supports **time-constrained sequential pattern mining** with three powerful temporal constraints: `mingap`, `maxgap`, and `maxspan`. These constraints enable domain-specific applications such as medical event mining, retail analytics, and temporal user journey discovery.
589
+
590
+ ### Temporal Constraint Parameters
591
+
592
+ - **`mingap`**: Minimum time gap required between consecutive items in a pattern
593
+ - **`maxgap`**: Maximum time gap allowed between consecutive items in a pattern
594
+ - **`maxspan`**: Maximum time span from the first to the last item in a pattern
595
+
596
+ ### Using Temporal Constraints
597
+
598
+ To use temporal constraints, your transactions must include timestamps as (item, timestamp) tuples:
599
+
600
+ ```python
601
+ from gsppy.gsp import GSP
602
+
603
+ # Transactions with timestamps (e.g., in seconds, hours, days, etc.)
604
+ timestamped_transactions = [
605
+ [("Login", 0), ("Browse", 2), ("AddToCart", 5), ("Purchase", 7)],
606
+ [("Login", 0), ("Browse", 1), ("AddToCart", 15), ("Purchase", 20)],
607
+ [("Login", 0), ("Browse", 3), ("AddToCart", 6), ("Purchase", 8)],
608
+ ]
609
+
610
+ # Find patterns where consecutive events occur within 10 time units
611
+ gsp = GSP(timestamped_transactions, maxgap=10)
612
+ patterns = gsp.search(min_support=0.6)
613
+
614
+ # The pattern ("Browse", "AddToCart", "Purchase") will:
615
+ # - Be found in transaction 1: gaps are 3 and 2 (both ≤ 10) ✅
616
+ # - NOT be found in transaction 2: gap between Browse→AddToCart is 14 (exceeds maxgap) ❌
617
+ # - Be found in transaction 3: gaps are 3 and 2 (both ≤ 10) ✅
618
+ # Result: Support = 2/3 = 67% (above threshold of 60%)
619
+ ```
620
+
621
+ ### CLI Usage with Temporal Constraints
622
+
623
+ ```bash
624
+ # Find patterns with maximum gap of 5 time units
625
+ gsppy --file temporal_data.json --min_support 0.3 --maxgap 5
626
+
627
+ # Find patterns with minimum gap of 2 time units
628
+ gsppy --file temporal_data.json --min_support 0.3 --mingap 2
629
+
630
+ # Find patterns that complete within 10 time units
631
+ gsppy --file temporal_data.json --min_support 0.3 --maxspan 10
632
+
633
+ # Combine multiple constraints
634
+ gsppy --file temporal_data.json --min_support 0.3 --mingap 1 --maxgap 5 --maxspan 10
635
+ ```
636
+
637
+ ### Real-World Examples
638
+
639
+ #### Medical Event Mining
640
+
641
+ ```python
642
+ from gsppy.gsp import GSP
643
+
644
+ # Medical events with timestamps in days
645
+ medical_sequences = [
646
+ [("Symptom", 0), ("Diagnosis", 2), ("Treatment", 5), ("Recovery", 15)],
647
+ [("Symptom", 0), ("Diagnosis", 1), ("Treatment", 20), ("Recovery", 30)],
648
+ [("Symptom", 0), ("Diagnosis", 3), ("Treatment", 6), ("Recovery", 18)],
649
+ ]
650
+
651
+ # Find patterns where consecutive events (e.g., Diagnosis -> Treatment) are at most 10 days apart
652
+ gsp = GSP(medical_sequences, maxgap=10)
653
+ result = gsp.search(min_support=0.5)
654
+
655
+ # Pattern ("Diagnosis", "Treatment") found in sequences 1 & 3 only
656
+ # (sequence 2 has gap of 19 days, exceeding maxgap)
657
+ ```
658
+
659
+ #### Retail Analytics
660
+
661
+ ```python
662
+ from gsppy.gsp import GSP
663
+
664
+ # Customer purchases with timestamps in hours
665
+ purchase_sequences = [
666
+ [("Browse", 0), ("AddToCart", 0.5), ("Purchase", 1)],
667
+ [("Browse", 0), ("AddToCart", 1), ("Purchase", 25)], # Long delay
668
+ [("Browse", 0), ("AddToCart", 0.3), ("Purchase", 0.8)],
669
+ ]
670
+
671
+ # Find purchase journeys that complete within 2 hours
672
+ gsp = GSP(purchase_sequences, maxspan=2)
673
+ result = gsp.search(min_support=0.5)
674
+
675
+ # Full sequence found in 2 out of 3 transactions
676
+ # (sequence 2 has span of 25 hours, exceeding maxspan)
677
+ ```
678
+
679
+ #### User Journey Discovery
680
+
681
+ ```python
682
+ from gsppy.gsp import GSP
683
+
684
+ # Website navigation with timestamps in seconds
685
+ navigation_sequences = [
686
+ [("Home", 0), ("Search", 5), ("Product", 10), ("Checkout", 15)],
687
+ [("Home", 0), ("Search", 3), ("Product", 8), ("Checkout", 180)],
688
+ [("Home", 0), ("Search", 4), ("Product", 9), ("Checkout", 14)],
689
+ ]
690
+
691
+ # Find navigation patterns with:
692
+ # - Minimum 2 seconds between steps (mingap)
693
+ # - Maximum 20 seconds between steps (maxgap)
694
+ # - Complete within 30 seconds total (maxspan)
695
+ gsp = GSP(navigation_sequences, mingap=2, maxgap=20, maxspan=30)
696
+ result = gsp.search(min_support=0.5)
697
+ ```
698
+
699
+ ### Important Notes
700
+
701
+ - Temporal constraints require timestamped transactions (item-timestamp tuples)
702
+ - If temporal constraints are specified but transactions don't have timestamps, a warning is logged and constraints are ignored
703
+ - When using temporal constraints, the Python backend is automatically used (accelerated backends don't yet support temporal constraints)
704
+ - Timestamps can be in any unit (seconds, minutes, hours, days) as long as they're consistent within your dataset
705
+
706
+ ---
707
+
538
708
  ## ⌨️ Typing
539
709
 
540
710
  `gsppy` ships inline type information (PEP 561) via a bundled `py.typed` marker. The public API is re-exported from
@@ -554,11 +724,6 @@ We are actively working to improve GSP-Py. Here are some exciting features plann
554
724
  2. **Support for Preprocessing and Postprocessing**:
555
725
  - Add hooks to allow users to transform datasets before mining and customize the output results.
556
726
 
557
- 3. **Support for Time-Constrained Pattern Mining**:
558
- - Extend GSP-Py to handle temporal datasets by allowing users to define time constraints (e.g., maximum time gaps
559
- between events, time windows) during the sequence mining process.
560
- - Enable candidate pruning and support calculations based on these temporal constraints.
561
-
562
727
  Want to contribute or suggest an
563
728
  improvement? [Open a discussion or issue!](https://github.com/jacksonpradolima/gsp-py/issues)
564
729
 
@@ -583,16 +748,34 @@ uv run ruff check .
583
748
  uv run pyright
584
749
  ```
585
750
 
751
+ ### Testing & Fuzzing
752
+
753
+ GSP-Py includes comprehensive test coverage, including property-based fuzzing tests using [Hypothesis](https://hypothesis.readthedocs.io/). These fuzzing tests automatically generate random inputs to verify algorithm invariants and discover edge cases. Run the fuzzing tests with:
754
+
755
+ ```bash
756
+ uv run pytest tests/test_gsp_fuzzing.py -v
757
+ ```
758
+
586
759
  ### General Steps:
587
760
 
588
761
  1. Fork the repository.
589
762
  2. Create a feature branch: `git checkout -b feature/my-feature`.
590
- 3. Commit your changes: `git commit -m "Add my feature."`
763
+ 3. Commit your changes using [Conventional Commits](https://www.conventionalcommits.org/) format: `git commit -m "feat: add my feature"`.
591
764
  4. Push to your branch: `git push origin feature/my-feature`.
592
765
  5. Submit a pull request to the main repository!
593
766
 
594
767
  Looking for ideas? Check out our [Planned Features](#planned-features) section.
595
768
 
769
+ ### Release Management
770
+
771
+ GSP-Py uses automated release management with [Conventional Commits](https://www.conventionalcommits.org/). When commits are merged to `main`:
772
+ - **Releases are triggered** by: `fix:` (patch), `feat:` (minor), `perf:` (patch), or `BREAKING CHANGE:` (major)
773
+ - **No release** for: `docs:`, `style:`, `refactor:`, `test:`, `build:`, `ci:`, `chore:`
774
+ - CHANGELOG.md is automatically updated with structured release notes
775
+ - Git tags and GitHub releases are created automatically
776
+
777
+ See [Release Management Guide](docs/RELEASE_MANAGEMENT.md) for details on commit message format and release process.
778
+
596
779
  ---
597
780
 
598
781
  ## 📝 License
@@ -0,0 +1,11 @@
1
+ gsppy/__init__.py,sha256=NMVa-ZWT449wuxZMF9Ym7p-DChOxOibaaqlpPxksfuo,805
2
+ gsppy/accelerate.py,sha256=rDho3ysADETpuhT2SF9voBjd3XRaQUzuA5k_baNACF8,11020
3
+ gsppy/cli.py,sha256=-viXa8VFIF-QvrHYy1vtDxtMm50sM_tZq5B5DMZ1Jtw,12516
4
+ gsppy/gsp.py,sha256=k72pvdmD6jU4AId2rrHQrJ4FBUgtkuC0ntEY8QHGi5c,24486
5
+ gsppy/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ gsppy/utils.py,sha256=dAEq1hEZMN0ZjoocKs_ZIgOI9j_Y6rJEAKneul3zNRo,13501
7
+ gsppy-3.5.0.dist-info/METADATA,sha256=ix2X_VEUTved_DaTsSJMERT-CZ34TUYF0XMC2KeNeuE,29747
8
+ gsppy-3.5.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
9
+ gsppy-3.5.0.dist-info/entry_points.txt,sha256=smvmcIWk424ARIGKOC_BM42hpT_SptKPcIeqs-8u8lM,41
10
+ gsppy-3.5.0.dist-info/licenses/LICENSE,sha256=AlXanKSqFzo_o-87gp3Qw3XzbmnfxYy7O0xJOcQGWJo,1086
11
+ gsppy-3.5.0.dist-info/RECORD,,
@@ -1,11 +0,0 @@
1
- gsppy/__init__.py,sha256=FcWEYkzMCiqIBmc4yhgIXFKzvSNjJA7LX7juUabvoJ4,784
2
- gsppy/accelerate.py,sha256=2I3IA42FyPZvfwc0-f0bovZ8YgbdvJXj0qDlYWSWiXI,10998
3
- gsppy/cli.py,sha256=W5udAPKOjlxi-c-RKcz5HW-sDgoap4ojHD87bd-X498,6583
4
- gsppy/gsp.py,sha256=aCtPrldVNCkwj6wwytrZzbayYKkXi9Om-3xzrHUMkLQ,15293
5
- gsppy/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
- gsppy/utils.py,sha256=KtjfDgsTwvwxIyA2KCQmgu8cFkBqQvMZN8Ct5NB60Tc,3952
7
- gsppy-3.3.0.dist-info/METADATA,sha256=VQtJqYCs9I4HnO5EpEeI9SijBxxgaNir_mw1HMmWKlw,22727
8
- gsppy-3.3.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
9
- gsppy-3.3.0.dist-info/entry_points.txt,sha256=smvmcIWk424ARIGKOC_BM42hpT_SptKPcIeqs-8u8lM,41
10
- gsppy-3.3.0.dist-info/licenses/LICENSE,sha256=AlXanKSqFzo_o-87gp3Qw3XzbmnfxYy7O0xJOcQGWJo,1086
11
- gsppy-3.3.0.dist-info/RECORD,,
File without changes