gsppy 4.0.0__tar.gz → 4.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {gsppy-4.0.0 → gsppy-4.1.0}/CHANGELOG.md +57 -0
- {gsppy-4.0.0 → gsppy-4.1.0}/PKG-INFO +77 -1
- {gsppy-4.0.0 → gsppy-4.1.0}/README.md +76 -0
- {gsppy-4.0.0 → gsppy-4.1.0}/gsppy/__init__.py +10 -0
- {gsppy-4.0.0 → gsppy-4.1.0}/gsppy/cli.py +2 -2
- {gsppy-4.0.0 → gsppy-4.1.0}/gsppy/gsp.py +71 -7
- gsppy-4.1.0/gsppy/sequence.py +371 -0
- {gsppy-4.0.0 → gsppy-4.1.0}/pyproject.toml +1 -1
- gsppy-4.1.0/tests/test_gsp_sequence_integration.py +345 -0
- gsppy-4.1.0/tests/test_sequence.py +466 -0
- {gsppy-4.0.0 → gsppy-4.1.0}/.gitignore +0 -0
- {gsppy-4.0.0 → gsppy-4.1.0}/CONTRIBUTING.md +0 -0
- {gsppy-4.0.0 → gsppy-4.1.0}/LICENSE +0 -0
- {gsppy-4.0.0 → gsppy-4.1.0}/SECURITY.md +0 -0
- {gsppy-4.0.0 → gsppy-4.1.0}/gsppy/accelerate.py +0 -0
- {gsppy-4.0.0 → gsppy-4.1.0}/gsppy/dataframe_adapters.py +0 -0
- {gsppy-4.0.0 → gsppy-4.1.0}/gsppy/enums.py +0 -0
- {gsppy-4.0.0 → gsppy-4.1.0}/gsppy/pruning.py +0 -0
- {gsppy-4.0.0 → gsppy-4.1.0}/gsppy/py.typed +0 -0
- {gsppy-4.0.0 → gsppy-4.1.0}/gsppy/token_mapper.py +0 -0
- {gsppy-4.0.0 → gsppy-4.1.0}/gsppy/utils.py +0 -0
- {gsppy-4.0.0 → gsppy-4.1.0}/rust/Cargo.lock +0 -0
- {gsppy-4.0.0 → gsppy-4.1.0}/rust/Cargo.toml +0 -0
- {gsppy-4.0.0 → gsppy-4.1.0}/rust/src/lib.rs +0 -0
- {gsppy-4.0.0 → gsppy-4.1.0}/tests/__init__.py +0 -0
- {gsppy-4.0.0 → gsppy-4.1.0}/tests/test_cli.py +0 -0
- {gsppy-4.0.0 → gsppy-4.1.0}/tests/test_dataframe.py +0 -0
- {gsppy-4.0.0 → gsppy-4.1.0}/tests/test_gsp.py +0 -0
- {gsppy-4.0.0 → gsppy-4.1.0}/tests/test_gsp_fuzzing.py +0 -0
- {gsppy-4.0.0 → gsppy-4.1.0}/tests/test_pruning.py +0 -0
- {gsppy-4.0.0 → gsppy-4.1.0}/tests/test_spm_format.py +0 -0
- {gsppy-4.0.0 → gsppy-4.1.0}/tests/test_temporal_constraints.py +0 -0
- {gsppy-4.0.0 → gsppy-4.1.0}/tests/test_utils.py +0 -0
- {gsppy-4.0.0 → gsppy-4.1.0}/tox.ini +0 -0
|
@@ -1,6 +1,63 @@
|
|
|
1
1
|
# CHANGELOG
|
|
2
2
|
|
|
3
3
|
|
|
4
|
+
## v4.1.0 (2026-02-01)
|
|
5
|
+
|
|
6
|
+
### Bug Fixes
|
|
7
|
+
|
|
8
|
+
- Address code review feedback - add type annotations and remove unused variables
|
|
9
|
+
([`bf62d14`](https://github.com/jacksonpradolima/gsp-py/commit/bf62d144d8f1be1e7716291d41af955450612c81))
|
|
10
|
+
|
|
11
|
+
Co-authored-by: jacksonpradolima <7774063+jacksonpradolima@users.noreply.github.com>
|
|
12
|
+
|
|
13
|
+
### Chores
|
|
14
|
+
|
|
15
|
+
- Update uv.lock for version 4.0.0
|
|
16
|
+
([`f1ae2af`](https://github.com/jacksonpradolima/gsp-py/commit/f1ae2af2aa71ea44b9d8625ed647da79259ec096))
|
|
17
|
+
|
|
18
|
+
### Documentation
|
|
19
|
+
|
|
20
|
+
- Add Sequence documentation and examples to README
|
|
21
|
+
([`62d0d02`](https://github.com/jacksonpradolima/gsp-py/commit/62d0d02c19c5751331df53e680cc0b9aee19677b))
|
|
22
|
+
|
|
23
|
+
Co-authored-by: jacksonpradolima <7774063+jacksonpradolima@users.noreply.github.com>
|
|
24
|
+
|
|
25
|
+
- Update docs/ with Sequence abstraction documentation
|
|
26
|
+
([`2368cf3`](https://github.com/jacksonpradolima/gsp-py/commit/2368cf30239139e8e2af5457ee6acf14db30ef06))
|
|
27
|
+
|
|
28
|
+
Co-authored-by: jacksonpradolima <7774063+jacksonpradolima@users.noreply.github.com>
|
|
29
|
+
|
|
30
|
+
### Features
|
|
31
|
+
|
|
32
|
+
- Add Sequence abstraction class with comprehensive tests
|
|
33
|
+
([`6011bdb`](https://github.com/jacksonpradolima/gsp-py/commit/6011bdb7104755d109b58261b36e1dd1c36b2d61))
|
|
34
|
+
|
|
35
|
+
Co-authored-by: jacksonpradolima <7774063+jacksonpradolima@users.noreply.github.com>
|
|
36
|
+
|
|
37
|
+
- Integrate Sequence objects with GSP.search() via return_sequences parameter
|
|
38
|
+
([`7476588`](https://github.com/jacksonpradolima/gsp-py/commit/7476588f2b277276748e0550366014f2a93d8ef5))
|
|
39
|
+
|
|
40
|
+
Co-authored-by: jacksonpradolima <7774063+jacksonpradolima@users.noreply.github.com>
|
|
41
|
+
|
|
42
|
+
- Introduce Sequence abstraction for typed pattern representation
|
|
43
|
+
([`01ca37b`](https://github.com/jacksonpradolima/gsp-py/commit/01ca37b9bc4572eb7b1c1eaf6fdf26ca2324a3c5))
|
|
44
|
+
|
|
45
|
+
### Refactoring
|
|
46
|
+
|
|
47
|
+
- Address code review feedback - remove redundant checks
|
|
48
|
+
([`621e940`](https://github.com/jacksonpradolima/gsp-py/commit/621e9403379ae0fd07bf45b97616b9979f2d4aa6))
|
|
49
|
+
|
|
50
|
+
Co-authored-by: jacksonpradolima <7774063+jacksonpradolima@users.noreply.github.com>
|
|
51
|
+
|
|
52
|
+
- Reduce cognitive complexity in sequence_example.py and fix f-string
|
|
53
|
+
([`63ac4f9`](https://github.com/jacksonpradolima/gsp-py/commit/63ac4f9ceb869a5228cdccdcf6a9d0b9f46f0350))
|
|
54
|
+
|
|
55
|
+
Co-authored-by: jacksonpradolima <7774063+jacksonpradolima@users.noreply.github.com>
|
|
56
|
+
|
|
57
|
+
- Update type annotations and improve search method in GSP class
|
|
58
|
+
([`e2e9a3f`](https://github.com/jacksonpradolima/gsp-py/commit/e2e9a3f473d1e0c5d6990c8b7c5837a251761032))
|
|
59
|
+
|
|
60
|
+
|
|
4
61
|
## v4.0.0 (2026-02-01)
|
|
5
62
|
|
|
6
63
|
### Chores
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: gsppy
|
|
3
|
-
Version: 4.0.0
|
|
3
|
+
Version: 4.1.0
|
|
4
4
|
Summary: GSP (Generalized Sequence Pattern) algorithm in Python
|
|
5
5
|
Project-URL: Homepage, https://github.com/jacksonpradolima/gsp-py
|
|
6
6
|
Author-email: Jackson Antonio do Prado Lima <jacksonpradolima@gmail.com>
|
|
@@ -559,6 +559,82 @@ Verbose mode provides:
|
|
|
559
559
|
|
|
560
560
|
For complete documentation on logging, see [docs/logging.md](docs/logging.md).
|
|
561
561
|
|
|
562
|
+
### Using Sequence Objects for Rich Pattern Representation
|
|
563
|
+
|
|
564
|
+
GSP-Py 4.1+ introduces a **Sequence abstraction class** that provides a richer, more maintainable way to work with sequential patterns. The Sequence class encapsulates pattern items, support counts, and optional metadata in an immutable, hashable object.
|
|
565
|
+
|
|
566
|
+
#### Traditional Dict-based Output (Default)
|
|
567
|
+
|
|
568
|
+
```python
|
|
569
|
+
from gsppy import GSP
|
|
570
|
+
|
|
571
|
+
transactions = [
|
|
572
|
+
['Bread', 'Milk'],
|
|
573
|
+
['Bread', 'Diaper', 'Beer', 'Eggs'],
|
|
574
|
+
['Milk', 'Diaper', 'Beer', 'Coke']
|
|
575
|
+
]
|
|
576
|
+
|
|
577
|
+
gsp = GSP(transactions)
|
|
578
|
+
result = gsp.search(min_support=0.3)
|
|
579
|
+
|
|
580
|
+
# Returns: [{('Bread',): 2, ('Milk',): 2, ...}, {('Bread', 'Milk'): 1, ...}, ...]
|
|
581
|
+
for level_patterns in result:
|
|
582
|
+
for pattern, support in level_patterns.items():
|
|
583
|
+
print(f"Pattern: {pattern}, Support: {support}")
|
|
584
|
+
```
|
|
585
|
+
|
|
586
|
+
#### Sequence Objects (New Feature)
|
|
587
|
+
|
|
588
|
+
```python
|
|
589
|
+
from gsppy import GSP
|
|
590
|
+
|
|
591
|
+
transactions = [
|
|
592
|
+
['Bread', 'Milk'],
|
|
593
|
+
['Bread', 'Diaper', 'Beer', 'Eggs'],
|
|
594
|
+
['Milk', 'Diaper', 'Beer', 'Coke']
|
|
595
|
+
]
|
|
596
|
+
|
|
597
|
+
gsp = GSP(transactions)
|
|
598
|
+
result = gsp.search(min_support=0.3, return_sequences=True)
|
|
599
|
+
|
|
600
|
+
# Returns: [[Sequence(('Bread',), support=2), ...], [Sequence(('Bread', 'Milk'), support=1), ...], ...]
|
|
601
|
+
for level_patterns in result:
|
|
602
|
+
for seq in level_patterns:
|
|
603
|
+
print(f"Pattern: {seq.items}, Support: {seq.support}, Length: {seq.length}")
|
|
604
|
+
# Access sequence properties
|
|
605
|
+
print(f" First item: {seq.first_item}, Last item: {seq.last_item}")
|
|
606
|
+
# Check if item is in sequence
|
|
607
|
+
if "Milk" in seq:
|
|
608
|
+
print(" Contains Milk!")
|
|
609
|
+
```
|
|
610
|
+
|
|
611
|
+
#### Key Benefits of Sequence Objects
|
|
612
|
+
|
|
613
|
+
1. **Rich API**: Access pattern properties like `length`, `first_item`, `last_item`
|
|
614
|
+
2. **Type Safety**: IDE autocomplete and better type hints
|
|
615
|
+
3. **Immutable & Hashable**: Can be used as dictionary keys
|
|
616
|
+
4. **Extensible**: Add metadata for confidence, lift, or custom properties
|
|
617
|
+
5. **Backward Compatible**: Convert to/from dict format as needed
|
|
618
|
+
|
|
619
|
+
```python
|
|
620
|
+
from gsppy import Sequence, sequences_to_dict, dict_to_sequences
|
|
621
|
+
|
|
622
|
+
# Create custom sequences
|
|
623
|
+
seq = Sequence.from_tuple(("A", "B", "C"), support=5)
|
|
624
|
+
|
|
625
|
+
# Extend sequences
|
|
626
|
+
extended = seq.extend("D") # Creates Sequence(("A", "B", "C", "D"))
|
|
627
|
+
|
|
628
|
+
# Add metadata
|
|
629
|
+
seq_with_meta = seq.with_metadata(confidence=0.85, lift=1.5)
|
|
630
|
+
|
|
631
|
+
# Convert between formats for compatibility
|
|
632
|
+
seq_result = gsp.search(min_support=0.3, return_sequences=True)
|
|
633
|
+
dict_format = sequences_to_dict(seq_result[0]) # Convert to dict
|
|
634
|
+
```
|
|
635
|
+
|
|
636
|
+
For a complete example, see [examples/sequence_example.py](examples/sequence_example.py).
|
|
637
|
+
|
|
562
638
|
### Loading SPM/GSP Format Files
|
|
563
639
|
|
|
564
640
|
GSP-Py supports loading datasets in the classical SPM/GSP delimiter format, which is widely used in sequential pattern mining research. This format uses:
|
|
@@ -486,6 +486,82 @@ Verbose mode provides:
|
|
|
486
486
|
|
|
487
487
|
For complete documentation on logging, see [docs/logging.md](docs/logging.md).
|
|
488
488
|
|
|
489
|
+
### Using Sequence Objects for Rich Pattern Representation
|
|
490
|
+
|
|
491
|
+
GSP-Py 4.1+ introduces a **Sequence abstraction class** that provides a richer, more maintainable way to work with sequential patterns. The Sequence class encapsulates pattern items, support counts, and optional metadata in an immutable, hashable object.
|
|
492
|
+
|
|
493
|
+
#### Traditional Dict-based Output (Default)
|
|
494
|
+
|
|
495
|
+
```python
|
|
496
|
+
from gsppy import GSP
|
|
497
|
+
|
|
498
|
+
transactions = [
|
|
499
|
+
['Bread', 'Milk'],
|
|
500
|
+
['Bread', 'Diaper', 'Beer', 'Eggs'],
|
|
501
|
+
['Milk', 'Diaper', 'Beer', 'Coke']
|
|
502
|
+
]
|
|
503
|
+
|
|
504
|
+
gsp = GSP(transactions)
|
|
505
|
+
result = gsp.search(min_support=0.3)
|
|
506
|
+
|
|
507
|
+
# Returns: [{('Bread',): 2, ('Milk',): 2, ...}, {('Bread', 'Milk'): 1, ...}, ...]
|
|
508
|
+
for level_patterns in result:
|
|
509
|
+
for pattern, support in level_patterns.items():
|
|
510
|
+
print(f"Pattern: {pattern}, Support: {support}")
|
|
511
|
+
```
|
|
512
|
+
|
|
513
|
+
#### Sequence Objects (New Feature)
|
|
514
|
+
|
|
515
|
+
```python
|
|
516
|
+
from gsppy import GSP
|
|
517
|
+
|
|
518
|
+
transactions = [
|
|
519
|
+
['Bread', 'Milk'],
|
|
520
|
+
['Bread', 'Diaper', 'Beer', 'Eggs'],
|
|
521
|
+
['Milk', 'Diaper', 'Beer', 'Coke']
|
|
522
|
+
]
|
|
523
|
+
|
|
524
|
+
gsp = GSP(transactions)
|
|
525
|
+
result = gsp.search(min_support=0.3, return_sequences=True)
|
|
526
|
+
|
|
527
|
+
# Returns: [[Sequence(('Bread',), support=2), ...], [Sequence(('Bread', 'Milk'), support=1), ...], ...]
|
|
528
|
+
for level_patterns in result:
|
|
529
|
+
for seq in level_patterns:
|
|
530
|
+
print(f"Pattern: {seq.items}, Support: {seq.support}, Length: {seq.length}")
|
|
531
|
+
# Access sequence properties
|
|
532
|
+
print(f" First item: {seq.first_item}, Last item: {seq.last_item}")
|
|
533
|
+
# Check if item is in sequence
|
|
534
|
+
if "Milk" in seq:
|
|
535
|
+
print(" Contains Milk!")
|
|
536
|
+
```
|
|
537
|
+
|
|
538
|
+
#### Key Benefits of Sequence Objects
|
|
539
|
+
|
|
540
|
+
1. **Rich API**: Access pattern properties like `length`, `first_item`, `last_item`
|
|
541
|
+
2. **Type Safety**: IDE autocomplete and better type hints
|
|
542
|
+
3. **Immutable & Hashable**: Can be used as dictionary keys
|
|
543
|
+
4. **Extensible**: Add metadata for confidence, lift, or custom properties
|
|
544
|
+
5. **Backward Compatible**: Convert to/from dict format as needed
|
|
545
|
+
|
|
546
|
+
```python
|
|
547
|
+
from gsppy import Sequence, sequences_to_dict, dict_to_sequences
|
|
548
|
+
|
|
549
|
+
# Create custom sequences
|
|
550
|
+
seq = Sequence.from_tuple(("A", "B", "C"), support=5)
|
|
551
|
+
|
|
552
|
+
# Extend sequences
|
|
553
|
+
extended = seq.extend("D") # Creates Sequence(("A", "B", "C", "D"))
|
|
554
|
+
|
|
555
|
+
# Add metadata
|
|
556
|
+
seq_with_meta = seq.with_metadata(confidence=0.85, lift=1.5)
|
|
557
|
+
|
|
558
|
+
# Convert between formats for compatibility
|
|
559
|
+
seq_result = gsp.search(min_support=0.3, return_sequences=True)
|
|
560
|
+
dict_format = sequences_to_dict(seq_result[0]) # Convert to dict
|
|
561
|
+
```
|
|
562
|
+
|
|
563
|
+
For a complete example, see [examples/sequence_example.py](examples/sequence_example.py).
|
|
564
|
+
|
|
489
565
|
### Loading SPM/GSP Format Files
|
|
490
566
|
|
|
491
567
|
GSP-Py supports loading datasets in the classical SPM/GSP delimiter format, which is widely used in sequential pattern mining research. This format uses:
|
|
@@ -24,6 +24,12 @@ from gsppy.pruning import (
|
|
|
24
24
|
FrequencyBasedPruning,
|
|
25
25
|
create_default_pruning_strategy,
|
|
26
26
|
)
|
|
27
|
+
from gsppy.sequence import (
|
|
28
|
+
Sequence,
|
|
29
|
+
sequences_to_dict,
|
|
30
|
+
dict_to_sequences,
|
|
31
|
+
to_sequence,
|
|
32
|
+
)
|
|
27
33
|
from gsppy.token_mapper import TokenMapper
|
|
28
34
|
|
|
29
35
|
# DataFrame adapters are optional - import only if dependencies are available
|
|
@@ -63,6 +69,10 @@ __all__ = [
|
|
|
63
69
|
"TemporalAwarePruning",
|
|
64
70
|
"CombinedPruning",
|
|
65
71
|
"create_default_pruning_strategy",
|
|
72
|
+
"Sequence",
|
|
73
|
+
"sequences_to_dict",
|
|
74
|
+
"dict_to_sequences",
|
|
75
|
+
"to_sequence",
|
|
66
76
|
"TokenMapper",
|
|
67
77
|
]
|
|
68
78
|
|
|
@@ -35,7 +35,7 @@ import csv
|
|
|
35
35
|
import sys
|
|
36
36
|
import json
|
|
37
37
|
import logging
|
|
38
|
-
from typing import Any,
|
|
38
|
+
from typing import Any, List, Tuple, Union, Optional, cast
|
|
39
39
|
|
|
40
40
|
import click
|
|
41
41
|
|
|
@@ -608,7 +608,7 @@ def main(
|
|
|
608
608
|
# Initialize and run GSP algorithm
|
|
609
609
|
try:
|
|
610
610
|
gsp = GSP(transactions, mingap=mingap, maxgap=maxgap, maxspan=maxspan, verbose=verbose)
|
|
611
|
-
patterns
|
|
611
|
+
patterns = gsp.search(min_support=min_support, return_sequences=False)
|
|
612
612
|
logger.info("Frequent Patterns Found:")
|
|
613
613
|
for i, level in enumerate(patterns, start=1):
|
|
614
614
|
logger.info(f"\n{i}-Sequence Patterns:")
|
|
@@ -90,7 +90,7 @@ from __future__ import annotations
|
|
|
90
90
|
import math
|
|
91
91
|
import logging
|
|
92
92
|
import multiprocessing as mp
|
|
93
|
-
from typing import TYPE_CHECKING, Dict, List, Tuple, Union, Optional, cast
|
|
93
|
+
from typing import TYPE_CHECKING, Dict, List, Tuple, Union, Literal, Optional, cast, overload
|
|
94
94
|
from itertools import chain
|
|
95
95
|
from collections import Counter
|
|
96
96
|
|
|
@@ -102,6 +102,7 @@ from gsppy.utils import (
|
|
|
102
102
|
is_subsequence_in_list_with_time_constraints,
|
|
103
103
|
)
|
|
104
104
|
from gsppy.pruning import PruningStrategy, create_default_pruning_strategy
|
|
105
|
+
from gsppy.sequence import Sequence, dict_to_sequences
|
|
105
106
|
from gsppy.accelerate import support_counts as support_counts_accel
|
|
106
107
|
|
|
107
108
|
if TYPE_CHECKING:
|
|
@@ -590,13 +591,37 @@ class GSP:
|
|
|
590
591
|
"""
|
|
591
592
|
logger.info("Run %d: %d candidates filtered to %d.", run, len(candidates), len(self.freq_patterns[run - 1]))
|
|
592
593
|
|
|
594
|
+
@overload
|
|
593
595
|
def search(
|
|
594
596
|
self,
|
|
595
597
|
min_support: float = 0.2,
|
|
596
598
|
max_k: Optional[int] = None,
|
|
597
599
|
backend: Optional[str] = None,
|
|
598
600
|
verbose: Optional[bool] = None,
|
|
599
|
-
|
|
601
|
+
*,
|
|
602
|
+
return_sequences: Literal[False] = False,
|
|
603
|
+
) -> List[Dict[Tuple[str, ...], int]]: ...
|
|
604
|
+
|
|
605
|
+
@overload
|
|
606
|
+
def search(
|
|
607
|
+
self,
|
|
608
|
+
min_support: float = 0.2,
|
|
609
|
+
max_k: Optional[int] = None,
|
|
610
|
+
backend: Optional[str] = None,
|
|
611
|
+
verbose: Optional[bool] = None,
|
|
612
|
+
*,
|
|
613
|
+
return_sequences: Literal[True],
|
|
614
|
+
) -> List[List[Sequence]]: ...
|
|
615
|
+
|
|
616
|
+
def search(
|
|
617
|
+
self,
|
|
618
|
+
min_support: float = 0.2,
|
|
619
|
+
max_k: Optional[int] = None,
|
|
620
|
+
backend: Optional[str] = None,
|
|
621
|
+
verbose: Optional[bool] = None,
|
|
622
|
+
*,
|
|
623
|
+
return_sequences: bool = False,
|
|
624
|
+
) -> Union[List[Dict[Tuple[str, ...], int]], List[List[Sequence]]]:
|
|
600
625
|
"""
|
|
601
626
|
Execute the Generalized Sequential Pattern (GSP) mining algorithm.
|
|
602
627
|
|
|
@@ -617,11 +642,20 @@ class GSP:
|
|
|
617
642
|
Note: temporal constraints always use Python backend.
|
|
618
643
|
verbose (Optional[bool]): Override instance verbosity setting for this search.
|
|
619
644
|
If None, uses the instance's verbose setting.
|
|
645
|
+
return_sequences (bool): If True, returns patterns as Sequence objects instead of
|
|
646
|
+
Dict[Tuple[str, ...], int]. Defaults to False for backward
|
|
647
|
+
compatibility. When True, returns List[List[Sequence]] where
|
|
648
|
+
each Sequence contains items, support count, and can be extended
|
|
649
|
+
with additional metadata.
|
|
620
650
|
|
|
621
651
|
Returns:
|
|
622
|
-
List[Dict[Tuple[str, ...], int]]:
|
|
623
|
-
|
|
624
|
-
|
|
652
|
+
Union[List[Dict[Tuple[str, ...], int]], List[List[Sequence]]]:
|
|
653
|
+
If return_sequences is False (default):
|
|
654
|
+
A list of dictionaries containing frequent patterns at each k-sequence level,
|
|
655
|
+
with patterns as keys and their support counts as values.
|
|
656
|
+
If return_sequences is True:
|
|
657
|
+
A list of lists containing Sequence objects at each k-sequence level,
|
|
658
|
+
where each Sequence encapsulates the pattern items and support count.
|
|
625
659
|
|
|
626
660
|
Raises:
|
|
627
661
|
ValueError: If the minimum support threshold is not in the range `(0.0, 1.0]`.
|
|
@@ -632,7 +666,7 @@ class GSP:
|
|
|
632
666
|
- Status updates for each iteration until the algorithm terminates.
|
|
633
667
|
|
|
634
668
|
Examples:
|
|
635
|
-
Basic usage without temporal constraints:
|
|
669
|
+
Basic usage without temporal constraints (default dict-based output):
|
|
636
670
|
|
|
637
671
|
```python
|
|
638
672
|
from gsppy.gsp import GSP
|
|
@@ -645,6 +679,28 @@ class GSP:
|
|
|
645
679
|
|
|
646
680
|
gsp = GSP(transactions)
|
|
647
681
|
patterns = gsp.search(min_support=0.3)
|
|
682
|
+
# Returns: [{('Bread',): 4, ('Milk',): 4, ...}, {('Bread', 'Milk'): 3, ...}, ...]
|
|
683
|
+
```
|
|
684
|
+
|
|
685
|
+
Using Sequence objects for richer pattern representation:
|
|
686
|
+
|
|
687
|
+
```python
|
|
688
|
+
from gsppy.gsp import GSP
|
|
689
|
+
|
|
690
|
+
transactions = [
|
|
691
|
+
["Bread", "Milk"],
|
|
692
|
+
["Bread", "Diaper", "Beer", "Eggs"],
|
|
693
|
+
["Milk", "Diaper", "Beer", "Coke"],
|
|
694
|
+
]
|
|
695
|
+
|
|
696
|
+
gsp = GSP(transactions)
|
|
697
|
+
patterns = gsp.search(min_support=0.3, return_sequences=True)
|
|
698
|
+
# Returns: [[Sequence(('Bread',), support=2), Sequence(('Milk',), support=2), ...], ...]
|
|
699
|
+
|
|
700
|
+
# Access pattern details
|
|
701
|
+
for level_patterns in patterns:
|
|
702
|
+
for seq in level_patterns:
|
|
703
|
+
print(f"Pattern: {seq.items}, Support: {seq.support}")
|
|
648
704
|
```
|
|
649
705
|
|
|
650
706
|
Usage with temporal constraints (requires timestamped transactions):
|
|
@@ -682,6 +738,9 @@ class GSP:
|
|
|
682
738
|
f"Using temporal constraints: mingap={self.mingap}, maxgap={self.maxgap}, maxspan={self.maxspan}"
|
|
683
739
|
)
|
|
684
740
|
|
|
741
|
+
# Clear freq_patterns for this search (allow reusing the GSP instance)
|
|
742
|
+
self.freq_patterns = []
|
|
743
|
+
|
|
685
744
|
# Convert fractional support to absolute count (ceil to preserve threshold semantics)
|
|
686
745
|
abs_min_support = int(math.ceil(len(self.transactions) * float(min_support)))
|
|
687
746
|
|
|
@@ -729,4 +788,9 @@ class GSP:
|
|
|
729
788
|
self.verbose = original_verbose
|
|
730
789
|
self._configure_logging()
|
|
731
790
|
|
|
732
|
-
|
|
791
|
+
# Return results in the requested format
|
|
792
|
+
result = self.freq_patterns[:-1]
|
|
793
|
+
if return_sequences:
|
|
794
|
+
# Convert Dict[Tuple[str, ...], int] to List[Sequence] for each level
|
|
795
|
+
return [dict_to_sequences(level_patterns) for level_patterns in result]
|
|
796
|
+
return result
|