gsppy 3.1.1__py3-none-any.whl → 3.2.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gsppy/__init__.py +24 -0
- gsppy/accelerate.py +24 -5
- gsppy/cli.py +1 -1
- gsppy/gsp.py +18 -2
- gsppy/py.typed +0 -0
- {gsppy-3.1.1.dist-info → gsppy-3.2.7.dist-info}/METADATA +70 -17
- gsppy-3.2.7.dist-info/RECORD +11 -0
- gsppy-3.1.1.dist-info/RECORD +0 -10
- {gsppy-3.1.1.dist-info → gsppy-3.2.7.dist-info}/WHEEL +0 -0
- {gsppy-3.1.1.dist-info → gsppy-3.2.7.dist-info}/entry_points.txt +0 -0
- {gsppy-3.1.1.dist-info → gsppy-3.2.7.dist-info}/licenses/LICENSE +0 -0
gsppy/__init__.py
CHANGED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
"""Public interface for the :mod:`gsppy` package.
|
|
2
|
+
|
|
3
|
+
This module centralizes the primary entry points, including the :class:`~gsppy.gsp.GSP`
|
|
4
|
+
implementation, CLI helpers for loading transactional data, and the package version string.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from importlib import metadata as importlib_metadata
|
|
8
|
+
|
|
9
|
+
from gsppy.cli import detect_and_read_file, read_transactions_from_csv, read_transactions_from_json, setup_logging
|
|
10
|
+
from gsppy.gsp import GSP
|
|
11
|
+
|
|
12
|
+
try:
|
|
13
|
+
__version__ = importlib_metadata.version("gsppy")
|
|
14
|
+
except importlib_metadata.PackageNotFoundError: # pragma: no cover - handled only in editable installs
|
|
15
|
+
__version__ = "0.0.0"
|
|
16
|
+
|
|
17
|
+
__all__ = [
|
|
18
|
+
"GSP",
|
|
19
|
+
"detect_and_read_file",
|
|
20
|
+
"read_transactions_from_csv",
|
|
21
|
+
"read_transactions_from_json",
|
|
22
|
+
"setup_logging",
|
|
23
|
+
"__version__",
|
|
24
|
+
]
|
gsppy/accelerate.py
CHANGED
|
@@ -21,6 +21,7 @@ from .utils import split_into_batches, is_subsequence_in_list
|
|
|
21
21
|
|
|
22
22
|
# Optional GPU (CuPy) support
|
|
23
23
|
_gpu_available = False
|
|
24
|
+
cp: Any | None = None
|
|
24
25
|
try: # pragma: no cover - optional dependency path
|
|
25
26
|
import cupy as _cp_mod # type: ignore[import-not-found]
|
|
26
27
|
|
|
@@ -126,8 +127,8 @@ def _support_counts_gpu_singletons(
|
|
|
126
127
|
if not flat:
|
|
127
128
|
return []
|
|
128
129
|
|
|
129
|
-
cp_flat = cp.asarray(flat, dtype=cp.int32) # type: ignore[name-defined]
|
|
130
|
-
counts = cp.bincount(cp_flat, minlength=vocab_size) # type: ignore[attr-defined]
|
|
130
|
+
cp_flat = cp.asarray(flat, dtype=cp.int32) # type: ignore[name-defined, union-attr]
|
|
131
|
+
counts = cp.bincount(cp_flat, minlength=vocab_size) # type: ignore[attr-defined, union-attr]
|
|
131
132
|
counts_host: Any = counts.get() # back to host as a NumPy array
|
|
132
133
|
|
|
133
134
|
out: List[Tuple[List[int], int]] = []
|
|
@@ -178,6 +179,17 @@ def support_counts(
|
|
|
178
179
|
fall back to CPU for the rest
|
|
179
180
|
- "python": force pure-Python fallback
|
|
180
181
|
- otherwise: try Rust first and fall back to Python
|
|
182
|
+
|
|
183
|
+
Example:
|
|
184
|
+
Running a search with an explicit backend:
|
|
185
|
+
|
|
186
|
+
```python
|
|
187
|
+
from gsppy.accelerate import support_counts
|
|
188
|
+
|
|
189
|
+
transactions = [("A", "B"), ("A", "C")]
|
|
190
|
+
candidates = [("A",), ("B",), ("A", "B")]
|
|
191
|
+
counts = support_counts(transactions, candidates, min_support_abs=1, backend="python")
|
|
192
|
+
```
|
|
181
193
|
"""
|
|
182
194
|
backend_sel = (backend or _env_backend()).lower()
|
|
183
195
|
|
|
@@ -222,7 +234,8 @@ def support_counts(
|
|
|
222
234
|
try:
|
|
223
235
|
other_enc = [enc for enc, _ in others]
|
|
224
236
|
res = cast(
|
|
225
|
-
List[Tuple[List[int], int]],
|
|
237
|
+
List[Tuple[List[int], int]],
|
|
238
|
+
_compute_supports_rust(enc_tx, other_enc, int(min_support_abs)), # ty:ignore[call-non-callable]
|
|
226
239
|
)
|
|
227
240
|
for enc_cand, freq in res:
|
|
228
241
|
out[tuple(inv_vocab[i] for i in enc_cand)] = int(freq)
|
|
@@ -247,7 +260,10 @@ def support_counts(
|
|
|
247
260
|
# use rust
|
|
248
261
|
enc_tx, inv_vocab, vocab = _get_encoded_transactions(transactions)
|
|
249
262
|
enc_cands = _encode_candidates(candidates, vocab)
|
|
250
|
-
result = cast(
|
|
263
|
+
result = cast(
|
|
264
|
+
List[Tuple[List[int], int]],
|
|
265
|
+
_compute_supports_rust(enc_tx, enc_cands, int(min_support_abs)), # ty:ignore[call-non-callable]
|
|
266
|
+
)
|
|
251
267
|
out_rust: Dict[Tuple[str, ...], int] = {}
|
|
252
268
|
for enc_cand, freq in result:
|
|
253
269
|
out_rust[tuple(inv_vocab[i] for i in enc_cand)] = int(freq)
|
|
@@ -258,7 +274,10 @@ def support_counts(
|
|
|
258
274
|
enc_tx, inv_vocab, vocab = _get_encoded_transactions(transactions)
|
|
259
275
|
enc_cands = _encode_candidates(candidates, vocab)
|
|
260
276
|
try:
|
|
261
|
-
result = cast(
|
|
277
|
+
result = cast(
|
|
278
|
+
List[Tuple[List[int], int]],
|
|
279
|
+
_compute_supports_rust(enc_tx, enc_cands, int(min_support_abs)), # ty:ignore[call-non-callable]
|
|
280
|
+
)
|
|
262
281
|
out2: Dict[Tuple[str, ...], int] = {}
|
|
263
282
|
for enc_cand, freq in result:
|
|
264
283
|
out2[tuple(inv_vocab[i] for i in enc_cand)] = int(freq)
|
gsppy/cli.py
CHANGED
|
@@ -45,7 +45,7 @@ logging.basicConfig(
|
|
|
45
45
|
format="%(message)s", # Simplified to keep CLI output clean
|
|
46
46
|
handlers=[logging.StreamHandler(sys.stdout)],
|
|
47
47
|
)
|
|
48
|
-
logger = logging.getLogger(__name__)
|
|
48
|
+
logger: logging.Logger = logging.getLogger(__name__)
|
|
49
49
|
|
|
50
50
|
|
|
51
51
|
def setup_logging(verbose: bool) -> None:
|
gsppy/gsp.py
CHANGED
|
@@ -95,7 +95,7 @@ from collections import Counter
|
|
|
95
95
|
from gsppy.utils import split_into_batches, is_subsequence_in_list, generate_candidates_from_previous
|
|
96
96
|
from gsppy.accelerate import support_counts as support_counts_accel
|
|
97
97
|
|
|
98
|
-
logger = logging.getLogger(__name__)
|
|
98
|
+
logger: logging.Logger = logging.getLogger(__name__)
|
|
99
99
|
|
|
100
100
|
|
|
101
101
|
class GSP:
|
|
@@ -171,7 +171,7 @@ class GSP:
|
|
|
171
171
|
raise ValueError(msg)
|
|
172
172
|
|
|
173
173
|
logger.info("Pre-processing transactions...")
|
|
174
|
-
self.max_size = max(len(item) for item in raw_transactions)
|
|
174
|
+
self.max_size: int = max(len(item) for item in raw_transactions)
|
|
175
175
|
self.transactions: List[Tuple[str, ...]] = [tuple(transaction) for transaction in raw_transactions]
|
|
176
176
|
counts: Counter[str] = Counter(chain.from_iterable(raw_transactions))
|
|
177
177
|
# Start with singleton candidates (1-sequences)
|
|
@@ -295,6 +295,22 @@ class GSP:
|
|
|
295
295
|
- Information about the algorithm's start, intermediate progress (candidates filtered),
|
|
296
296
|
and completion.
|
|
297
297
|
- Status updates for each iteration until the algorithm terminates.
|
|
298
|
+
|
|
299
|
+
Example:
|
|
300
|
+
Basic usage with the default backend:
|
|
301
|
+
|
|
302
|
+
```python
|
|
303
|
+
from gsppy.gsp import GSP
|
|
304
|
+
|
|
305
|
+
transactions = [
|
|
306
|
+
["Bread", "Milk"],
|
|
307
|
+
["Bread", "Diaper", "Beer", "Eggs"],
|
|
308
|
+
["Milk", "Diaper", "Beer", "Coke"],
|
|
309
|
+
]
|
|
310
|
+
|
|
311
|
+
gsp = GSP(transactions)
|
|
312
|
+
patterns = gsp.search(min_support=0.3)
|
|
313
|
+
```
|
|
298
314
|
"""
|
|
299
315
|
if not 0.0 < min_support <= 1.0:
|
|
300
316
|
raise ValueError("Minimum support must be in the range (0.0, 1.0]")
|
gsppy/py.typed
ADDED
|
File without changes
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: gsppy
|
|
3
|
-
Version: 3.1.1
|
|
3
|
+
Version: 3.2.7
|
|
4
4
|
Summary: GSP (Generalized Sequence Pattern) algorithm in Python
|
|
5
5
|
Project-URL: Homepage, https://github.com/jacksonpradolima/gsp-py
|
|
6
6
|
Author-email: Jackson Antonio do Prado Lima <jacksonpradolima@gmail.com>
|
|
@@ -41,27 +41,34 @@ Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
|
41
41
|
Requires-Python: >=3.10
|
|
42
42
|
Requires-Dist: click>=8.0.0
|
|
43
43
|
Provides-Extra: dev
|
|
44
|
-
Requires-Dist: cython==3.
|
|
45
|
-
Requires-Dist: hatch==1.
|
|
46
|
-
Requires-Dist: hatchling==1.
|
|
47
|
-
Requires-Dist:
|
|
48
|
-
Requires-Dist:
|
|
49
|
-
Requires-Dist:
|
|
50
|
-
Requires-Dist: pytest-benchmark==5.1.0; extra == 'dev'
|
|
44
|
+
Requires-Dist: cython==3.2.3; extra == 'dev'
|
|
45
|
+
Requires-Dist: hatch==1.16.2; extra == 'dev'
|
|
46
|
+
Requires-Dist: hatchling==1.28.0; extra == 'dev'
|
|
47
|
+
Requires-Dist: pylint==4.0.4; extra == 'dev'
|
|
48
|
+
Requires-Dist: pyright==1.1.407; extra == 'dev'
|
|
49
|
+
Requires-Dist: pytest-benchmark==5.2.3; extra == 'dev'
|
|
51
50
|
Requires-Dist: pytest-cov==7.0.0; extra == 'dev'
|
|
52
|
-
Requires-Dist: pytest==
|
|
53
|
-
Requires-Dist: ruff==0.
|
|
51
|
+
Requires-Dist: pytest==9.0.2; extra == 'dev'
|
|
52
|
+
Requires-Dist: ruff==0.14.10; extra == 'dev'
|
|
54
53
|
Requires-Dist: tox==4.32.0; extra == 'dev'
|
|
54
|
+
Requires-Dist: ty==0.0.8; extra == 'dev'
|
|
55
|
+
Provides-Extra: docs
|
|
56
|
+
Requires-Dist: mkdocs-gen-files<1,>=0.5; extra == 'docs'
|
|
57
|
+
Requires-Dist: mkdocs-literate-nav<1,>=0.6; extra == 'docs'
|
|
58
|
+
Requires-Dist: mkdocs-material<10,>=9.5; extra == 'docs'
|
|
59
|
+
Requires-Dist: mkdocs<2,>=1.6; extra == 'docs'
|
|
60
|
+
Requires-Dist: mkdocstrings[python]<0.27,>=0.26; extra == 'docs'
|
|
55
61
|
Provides-Extra: gpu
|
|
56
62
|
Requires-Dist: cupy<14,>=11; extra == 'gpu'
|
|
57
63
|
Provides-Extra: rust
|
|
58
|
-
Requires-Dist: maturin==1.
|
|
64
|
+
Requires-Dist: maturin==1.10.2; extra == 'rust'
|
|
59
65
|
Description-Content-Type: text/markdown
|
|
60
66
|
|
|
61
67
|
[]()
|
|
62
68
|

|
|
63
69
|
[](https://doi.org/10.5281/zenodo.3333987)
|
|
64
70
|
|
|
71
|
+
[](https://jacksonpradolima.github.io/gsp-py/)
|
|
65
72
|
[](https://pypi.org/project/gsppy/)
|
|
66
73
|
[](https://sonarcloud.io/summary/new_code?id=jacksonpradolima_gsp-py)
|
|
67
74
|
[](https://sonarcloud.io/summary/new_code?id=jacksonpradolima_gsp-py)
|
|
@@ -87,13 +94,15 @@ Sequence Pattern (GSP)** algorithm. Ideal for market basket analysis, temporal m
|
|
|
87
94
|
- [❖ Clone Repository](#option-1-clone-the-repository)
|
|
88
95
|
- [❖ Install via PyPI](#option-2-install-via-pip)
|
|
89
96
|
4. [🛠️ Developer Installation](#developer-installation)
|
|
90
|
-
5. [💡 Usage](#usage)
|
|
97
|
+
5. [📖 Documentation](#documentation)
|
|
98
|
+
6. [💡 Usage](#usage)
|
|
91
99
|
- [✅ Example: Analyzing Sales Data](#example-analyzing-sales-data)
|
|
92
100
|
- [📊 Explanation: Support and Results](#explanation-support-and-results)
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
101
|
+
7. [⌨️ Typing](#typing)
|
|
102
|
+
8. [🌟 Planned Features](#planned-features)
|
|
103
|
+
9. [🤝 Contributing](#contributing)
|
|
104
|
+
10. [📝 License](#license)
|
|
105
|
+
11. [📖 Citation](#citation)
|
|
97
106
|
|
|
98
107
|
---
|
|
99
108
|
|
|
@@ -245,7 +254,7 @@ make install # sync deps (from uv.lock) + install project (-e .)
|
|
|
245
254
|
make test # pytest -n auto
|
|
246
255
|
make lint # ruff check .
|
|
247
256
|
make format # ruff --fix
|
|
248
|
-
make typecheck # pyright
|
|
257
|
+
make typecheck # pyright + ty
|
|
249
258
|
make pre-commit-install # install the pre-commit hook
|
|
250
259
|
make pre-commit-run # run pre-commit on all files
|
|
251
260
|
|
|
@@ -259,6 +268,41 @@ make bench-big # run large benchmark
|
|
|
259
268
|
> [!NOTE]
|
|
260
269
|
> Tox in this project uses the "tox-uv" plugin. When running `make tox` or `tox`, missing Python interpreters can be provisioned automatically via uv (no need to pre-install all versions). This makes local setup faster.
|
|
261
270
|
|
|
271
|
+
## 🔏 Release assets and verification
|
|
272
|
+
|
|
273
|
+
Every GitHub release bundles artifacts to help you validate what you download:
|
|
274
|
+
|
|
275
|
+
- Built wheels and source distributions produced by the automated publish workflow.
|
|
276
|
+
- `sbom.json` (CycloneDX) generated with [Syft](https://github.com/anchore/syft).
|
|
277
|
+
- Sigstore-generated `.sig` and `.pem` files for each artifact, created using GitHub OIDC identity.
|
|
278
|
+
|
|
279
|
+
To verify a downloaded artifact from a release:
|
|
280
|
+
|
|
281
|
+
```bash
|
|
282
|
+
python -m pip install sigstore # installs the CLI
|
|
283
|
+
sigstore verify identity \
|
|
284
|
+
--certificate gsppy-<version>-py3-none-any.whl.pem \
|
|
285
|
+
--signature gsppy-<version>-py3-none-any.whl.sig \
|
|
286
|
+
--cert-identity "https://github.com/jacksonpradolima/gsp-py/.github/workflows/publish.yml@refs/tags/v<version>" \
|
|
287
|
+
--cert-oidc-issuer https://token.actions.githubusercontent.com \
|
|
288
|
+
gsppy-<version>-py3-none-any.whl
|
|
289
|
+
```
|
|
290
|
+
|
|
291
|
+
Replace `<version>` with the numeric package version (for example, `3.1.1`) in the filenames; in `--cert-identity`, this becomes `v<version>` (for example, `v3.1.1`). Adjust the filenames for the sdist (`.tar.gz`) if preferred. The same release page also hosts `sbom.json` for supply-chain inspection.
|
|
292
|
+
|
|
293
|
+
## 📖 Documentation
|
|
294
|
+
|
|
295
|
+
- **Live site:** https://jacksonpradolima.github.io/gsp-py/
|
|
296
|
+
- **Build locally:**
|
|
297
|
+
|
|
298
|
+
```bash
|
|
299
|
+
uv venv .venv
|
|
300
|
+
uv sync --extra docs
|
|
301
|
+
uv run mkdocs serve
|
|
302
|
+
```
|
|
303
|
+
|
|
304
|
+
The docs use MkDocs with the Material theme and mkdocstrings to render the Python API directly from docstrings.
|
|
305
|
+
|
|
262
306
|
## 💡 Usage
|
|
263
307
|
|
|
264
308
|
The library is designed to be easy to use and integrate with your own projects. You can use GSP-Py either programmatically (Python API) or directly from the command line (CLI).
|
|
@@ -485,6 +529,15 @@ result = gsp.search(min_support=0.5) # Need at least 2/4 sequences
|
|
|
485
529
|
|
|
486
530
|
---
|
|
487
531
|
|
|
532
|
+
## ⌨️ Typing
|
|
533
|
+
|
|
534
|
+
`gsppy` ships inline type information (PEP 561) via a bundled `py.typed` marker. The public API is re-exported from
|
|
535
|
+
`gsppy` directly—import `GSP` for programmatic use or reuse the CLI helpers (`detect_and_read_file`,
|
|
536
|
+
`read_transactions_from_json`, `read_transactions_from_csv`, and `setup_logging`) when embedding the tool in
|
|
537
|
+
larger applications.
|
|
538
|
+
|
|
539
|
+
---
|
|
540
|
+
|
|
488
541
|
## 🌟 Planned Features
|
|
489
542
|
|
|
490
543
|
We are actively working to improve GSP-Py. Here are some exciting features planned for future releases:
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
gsppy/__init__.py,sha256=FcWEYkzMCiqIBmc4yhgIXFKzvSNjJA7LX7juUabvoJ4,784
|
|
2
|
+
gsppy/accelerate.py,sha256=2I3IA42FyPZvfwc0-f0bovZ8YgbdvJXj0qDlYWSWiXI,10998
|
|
3
|
+
gsppy/cli.py,sha256=W5udAPKOjlxi-c-RKcz5HW-sDgoap4ojHD87bd-X498,6583
|
|
4
|
+
gsppy/gsp.py,sha256=aCtPrldVNCkwj6wwytrZzbayYKkXi9Om-3xzrHUMkLQ,15293
|
|
5
|
+
gsppy/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
|
+
gsppy/utils.py,sha256=KtjfDgsTwvwxIyA2KCQmgu8cFkBqQvMZN8Ct5NB60Tc,3952
|
|
7
|
+
gsppy-3.2.7.dist-info/METADATA,sha256=H7qZ7b0DGtca_pA9uiY0fijQTvPmsFeHHGg9fzKc6V0,22130
|
|
8
|
+
gsppy-3.2.7.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
9
|
+
gsppy-3.2.7.dist-info/entry_points.txt,sha256=smvmcIWk424ARIGKOC_BM42hpT_SptKPcIeqs-8u8lM,41
|
|
10
|
+
gsppy-3.2.7.dist-info/licenses/LICENSE,sha256=AlXanKSqFzo_o-87gp3Qw3XzbmnfxYy7O0xJOcQGWJo,1086
|
|
11
|
+
gsppy-3.2.7.dist-info/RECORD,,
|
gsppy-3.1.1.dist-info/RECORD
DELETED
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
gsppy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
gsppy/accelerate.py,sha256=YO3YQFzo2VAC6IXOTnQnOajkZO7SabkieGb1IPgWdSI,10407
|
|
3
|
-
gsppy/cli.py,sha256=wsGoc_utxpRfgCF9vPOAyLDTOJZ8NaiwiUny5VyIYvQ,6567
|
|
4
|
-
gsppy/gsp.py,sha256=GCHFhOu-DyHEPsse_OXzf9IaZoigF8ouRqgn_OsZBvA,14855
|
|
5
|
-
gsppy/utils.py,sha256=KtjfDgsTwvwxIyA2KCQmgu8cFkBqQvMZN8Ct5NB60Tc,3952
|
|
6
|
-
gsppy-3.1.1.dist-info/METADATA,sha256=uN-rN-CzsrwW_uh4s60DUevIKjm5CuiYyHRh5cgyKqQ,19819
|
|
7
|
-
gsppy-3.1.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
8
|
-
gsppy-3.1.1.dist-info/entry_points.txt,sha256=smvmcIWk424ARIGKOC_BM42hpT_SptKPcIeqs-8u8lM,41
|
|
9
|
-
gsppy-3.1.1.dist-info/licenses/LICENSE,sha256=AlXanKSqFzo_o-87gp3Qw3XzbmnfxYy7O0xJOcQGWJo,1086
|
|
10
|
-
gsppy-3.1.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|