gsppy 3.1.1__tar.gz → 3.2.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {gsppy-3.1.1 → gsppy-3.2.8}/.gitignore +0 -5
- {gsppy-3.1.1 → gsppy-3.2.8}/CHANGELOG.md +55 -0
- {gsppy-3.1.1 → gsppy-3.2.8}/PKG-INFO +70 -17
- {gsppy-3.1.1 → gsppy-3.2.8}/README.md +53 -6
- gsppy-3.2.8/gsppy/__init__.py +24 -0
- {gsppy-3.1.1 → gsppy-3.2.8}/gsppy/accelerate.py +24 -5
- {gsppy-3.1.1 → gsppy-3.2.8}/gsppy/cli.py +1 -1
- {gsppy-3.1.1 → gsppy-3.2.8}/gsppy/gsp.py +18 -2
- {gsppy-3.1.1 → gsppy-3.2.8}/pyproject.toml +26 -18
- gsppy-3.1.1/mypy.ini +0 -54
- {gsppy-3.1.1 → gsppy-3.2.8}/CONTRIBUTING.md +0 -0
- {gsppy-3.1.1 → gsppy-3.2.8}/LICENSE +0 -0
- {gsppy-3.1.1 → gsppy-3.2.8}/SECURITY.md +0 -0
- /gsppy-3.1.1/gsppy/__init__.py → /gsppy-3.2.8/gsppy/py.typed +0 -0
- {gsppy-3.1.1 → gsppy-3.2.8}/gsppy/utils.py +0 -0
- {gsppy-3.1.1 → gsppy-3.2.8}/rust/Cargo.lock +0 -0
- {gsppy-3.1.1 → gsppy-3.2.8}/rust/Cargo.toml +0 -0
- {gsppy-3.1.1 → gsppy-3.2.8}/rust/src/lib.rs +0 -0
- {gsppy-3.1.1 → gsppy-3.2.8}/tests/__init__.py +0 -0
- {gsppy-3.1.1 → gsppy-3.2.8}/tests/test_cli.py +0 -0
- {gsppy-3.1.1 → gsppy-3.2.8}/tests/test_gsp.py +0 -0
- {gsppy-3.1.1 → gsppy-3.2.8}/tests/test_utils.py +0 -0
- {gsppy-3.1.1 → gsppy-3.2.8}/tox.ini +0 -0
|
@@ -1,5 +1,60 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [v3.2.0] - Unreleased
|
|
4
|
+
|
|
5
|
+
### **Tooling and Developer Experience**
|
|
6
|
+
|
|
7
|
+
* **Type Checking**: Replaced `mypy` with `ty`, Astral's fast Rust-based type checker.
|
|
8
|
+
* Updated all configurations, workflows, and documentation.
|
|
9
|
+
* `ty` automatically infers Python version from `project.requires-python` in `pyproject.toml`.
|
|
10
|
+
|
|
11
|
+
### **Dependency Management**
|
|
12
|
+
|
|
13
|
+
* Updated `uv.lock` to sync with latest dependency versions.
|
|
14
|
+
|
|
15
|
+
## [v3.1.1] - 2025-12-21
|
|
16
|
+
|
|
17
|
+
### **Bug Fixes**
|
|
18
|
+
|
|
19
|
+
* **Pattern Matching**: Fixed `is_subsequence_in_list` to correctly detect non-contiguous (ordered) subsequences.
|
|
20
|
+
* Updated to use two-pointer approach for ordered pattern matching.
|
|
21
|
+
* Patterns are now matched in order but do not require contiguous elements.
|
|
22
|
+
* Example: Pattern `('A', 'C')` now correctly matches in sequence `['A', 'B', 'C']`.
|
|
23
|
+
|
|
24
|
+
### **Documentation Updates**
|
|
25
|
+
|
|
26
|
+
* **README.md**: Added comprehensive documentation for non-contiguous pattern matching behavior.
|
|
27
|
+
* Added "ordered (non-contiguous) matching" to Key Features section.
|
|
28
|
+
* Added detailed "Understanding Non-Contiguous Pattern Matching" section with examples.
|
|
29
|
+
* Updated sample outputs to reflect additional patterns detected.
|
|
30
|
+
|
|
31
|
+
### **Testing Improvements**
|
|
32
|
+
|
|
33
|
+
* Added comprehensive test suite for contiguous vs non-contiguous pattern matching:
|
|
34
|
+
* `test_contiguous_vs_non_contiguous_patterns`: Demonstrates patterns found in both modes.
|
|
35
|
+
* `test_non_contiguous_with_longer_gaps`: Tests matching with large gaps between elements.
|
|
36
|
+
* `test_order_sensitivity`: Verifies order requirements in pattern matching.
|
|
37
|
+
* `test_is_subsequence_contiguous_vs_non_contiguous`: Tests utility function behavior.
|
|
38
|
+
* `test_is_subsequence_with_gaps`: Tests various gap sizes in pattern matching.
|
|
39
|
+
|
|
40
|
+
### **Dependency Updates**
|
|
41
|
+
|
|
42
|
+
* Updated development dependencies:
|
|
43
|
+
* `mypy` from 1.18.1 to 1.18.2
|
|
44
|
+
* `ruff` from 0.13.0 to 0.13.3 (multiple incremental updates)
|
|
45
|
+
* `pyright` from 1.1.405 to 1.1.406
|
|
46
|
+
* `pylint` from 3.3.8 to 4.0.2
|
|
47
|
+
* `hatch` from 1.14.1 to 1.15.1
|
|
48
|
+
* `tox` from 4.30.2 to 4.32.0
|
|
49
|
+
* `cython` from 3.1.3 to 3.1.4
|
|
50
|
+
* `maturin` from 1.6.0 to 1.9.6 (multiple incremental updates)
|
|
51
|
+
* `pytest` from 8.3.4 to 8.4.2
|
|
52
|
+
* `pytest-cov` from 5.0.0 to 7.0.0
|
|
53
|
+
|
|
54
|
+
### **CI/CD Updates**
|
|
55
|
+
|
|
56
|
+
* Updated `actions/checkout` from v5 to v6 in GitHub Actions workflows.
|
|
57
|
+
|
|
3
58
|
## [v3.0.0] - 2025-09-14
|
|
4
59
|
|
|
5
60
|
### **New Features**
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: gsppy
|
|
3
|
-
Version: 3.1.1
|
|
3
|
+
Version: 3.2.8
|
|
4
4
|
Summary: GSP (Generalized Sequence Pattern) algorithm in Python
|
|
5
5
|
Project-URL: Homepage, https://github.com/jacksonpradolima/gsp-py
|
|
6
6
|
Author-email: Jackson Antonio do Prado Lima <jacksonpradolima@gmail.com>
|
|
@@ -41,27 +41,34 @@ Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
|
41
41
|
Requires-Python: >=3.10
|
|
42
42
|
Requires-Dist: click>=8.0.0
|
|
43
43
|
Provides-Extra: dev
|
|
44
|
-
Requires-Dist: cython==3.
|
|
45
|
-
Requires-Dist: hatch==1.
|
|
46
|
-
Requires-Dist: hatchling==1.
|
|
47
|
-
Requires-Dist:
|
|
48
|
-
Requires-Dist:
|
|
49
|
-
Requires-Dist:
|
|
50
|
-
Requires-Dist: pytest-benchmark==5.1.0; extra == 'dev'
|
|
44
|
+
Requires-Dist: cython==3.2.3; extra == 'dev'
|
|
45
|
+
Requires-Dist: hatch==1.16.2; extra == 'dev'
|
|
46
|
+
Requires-Dist: hatchling==1.28.0; extra == 'dev'
|
|
47
|
+
Requires-Dist: pylint==4.0.4; extra == 'dev'
|
|
48
|
+
Requires-Dist: pyright==1.1.407; extra == 'dev'
|
|
49
|
+
Requires-Dist: pytest-benchmark==5.2.3; extra == 'dev'
|
|
51
50
|
Requires-Dist: pytest-cov==7.0.0; extra == 'dev'
|
|
52
|
-
Requires-Dist: pytest==
|
|
53
|
-
Requires-Dist: ruff==0.
|
|
51
|
+
Requires-Dist: pytest==9.0.2; extra == 'dev'
|
|
52
|
+
Requires-Dist: ruff==0.14.10; extra == 'dev'
|
|
54
53
|
Requires-Dist: tox==4.32.0; extra == 'dev'
|
|
54
|
+
Requires-Dist: ty==0.0.8; extra == 'dev'
|
|
55
|
+
Provides-Extra: docs
|
|
56
|
+
Requires-Dist: mkdocs-gen-files<1,>=0.5; extra == 'docs'
|
|
57
|
+
Requires-Dist: mkdocs-literate-nav<1,>=0.6; extra == 'docs'
|
|
58
|
+
Requires-Dist: mkdocs-material<10,>=9.5; extra == 'docs'
|
|
59
|
+
Requires-Dist: mkdocs<2,>=1.6; extra == 'docs'
|
|
60
|
+
Requires-Dist: mkdocstrings[python]<0.27,>=0.26; extra == 'docs'
|
|
55
61
|
Provides-Extra: gpu
|
|
56
62
|
Requires-Dist: cupy<14,>=11; extra == 'gpu'
|
|
57
63
|
Provides-Extra: rust
|
|
58
|
-
Requires-Dist: maturin==1.
|
|
64
|
+
Requires-Dist: maturin==1.10.2; extra == 'rust'
|
|
59
65
|
Description-Content-Type: text/markdown
|
|
60
66
|
|
|
61
67
|
[]()
|
|
62
68
|

|
|
63
69
|
[](https://doi.org/10.5281/zenodo.3333987)
|
|
64
70
|
|
|
71
|
+
[](https://jacksonpradolima.github.io/gsp-py/)
|
|
65
72
|
[](https://pypi.org/project/gsppy/)
|
|
66
73
|
[](https://sonarcloud.io/summary/new_code?id=jacksonpradolima_gsp-py)
|
|
67
74
|
[](https://sonarcloud.io/summary/new_code?id=jacksonpradolima_gsp-py)
|
|
@@ -87,13 +94,15 @@ Sequence Pattern (GSP)** algorithm. Ideal for market basket analysis, temporal m
|
|
|
87
94
|
- [❖ Clone Repository](#option-1-clone-the-repository)
|
|
88
95
|
- [❖ Install via PyPI](#option-2-install-via-pip)
|
|
89
96
|
4. [🛠️ Developer Installation](#developer-installation)
|
|
90
|
-
5. [💡 Usage](#usage)
|
|
97
|
+
5. [📖 Documentation](#documentation)
|
|
98
|
+
6. [💡 Usage](#usage)
|
|
91
99
|
- [✅ Example: Analyzing Sales Data](#example-analyzing-sales-data)
|
|
92
100
|
- [📊 Explanation: Support and Results](#explanation-support-and-results)
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
101
|
+
7. [⌨️ Typing](#typing)
|
|
102
|
+
8. [🌟 Planned Features](#planned-features)
|
|
103
|
+
9. [🤝 Contributing](#contributing)
|
|
104
|
+
10. [📝 License](#license)
|
|
105
|
+
11. [📖 Citation](#citation)
|
|
97
106
|
|
|
98
107
|
---
|
|
99
108
|
|
|
@@ -245,7 +254,7 @@ make install # sync deps (from uv.lock) + install project (-e .)
|
|
|
245
254
|
make test # pytest -n auto
|
|
246
255
|
make lint # ruff check .
|
|
247
256
|
make format # ruff --fix
|
|
248
|
-
make typecheck # pyright
|
|
257
|
+
make typecheck # pyright + ty
|
|
249
258
|
make pre-commit-install # install the pre-commit hook
|
|
250
259
|
make pre-commit-run # run pre-commit on all files
|
|
251
260
|
|
|
@@ -259,6 +268,41 @@ make bench-big # run large benchmark
|
|
|
259
268
|
> [!NOTE]
|
|
260
269
|
> Tox in this project uses the "tox-uv" plugin. When running `make tox` or `tox`, missing Python interpreters can be provisioned automatically via uv (no need to pre-install all versions). This makes local setup faster.
|
|
261
270
|
|
|
271
|
+
## 🔏 Release assets and verification
|
|
272
|
+
|
|
273
|
+
Every GitHub release bundles artifacts to help you validate what you download:
|
|
274
|
+
|
|
275
|
+
- Built wheels and source distributions produced by the automated publish workflow.
|
|
276
|
+
- `sbom.json` (CycloneDX) generated with [Syft](https://github.com/anchore/syft).
|
|
277
|
+
- Sigstore-generated `.sig` and `.pem` files for each artifact, created using GitHub OIDC identity.
|
|
278
|
+
|
|
279
|
+
To verify a downloaded artifact from a release:
|
|
280
|
+
|
|
281
|
+
```bash
|
|
282
|
+
python -m pip install sigstore # installs the CLI
|
|
283
|
+
sigstore verify identity \
|
|
284
|
+
--certificate gsppy-<version>-py3-none-any.whl.pem \
|
|
285
|
+
--signature gsppy-<version>-py3-none-any.whl.sig \
|
|
286
|
+
--cert-identity "https://github.com/jacksonpradolima/gsp-py/.github/workflows/publish.yml@refs/tags/v<version>" \
|
|
287
|
+
--cert-oidc-issuer https://token.actions.githubusercontent.com \
|
|
288
|
+
gsppy-<version>-py3-none-any.whl
|
|
289
|
+
```
|
|
290
|
+
|
|
291
|
+
Replace `<version>` with the numeric package version (for example, `3.1.1`) in the filenames; in `--cert-identity`, this becomes `v<version>` (for example, `v3.1.1`). Adjust the filenames for the sdist (`.tar.gz`) if preferred. The same release page also hosts `sbom.json` for supply-chain inspection.
|
|
292
|
+
|
|
293
|
+
## 📖 Documentation
|
|
294
|
+
|
|
295
|
+
- **Live site:** https://jacksonpradolima.github.io/gsp-py/
|
|
296
|
+
- **Build locally:**
|
|
297
|
+
|
|
298
|
+
```bash
|
|
299
|
+
uv venv .venv
|
|
300
|
+
uv sync --extra docs
|
|
301
|
+
uv run mkdocs serve
|
|
302
|
+
```
|
|
303
|
+
|
|
304
|
+
The docs use MkDocs with the Material theme and mkdocstrings to render the Python API directly from docstrings.
|
|
305
|
+
|
|
262
306
|
## 💡 Usage
|
|
263
307
|
|
|
264
308
|
The library is designed to be easy to use and integrate with your own projects. You can use GSP-Py either programmatically (Python API) or directly from the command line (CLI).
|
|
@@ -485,6 +529,15 @@ result = gsp.search(min_support=0.5) # Need at least 2/4 sequences
|
|
|
485
529
|
|
|
486
530
|
---
|
|
487
531
|
|
|
532
|
+
## ⌨️ Typing
|
|
533
|
+
|
|
534
|
+
`gsppy` ships inline type information (PEP 561) via a bundled `py.typed` marker. The public API is re-exported from
|
|
535
|
+
`gsppy` directly—import `GSP` for programmatic use or reuse the CLI helpers (`detect_and_read_file`,
|
|
536
|
+
`read_transactions_from_json`, `read_transactions_from_csv`, and `setup_logging`) when embedding the tool in
|
|
537
|
+
larger applications.
|
|
538
|
+
|
|
539
|
+
---
|
|
540
|
+
|
|
488
541
|
## 🌟 Planned Features
|
|
489
542
|
|
|
490
543
|
We are actively working to improve GSP-Py. Here are some exciting features planned for future releases:
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|

|
|
3
3
|
[](https://doi.org/10.5281/zenodo.3333987)
|
|
4
4
|
|
|
5
|
+
[](https://jacksonpradolima.github.io/gsp-py/)
|
|
5
6
|
[](https://pypi.org/project/gsppy/)
|
|
6
7
|
[](https://sonarcloud.io/summary/new_code?id=jacksonpradolima_gsp-py)
|
|
7
8
|
[](https://sonarcloud.io/summary/new_code?id=jacksonpradolima_gsp-py)
|
|
@@ -27,13 +28,15 @@ Sequence Pattern (GSP)** algorithm. Ideal for market basket analysis, temporal m
|
|
|
27
28
|
- [❖ Clone Repository](#option-1-clone-the-repository)
|
|
28
29
|
- [❖ Install via PyPI](#option-2-install-via-pip)
|
|
29
30
|
4. [🛠️ Developer Installation](#developer-installation)
|
|
30
|
-
5. [💡 Usage](#usage)
|
|
31
|
+
5. [📖 Documentation](#documentation)
|
|
32
|
+
6. [💡 Usage](#usage)
|
|
31
33
|
- [✅ Example: Analyzing Sales Data](#example-analyzing-sales-data)
|
|
32
34
|
- [📊 Explanation: Support and Results](#explanation-support-and-results)
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
35
|
+
7. [⌨️ Typing](#typing)
|
|
36
|
+
8. [🌟 Planned Features](#planned-features)
|
|
37
|
+
9. [🤝 Contributing](#contributing)
|
|
38
|
+
10. [📝 License](#license)
|
|
39
|
+
11. [📖 Citation](#citation)
|
|
37
40
|
|
|
38
41
|
---
|
|
39
42
|
|
|
@@ -185,7 +188,7 @@ make install # sync deps (from uv.lock) + install project (-e .)
|
|
|
185
188
|
make test # pytest -n auto
|
|
186
189
|
make lint # ruff check .
|
|
187
190
|
make format # ruff --fix
|
|
188
|
-
make typecheck # pyright
|
|
191
|
+
make typecheck # pyright + ty
|
|
189
192
|
make pre-commit-install # install the pre-commit hook
|
|
190
193
|
make pre-commit-run # run pre-commit on all files
|
|
191
194
|
|
|
@@ -199,6 +202,41 @@ make bench-big # run large benchmark
|
|
|
199
202
|
> [!NOTE]
|
|
200
203
|
> Tox in this project uses the "tox-uv" plugin. When running `make tox` or `tox`, missing Python interpreters can be provisioned automatically via uv (no need to pre-install all versions). This makes local setup faster.
|
|
201
204
|
|
|
205
|
+
## 🔏 Release assets and verification
|
|
206
|
+
|
|
207
|
+
Every GitHub release bundles artifacts to help you validate what you download:
|
|
208
|
+
|
|
209
|
+
- Built wheels and source distributions produced by the automated publish workflow.
|
|
210
|
+
- `sbom.json` (CycloneDX) generated with [Syft](https://github.com/anchore/syft).
|
|
211
|
+
- Sigstore-generated `.sig` and `.pem` files for each artifact, created using GitHub OIDC identity.
|
|
212
|
+
|
|
213
|
+
To verify a downloaded artifact from a release:
|
|
214
|
+
|
|
215
|
+
```bash
|
|
216
|
+
python -m pip install sigstore # installs the CLI
|
|
217
|
+
sigstore verify identity \
|
|
218
|
+
--certificate gsppy-<version>-py3-none-any.whl.pem \
|
|
219
|
+
--signature gsppy-<version>-py3-none-any.whl.sig \
|
|
220
|
+
--cert-identity "https://github.com/jacksonpradolima/gsp-py/.github/workflows/publish.yml@refs/tags/v<version>" \
|
|
221
|
+
--cert-oidc-issuer https://token.actions.githubusercontent.com \
|
|
222
|
+
gsppy-<version>-py3-none-any.whl
|
|
223
|
+
```
|
|
224
|
+
|
|
225
|
+
Replace `<version>` with the numeric package version (for example, `3.1.1`) in the filenames; in `--cert-identity`, this becomes `v<version>` (for example, `v3.1.1`). Adjust the filenames for the sdist (`.tar.gz`) if preferred. The same release page also hosts `sbom.json` for supply-chain inspection.
|
|
226
|
+
|
|
227
|
+
## 📖 Documentation
|
|
228
|
+
|
|
229
|
+
- **Live site:** https://jacksonpradolima.github.io/gsp-py/
|
|
230
|
+
- **Build locally:**
|
|
231
|
+
|
|
232
|
+
```bash
|
|
233
|
+
uv venv .venv
|
|
234
|
+
uv sync --extra docs
|
|
235
|
+
uv run mkdocs serve
|
|
236
|
+
```
|
|
237
|
+
|
|
238
|
+
The docs use MkDocs with the Material theme and mkdocstrings to render the Python API directly from docstrings.
|
|
239
|
+
|
|
202
240
|
## 💡 Usage
|
|
203
241
|
|
|
204
242
|
The library is designed to be easy to use and integrate with your own projects. You can use GSP-Py either programmatically (Python API) or directly from the command line (CLI).
|
|
@@ -425,6 +463,15 @@ result = gsp.search(min_support=0.5) # Need at least 2/4 sequences
|
|
|
425
463
|
|
|
426
464
|
---
|
|
427
465
|
|
|
466
|
+
## ⌨️ Typing
|
|
467
|
+
|
|
468
|
+
`gsppy` ships inline type information (PEP 561) via a bundled `py.typed` marker. The public API is re-exported from
|
|
469
|
+
`gsppy` directly—import `GSP` for programmatic use or reuse the CLI helpers (`detect_and_read_file`,
|
|
470
|
+
`read_transactions_from_json`, `read_transactions_from_csv`, and `setup_logging`) when embedding the tool in
|
|
471
|
+
larger applications.
|
|
472
|
+
|
|
473
|
+
---
|
|
474
|
+
|
|
428
475
|
## 🌟 Planned Features
|
|
429
476
|
|
|
430
477
|
We are actively working to improve GSP-Py. Here are some exciting features planned for future releases:
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
"""Public interface for the :mod:`gsppy` package.
|
|
2
|
+
|
|
3
|
+
This module centralizes the primary entry points, including the :class:`~gsppy.gsp.GSP`
|
|
4
|
+
implementation, CLI helpers for loading transactional data, and the package version string.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from importlib import metadata as importlib_metadata
|
|
8
|
+
|
|
9
|
+
from gsppy.cli import detect_and_read_file, read_transactions_from_csv, read_transactions_from_json, setup_logging
|
|
10
|
+
from gsppy.gsp import GSP
|
|
11
|
+
|
|
12
|
+
try:
|
|
13
|
+
__version__ = importlib_metadata.version("gsppy")
|
|
14
|
+
except importlib_metadata.PackageNotFoundError: # pragma: no cover - handled only in editable installs
|
|
15
|
+
__version__ = "0.0.0"
|
|
16
|
+
|
|
17
|
+
__all__ = [
|
|
18
|
+
"GSP",
|
|
19
|
+
"detect_and_read_file",
|
|
20
|
+
"read_transactions_from_csv",
|
|
21
|
+
"read_transactions_from_json",
|
|
22
|
+
"setup_logging",
|
|
23
|
+
"__version__",
|
|
24
|
+
]
|
|
@@ -21,6 +21,7 @@ from .utils import split_into_batches, is_subsequence_in_list
|
|
|
21
21
|
|
|
22
22
|
# Optional GPU (CuPy) support
|
|
23
23
|
_gpu_available = False
|
|
24
|
+
cp: Any | None = None
|
|
24
25
|
try: # pragma: no cover - optional dependency path
|
|
25
26
|
import cupy as _cp_mod # type: ignore[import-not-found]
|
|
26
27
|
|
|
@@ -126,8 +127,8 @@ def _support_counts_gpu_singletons(
|
|
|
126
127
|
if not flat:
|
|
127
128
|
return []
|
|
128
129
|
|
|
129
|
-
cp_flat = cp.asarray(flat, dtype=cp.int32) # type: ignore[name-defined]
|
|
130
|
-
counts = cp.bincount(cp_flat, minlength=vocab_size) # type: ignore[attr-defined]
|
|
130
|
+
cp_flat = cp.asarray(flat, dtype=cp.int32) # type: ignore[name-defined, union-attr]
|
|
131
|
+
counts = cp.bincount(cp_flat, minlength=vocab_size) # type: ignore[attr-defined, union-attr]
|
|
131
132
|
counts_host: Any = counts.get() # back to host as a NumPy array
|
|
132
133
|
|
|
133
134
|
out: List[Tuple[List[int], int]] = []
|
|
@@ -178,6 +179,17 @@ def support_counts(
|
|
|
178
179
|
fall back to CPU for the rest
|
|
179
180
|
- "python": force pure-Python fallback
|
|
180
181
|
- otherwise: try Rust first and fall back to Python
|
|
182
|
+
|
|
183
|
+
Example:
|
|
184
|
+
Running a search with an explicit backend:
|
|
185
|
+
|
|
186
|
+
```python
|
|
187
|
+
from gsppy.accelerate import support_counts
|
|
188
|
+
|
|
189
|
+
transactions = [("A", "B"), ("A", "C")]
|
|
190
|
+
candidates = [("A",), ("B",), ("A", "B")]
|
|
191
|
+
counts = support_counts(transactions, candidates, min_support_abs=1, backend="python")
|
|
192
|
+
```
|
|
181
193
|
"""
|
|
182
194
|
backend_sel = (backend or _env_backend()).lower()
|
|
183
195
|
|
|
@@ -222,7 +234,8 @@ def support_counts(
|
|
|
222
234
|
try:
|
|
223
235
|
other_enc = [enc for enc, _ in others]
|
|
224
236
|
res = cast(
|
|
225
|
-
List[Tuple[List[int], int]],
|
|
237
|
+
List[Tuple[List[int], int]],
|
|
238
|
+
_compute_supports_rust(enc_tx, other_enc, int(min_support_abs)), # ty:ignore[call-non-callable]
|
|
226
239
|
)
|
|
227
240
|
for enc_cand, freq in res:
|
|
228
241
|
out[tuple(inv_vocab[i] for i in enc_cand)] = int(freq)
|
|
@@ -247,7 +260,10 @@ def support_counts(
|
|
|
247
260
|
# use rust
|
|
248
261
|
enc_tx, inv_vocab, vocab = _get_encoded_transactions(transactions)
|
|
249
262
|
enc_cands = _encode_candidates(candidates, vocab)
|
|
250
|
-
result = cast(
|
|
263
|
+
result = cast(
|
|
264
|
+
List[Tuple[List[int], int]],
|
|
265
|
+
_compute_supports_rust(enc_tx, enc_cands, int(min_support_abs)), # ty:ignore[call-non-callable]
|
|
266
|
+
)
|
|
251
267
|
out_rust: Dict[Tuple[str, ...], int] = {}
|
|
252
268
|
for enc_cand, freq in result:
|
|
253
269
|
out_rust[tuple(inv_vocab[i] for i in enc_cand)] = int(freq)
|
|
@@ -258,7 +274,10 @@ def support_counts(
|
|
|
258
274
|
enc_tx, inv_vocab, vocab = _get_encoded_transactions(transactions)
|
|
259
275
|
enc_cands = _encode_candidates(candidates, vocab)
|
|
260
276
|
try:
|
|
261
|
-
result = cast(
|
|
277
|
+
result = cast(
|
|
278
|
+
List[Tuple[List[int], int]],
|
|
279
|
+
_compute_supports_rust(enc_tx, enc_cands, int(min_support_abs)), # ty:ignore[call-non-callable]
|
|
280
|
+
)
|
|
262
281
|
out2: Dict[Tuple[str, ...], int] = {}
|
|
263
282
|
for enc_cand, freq in result:
|
|
264
283
|
out2[tuple(inv_vocab[i] for i in enc_cand)] = int(freq)
|
|
@@ -45,7 +45,7 @@ logging.basicConfig(
|
|
|
45
45
|
format="%(message)s", # Simplified to keep CLI output clean
|
|
46
46
|
handlers=[logging.StreamHandler(sys.stdout)],
|
|
47
47
|
)
|
|
48
|
-
logger = logging.getLogger(__name__)
|
|
48
|
+
logger: logging.Logger = logging.getLogger(__name__)
|
|
49
49
|
|
|
50
50
|
|
|
51
51
|
def setup_logging(verbose: bool) -> None:
|
|
@@ -95,7 +95,7 @@ from collections import Counter
|
|
|
95
95
|
from gsppy.utils import split_into_batches, is_subsequence_in_list, generate_candidates_from_previous
|
|
96
96
|
from gsppy.accelerate import support_counts as support_counts_accel
|
|
97
97
|
|
|
98
|
-
logger = logging.getLogger(__name__)
|
|
98
|
+
logger: logging.Logger = logging.getLogger(__name__)
|
|
99
99
|
|
|
100
100
|
|
|
101
101
|
class GSP:
|
|
@@ -171,7 +171,7 @@ class GSP:
|
|
|
171
171
|
raise ValueError(msg)
|
|
172
172
|
|
|
173
173
|
logger.info("Pre-processing transactions...")
|
|
174
|
-
self.max_size = max(len(item) for item in raw_transactions)
|
|
174
|
+
self.max_size: int = max(len(item) for item in raw_transactions)
|
|
175
175
|
self.transactions: List[Tuple[str, ...]] = [tuple(transaction) for transaction in raw_transactions]
|
|
176
176
|
counts: Counter[str] = Counter(chain.from_iterable(raw_transactions))
|
|
177
177
|
# Start with singleton candidates (1-sequences)
|
|
@@ -295,6 +295,22 @@ class GSP:
|
|
|
295
295
|
- Information about the algorithm's start, intermediate progress (candidates filtered),
|
|
296
296
|
and completion.
|
|
297
297
|
- Status updates for each iteration until the algorithm terminates.
|
|
298
|
+
|
|
299
|
+
Example:
|
|
300
|
+
Basic usage with the default backend:
|
|
301
|
+
|
|
302
|
+
```python
|
|
303
|
+
from gsppy.gsp import GSP
|
|
304
|
+
|
|
305
|
+
transactions = [
|
|
306
|
+
["Bread", "Milk"],
|
|
307
|
+
["Bread", "Diaper", "Beer", "Eggs"],
|
|
308
|
+
["Milk", "Diaper", "Beer", "Coke"],
|
|
309
|
+
]
|
|
310
|
+
|
|
311
|
+
gsp = GSP(transactions)
|
|
312
|
+
patterns = gsp.search(min_support=0.3)
|
|
313
|
+
```
|
|
298
314
|
"""
|
|
299
315
|
if not 0.0 < min_support <= 1.0:
|
|
300
316
|
raise ValueError("Minimum support must be in the range (0.0, 1.0]")
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "gsppy"
|
|
7
|
-
version = "3.1.1"
|
|
7
|
+
version = "3.2.8"
|
|
8
8
|
description = "GSP (Generalized Sequence Pattern) algorithm in Python"
|
|
9
9
|
keywords = ["GSP", "sequential patterns", "data analysis", "sequence mining"]
|
|
10
10
|
license = { file = "LICENSE" }
|
|
@@ -39,27 +39,37 @@ gsppy = "gsppy.cli:main"
|
|
|
39
39
|
|
|
40
40
|
[project.optional-dependencies]
|
|
41
41
|
dev = [
|
|
42
|
-
"cython==3.
|
|
43
|
-
"hatch==1.
|
|
44
|
-
"hatchling==1.
|
|
45
|
-
"
|
|
46
|
-
"
|
|
47
|
-
"
|
|
48
|
-
"pytest==
|
|
49
|
-
"pytest-benchmark==5.1.0",
|
|
42
|
+
"cython==3.2.3",
|
|
43
|
+
"hatch==1.16.2",
|
|
44
|
+
"hatchling==1.28.0",
|
|
45
|
+
"pylint==4.0.4",
|
|
46
|
+
"pyright==1.1.407",
|
|
47
|
+
"pytest==9.0.2",
|
|
48
|
+
"pytest-benchmark==5.2.3",
|
|
50
49
|
"pytest-cov==7.0.0",
|
|
51
|
-
"ruff==0.
|
|
50
|
+
"ruff==0.14.10",
|
|
52
51
|
"tox==4.32.0",
|
|
52
|
+
"ty==0.0.8",
|
|
53
|
+
]
|
|
54
|
+
docs = [
|
|
55
|
+
"mkdocs>=1.6,<2",
|
|
56
|
+
"mkdocs-gen-files>=0.5,<1",
|
|
57
|
+
"mkdocs-literate-nav>=0.6,<1",
|
|
58
|
+
"mkdocs-material>=9.5,<10",
|
|
59
|
+
"mkdocstrings[python]>=0.26,<0.27",
|
|
53
60
|
]
|
|
54
61
|
rust = [
|
|
55
|
-
"maturin==1.
|
|
62
|
+
"maturin==1.10.2"
|
|
56
63
|
]
|
|
57
64
|
gpu = [
|
|
58
65
|
"cupy>=11,<14"
|
|
59
66
|
]
|
|
60
67
|
|
|
61
68
|
[tool.hatch.build]
|
|
62
|
-
include = [
|
|
69
|
+
include = [
|
|
70
|
+
"gsppy/*",
|
|
71
|
+
"gsppy/py.typed",
|
|
72
|
+
]
|
|
63
73
|
|
|
64
74
|
[tool.hatch.metadata.hooks.fancy-pypi-readme]
|
|
65
75
|
content-type = "text/markdown"
|
|
@@ -73,16 +83,14 @@ include = [
|
|
|
73
83
|
"/*.ini",
|
|
74
84
|
"bin/*",
|
|
75
85
|
"gsppy/*",
|
|
86
|
+
"gsppy/py.typed",
|
|
76
87
|
"tests/*",
|
|
77
88
|
"rust/**",
|
|
78
89
|
]
|
|
79
90
|
|
|
80
|
-
[tool.mypy]
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
ignore_missing_imports = true
|
|
84
|
-
incremental = true
|
|
85
|
-
cache_dir = ".mypy_cache"
|
|
91
|
+
[tool.ty]
|
|
92
|
+
# ty is Astral's fast type checker written in Rust
|
|
93
|
+
# Python version is automatically inferred from project.requires-python
|
|
86
94
|
|
|
87
95
|
[tool.ruff]
|
|
88
96
|
line-length = 120
|
gsppy-3.1.1/mypy.ini
DELETED
|
@@ -1,54 +0,0 @@
|
|
|
1
|
-
[mypy]
|
|
2
|
-
# Ignore errors from the typing module
|
|
3
|
-
python_version = 3.9
|
|
4
|
-
|
|
5
|
-
# Output configuration
|
|
6
|
-
pretty = True
|
|
7
|
-
show_error_codes = True
|
|
8
|
-
|
|
9
|
-
# Type-checking strictness configuration
|
|
10
|
-
strict_equality = True
|
|
11
|
-
implicit_reexport = True
|
|
12
|
-
check_untyped_defs = True
|
|
13
|
-
no_implicit_optional = True
|
|
14
|
-
|
|
15
|
-
# Warnings
|
|
16
|
-
warn_return_any = True
|
|
17
|
-
warn_unreachable = True
|
|
18
|
-
warn_unused_configs = True
|
|
19
|
-
|
|
20
|
-
# Turn these options off as it could cause conflicts
|
|
21
|
-
# with the Pyright options.
|
|
22
|
-
warn_unused_ignores = False
|
|
23
|
-
warn_redundant_casts = False
|
|
24
|
-
|
|
25
|
-
# Error handling and strict type management
|
|
26
|
-
disallow_any_generics = True
|
|
27
|
-
disallow_untyped_defs = True
|
|
28
|
-
disallow_untyped_calls = True
|
|
29
|
-
disallow_subclassing_any = True
|
|
30
|
-
disallow_incomplete_defs = True
|
|
31
|
-
disallow_untyped_decorators = True
|
|
32
|
-
|
|
33
|
-
# Fine-grained caching
|
|
34
|
-
cache_fine_grained = True
|
|
35
|
-
|
|
36
|
-
# Exclusion of specific files and directories
|
|
37
|
-
exclude = ^(tests/.*|examples/.*)$
|
|
38
|
-
|
|
39
|
-
# By default, mypy reports an error if you assign a value to the result
|
|
40
|
-
# of a function call that doesn't return anything. We do this in our test
|
|
41
|
-
# cases:
|
|
42
|
-
# ```
|
|
43
|
-
# result = ...
|
|
44
|
-
# assert result is None
|
|
45
|
-
# ```
|
|
46
|
-
# Changing this codegen to make mypy happy would increase complexity
|
|
47
|
-
# and would not be worth it.
|
|
48
|
-
disable_error_code = func-returns-value
|
|
49
|
-
|
|
50
|
-
# https://github.com/python/mypy/issues/12162
|
|
51
|
-
[mypy.overrides]
|
|
52
|
-
module = "black.files.*"
|
|
53
|
-
ignore_errors = True
|
|
54
|
-
ignore_missing_imports = True
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|