gsppy 3.1.1__py3-none-any.whl → 3.2.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gsppy/__init__.py CHANGED
@@ -0,0 +1,24 @@
1
+ """Public interface for the :mod:`gsppy` package.
2
+
3
+ This module centralizes the primary entry points, including the :class:`~gsppy.gsp.GSP`
4
+ implementation, CLI helpers for loading transactional data, and the package version string.
5
+ """
6
+
7
+ from importlib import metadata as importlib_metadata
8
+
9
+ from gsppy.cli import detect_and_read_file, read_transactions_from_csv, read_transactions_from_json, setup_logging
10
+ from gsppy.gsp import GSP
11
+
12
+ try:
13
+ __version__ = importlib_metadata.version("gsppy")
14
+ except importlib_metadata.PackageNotFoundError: # pragma: no cover - handled only in editable installs
15
+ __version__ = "0.0.0"
16
+
17
+ __all__ = [
18
+ "GSP",
19
+ "detect_and_read_file",
20
+ "read_transactions_from_csv",
21
+ "read_transactions_from_json",
22
+ "setup_logging",
23
+ "__version__",
24
+ ]
gsppy/accelerate.py CHANGED
@@ -21,6 +21,7 @@ from .utils import split_into_batches, is_subsequence_in_list
21
21
 
22
22
  # Optional GPU (CuPy) support
23
23
  _gpu_available = False
24
+ cp: Any | None = None
24
25
  try: # pragma: no cover - optional dependency path
25
26
  import cupy as _cp_mod # type: ignore[import-not-found]
26
27
 
@@ -126,8 +127,8 @@ def _support_counts_gpu_singletons(
126
127
  if not flat:
127
128
  return []
128
129
 
129
- cp_flat = cp.asarray(flat, dtype=cp.int32) # type: ignore[name-defined]
130
- counts = cp.bincount(cp_flat, minlength=vocab_size) # type: ignore[attr-defined]
130
+ cp_flat = cp.asarray(flat, dtype=cp.int32) # type: ignore[name-defined, union-attr]
131
+ counts = cp.bincount(cp_flat, minlength=vocab_size) # type: ignore[attr-defined, union-attr]
131
132
  counts_host: Any = counts.get() # back to host as a NumPy array
132
133
 
133
134
  out: List[Tuple[List[int], int]] = []
@@ -178,6 +179,17 @@ def support_counts(
178
179
  fall back to CPU for the rest
179
180
  - "python": force pure-Python fallback
180
181
  - otherwise: try Rust first and fall back to Python
182
+
183
+ Example:
184
+ Computing support counts with an explicit backend:
185
+
186
+ ```python
187
+ from gsppy.accelerate import support_counts
188
+
189
+ transactions = [("A", "B"), ("A", "C")]
190
+ candidates = [("A",), ("B",), ("A", "B")]
191
+ counts = support_counts(transactions, candidates, min_support_abs=1, backend="python")
192
+ ```
181
193
  """
182
194
  backend_sel = (backend or _env_backend()).lower()
183
195
 
@@ -222,7 +234,8 @@ def support_counts(
222
234
  try:
223
235
  other_enc = [enc for enc, _ in others]
224
236
  res = cast(
225
- List[Tuple[List[int], int]], _compute_supports_rust(enc_tx, other_enc, int(min_support_abs))
237
+ List[Tuple[List[int], int]],
238
+ _compute_supports_rust(enc_tx, other_enc, int(min_support_abs)), # ty:ignore[call-non-callable]
226
239
  )
227
240
  for enc_cand, freq in res:
228
241
  out[tuple(inv_vocab[i] for i in enc_cand)] = int(freq)
@@ -247,7 +260,10 @@ def support_counts(
247
260
  # use rust
248
261
  enc_tx, inv_vocab, vocab = _get_encoded_transactions(transactions)
249
262
  enc_cands = _encode_candidates(candidates, vocab)
250
- result = cast(List[Tuple[List[int], int]], _compute_supports_rust(enc_tx, enc_cands, int(min_support_abs)))
263
+ result = cast(
264
+ List[Tuple[List[int], int]],
265
+ _compute_supports_rust(enc_tx, enc_cands, int(min_support_abs)), # ty:ignore[call-non-callable]
266
+ )
251
267
  out_rust: Dict[Tuple[str, ...], int] = {}
252
268
  for enc_cand, freq in result:
253
269
  out_rust[tuple(inv_vocab[i] for i in enc_cand)] = int(freq)
@@ -258,7 +274,10 @@ def support_counts(
258
274
  enc_tx, inv_vocab, vocab = _get_encoded_transactions(transactions)
259
275
  enc_cands = _encode_candidates(candidates, vocab)
260
276
  try:
261
- result = cast(List[Tuple[List[int], int]], _compute_supports_rust(enc_tx, enc_cands, int(min_support_abs)))
277
+ result = cast(
278
+ List[Tuple[List[int], int]],
279
+ _compute_supports_rust(enc_tx, enc_cands, int(min_support_abs)), # ty:ignore[call-non-callable]
280
+ )
262
281
  out2: Dict[Tuple[str, ...], int] = {}
263
282
  for enc_cand, freq in result:
264
283
  out2[tuple(inv_vocab[i] for i in enc_cand)] = int(freq)
gsppy/cli.py CHANGED
@@ -45,7 +45,7 @@ logging.basicConfig(
45
45
  format="%(message)s", # Simplified to keep CLI output clean
46
46
  handlers=[logging.StreamHandler(sys.stdout)],
47
47
  )
48
- logger = logging.getLogger(__name__)
48
+ logger: logging.Logger = logging.getLogger(__name__)
49
49
 
50
50
 
51
51
  def setup_logging(verbose: bool) -> None:
gsppy/gsp.py CHANGED
@@ -95,7 +95,7 @@ from collections import Counter
95
95
  from gsppy.utils import split_into_batches, is_subsequence_in_list, generate_candidates_from_previous
96
96
  from gsppy.accelerate import support_counts as support_counts_accel
97
97
 
98
- logger = logging.getLogger(__name__)
98
+ logger: logging.Logger = logging.getLogger(__name__)
99
99
 
100
100
 
101
101
  class GSP:
@@ -171,7 +171,7 @@ class GSP:
171
171
  raise ValueError(msg)
172
172
 
173
173
  logger.info("Pre-processing transactions...")
174
- self.max_size = max(len(item) for item in raw_transactions)
174
+ self.max_size: int = max(len(item) for item in raw_transactions)
175
175
  self.transactions: List[Tuple[str, ...]] = [tuple(transaction) for transaction in raw_transactions]
176
176
  counts: Counter[str] = Counter(chain.from_iterable(raw_transactions))
177
177
  # Start with singleton candidates (1-sequences)
@@ -295,6 +295,22 @@ class GSP:
295
295
  - Information about the algorithm's start, intermediate progress (candidates filtered),
296
296
  and completion.
297
297
  - Status updates for each iteration until the algorithm terminates.
298
+
299
+ Example:
300
+ Basic usage with the default backend:
301
+
302
+ ```python
303
+ from gsppy.gsp import GSP
304
+
305
+ transactions = [
306
+ ["Bread", "Milk"],
307
+ ["Bread", "Diaper", "Beer", "Eggs"],
308
+ ["Milk", "Diaper", "Beer", "Coke"],
309
+ ]
310
+
311
+ gsp = GSP(transactions)
312
+ patterns = gsp.search(min_support=0.3)
313
+ ```
298
314
  """
299
315
  if not 0.0 < min_support <= 1.0:
300
316
  raise ValueError("Minimum support must be in the range (0.0, 1.0]")
gsppy/py.typed ADDED
File without changes
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: gsppy
3
- Version: 3.1.1
3
+ Version: 3.2.7
4
4
  Summary: GSP (Generalized Sequence Pattern) algorithm in Python
5
5
  Project-URL: Homepage, https://github.com/jacksonpradolima/gsp-py
6
6
  Author-email: Jackson Antonio do Prado Lima <jacksonpradolima@gmail.com>
@@ -41,27 +41,34 @@ Classifier: Topic :: Software Development :: Libraries :: Python Modules
41
41
  Requires-Python: >=3.10
42
42
  Requires-Dist: click>=8.0.0
43
43
  Provides-Extra: dev
44
- Requires-Dist: cython==3.1.4; extra == 'dev'
45
- Requires-Dist: hatch==1.15.1; extra == 'dev'
46
- Requires-Dist: hatchling==1.27.0; extra == 'dev'
47
- Requires-Dist: mypy==1.18.2; extra == 'dev'
48
- Requires-Dist: pylint==4.0.2; extra == 'dev'
49
- Requires-Dist: pyright==1.1.406; extra == 'dev'
50
- Requires-Dist: pytest-benchmark==5.1.0; extra == 'dev'
44
+ Requires-Dist: cython==3.2.3; extra == 'dev'
45
+ Requires-Dist: hatch==1.16.2; extra == 'dev'
46
+ Requires-Dist: hatchling==1.28.0; extra == 'dev'
47
+ Requires-Dist: pylint==4.0.4; extra == 'dev'
48
+ Requires-Dist: pyright==1.1.407; extra == 'dev'
49
+ Requires-Dist: pytest-benchmark==5.2.3; extra == 'dev'
51
50
  Requires-Dist: pytest-cov==7.0.0; extra == 'dev'
52
- Requires-Dist: pytest==8.4.2; extra == 'dev'
53
- Requires-Dist: ruff==0.13.3; extra == 'dev'
51
+ Requires-Dist: pytest==9.0.2; extra == 'dev'
52
+ Requires-Dist: ruff==0.14.10; extra == 'dev'
54
53
  Requires-Dist: tox==4.32.0; extra == 'dev'
54
+ Requires-Dist: ty==0.0.8; extra == 'dev'
55
+ Provides-Extra: docs
56
+ Requires-Dist: mkdocs-gen-files<1,>=0.5; extra == 'docs'
57
+ Requires-Dist: mkdocs-literate-nav<1,>=0.6; extra == 'docs'
58
+ Requires-Dist: mkdocs-material<10,>=9.5; extra == 'docs'
59
+ Requires-Dist: mkdocs<2,>=1.6; extra == 'docs'
60
+ Requires-Dist: mkdocstrings[python]<0.27,>=0.26; extra == 'docs'
55
61
  Provides-Extra: gpu
56
62
  Requires-Dist: cupy<14,>=11; extra == 'gpu'
57
63
  Provides-Extra: rust
58
- Requires-Dist: maturin==1.9.6; extra == 'rust'
64
+ Requires-Dist: maturin==1.10.2; extra == 'rust'
59
65
  Description-Content-Type: text/markdown
60
66
 
61
67
  [![PyPI License](https://img.shields.io/pypi/l/gsppy.svg?style=flat-square)]()
62
68
  ![](https://img.shields.io/badge/python-3.10+-blue.svg)
63
69
  [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3333987.svg)](https://doi.org/10.5281/zenodo.3333987)
64
70
 
71
+ [![Docs](https://img.shields.io/badge/Docs-GSP--Py%20Site-3D9970?style=flat-square)](https://jacksonpradolima.github.io/gsp-py/)
65
72
  [![PyPI Downloads](https://img.shields.io/pypi/dm/gsppy.svg?style=flat-square)](https://pypi.org/project/gsppy/)
66
73
  [![Bugs](https://sonarcloud.io/api/project_badges/measure?project=jacksonpradolima_gsp-py&metric=bugs)](https://sonarcloud.io/summary/new_code?id=jacksonpradolima_gsp-py)
67
74
  [![Vulnerabilities](https://sonarcloud.io/api/project_badges/measure?project=jacksonpradolima_gsp-py&metric=vulnerabilities)](https://sonarcloud.io/summary/new_code?id=jacksonpradolima_gsp-py)
@@ -87,13 +94,15 @@ Sequence Pattern (GSP)** algorithm. Ideal for market basket analysis, temporal m
87
94
  - [❖ Clone Repository](#option-1-clone-the-repository)
88
95
  - [❖ Install via PyPI](#option-2-install-via-pip)
89
96
  4. [🛠️ Developer Installation](#developer-installation)
90
- 5. [💡 Usage](#usage)
97
+ 5. [📖 Documentation](#documentation)
98
+ 6. [💡 Usage](#usage)
91
99
  - [✅ Example: Analyzing Sales Data](#example-analyzing-sales-data)
92
100
  - [📊 Explanation: Support and Results](#explanation-support-and-results)
93
- 6. [🌟 Planned Features](#planned-features)
94
- 7. [🤝 Contributing](#contributing)
95
- 8. [📝 License](#license)
96
- 9. [📖 Citation](#citation)
101
+ 7. [⌨️ Typing](#typing)
102
+ 8. [🌟 Planned Features](#planned-features)
103
+ 9. [🤝 Contributing](#contributing)
104
+ 10. [📝 License](#license)
105
+ 11. [📖 Citation](#citation)
97
106
 
98
107
  ---
99
108
 
@@ -245,7 +254,7 @@ make install # sync deps (from uv.lock) + install project (-e .)
245
254
  make test # pytest -n auto
246
255
  make lint # ruff check .
247
256
  make format # ruff --fix
248
- make typecheck # pyright (and mypy if configured)
257
+ make typecheck # pyright + ty
249
258
  make pre-commit-install # install the pre-commit hook
250
259
  make pre-commit-run # run pre-commit on all files
251
260
 
@@ -259,6 +268,41 @@ make bench-big # run large benchmark
259
268
  > [!NOTE]
260
269
  > Tox in this project uses the "tox-uv" plugin. When running `make tox` or `tox`, missing Python interpreters can be provisioned automatically via uv (no need to pre-install all versions). This makes local setup faster.
261
270
 
271
+ ## 🔏 Release assets and verification
272
+
273
+ Every GitHub release bundles artifacts to help you validate what you download:
274
+
275
+ - Built wheels and source distributions produced by the automated publish workflow.
276
+ - `sbom.json` (CycloneDX) generated with [Syft](https://github.com/anchore/syft).
277
+ - Sigstore-generated `.sig` and `.pem` files for each artifact, created using the publish workflow's GitHub OIDC identity.
278
+
279
+ To verify a downloaded artifact from a release:
280
+
281
+ ```bash
282
+ python -m pip install sigstore # installs the CLI
283
+ sigstore verify identity \
284
+ --certificate gsppy-<version>-py3-none-any.whl.pem \
285
+ --signature gsppy-<version>-py3-none-any.whl.sig \
286
+ --cert-identity "https://github.com/jacksonpradolima/gsp-py/.github/workflows/publish.yml@refs/tags/v<version>" \
287
+ --cert-oidc-issuer https://token.actions.githubusercontent.com \
288
+ gsppy-<version>-py3-none-any.whl
289
+ ```
290
+
291
+ Replace `<version>` with the numeric package version (for example, `3.1.1`) in the filenames; in `--cert-identity`, use the tag form `v<version>` (for example, `v3.1.1`). To verify the sdist instead, substitute the corresponding `.tar.gz` filenames. The same release page also hosts `sbom.json` for supply-chain inspection.
292
+
293
+ ## 📖 Documentation
294
+
295
+ - **Live site:** https://jacksonpradolima.github.io/gsp-py/
296
+ - **Build locally:**
297
+
298
+ ```bash
299
+ uv venv .venv
300
+ uv sync --extra docs
301
+ uv run mkdocs serve
302
+ ```
303
+
304
+ The docs use MkDocs with the Material theme and mkdocstrings to render the Python API directly from docstrings.
305
+
262
306
  ## 💡 Usage
263
307
 
264
308
  The library is designed to be easy to use and integrate with your own projects. You can use GSP-Py either programmatically (Python API) or directly from the command line (CLI).
@@ -485,6 +529,15 @@ result = gsp.search(min_support=0.5) # Need at least 2/4 sequences
485
529
 
486
530
  ---
487
531
 
532
+ ## ⌨️ Typing
533
+
534
+ `gsppy` ships inline type information (PEP 561) via a bundled `py.typed` marker. The public API is re-exported from
535
+ `gsppy` directly—import `GSP` for programmatic use or reuse the CLI helpers (`detect_and_read_file`,
536
+ `read_transactions_from_json`, `read_transactions_from_csv`, and `setup_logging`) when embedding the tool in
537
+ larger applications.
538
+
539
+ ---
540
+
488
541
  ## 🌟 Planned Features
489
542
 
490
543
  We are actively working to improve GSP-Py. Here are some exciting features planned for future releases:
@@ -0,0 +1,11 @@
1
+ gsppy/__init__.py,sha256=FcWEYkzMCiqIBmc4yhgIXFKzvSNjJA7LX7juUabvoJ4,784
2
+ gsppy/accelerate.py,sha256=2I3IA42FyPZvfwc0-f0bovZ8YgbdvJXj0qDlYWSWiXI,10998
3
+ gsppy/cli.py,sha256=W5udAPKOjlxi-c-RKcz5HW-sDgoap4ojHD87bd-X498,6583
4
+ gsppy/gsp.py,sha256=aCtPrldVNCkwj6wwytrZzbayYKkXi9Om-3xzrHUMkLQ,15293
5
+ gsppy/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ gsppy/utils.py,sha256=KtjfDgsTwvwxIyA2KCQmgu8cFkBqQvMZN8Ct5NB60Tc,3952
7
+ gsppy-3.2.7.dist-info/METADATA,sha256=H7qZ7b0DGtca_pA9uiY0fijQTvPmsFeHHGg9fzKc6V0,22130
8
+ gsppy-3.2.7.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
9
+ gsppy-3.2.7.dist-info/entry_points.txt,sha256=smvmcIWk424ARIGKOC_BM42hpT_SptKPcIeqs-8u8lM,41
10
+ gsppy-3.2.7.dist-info/licenses/LICENSE,sha256=AlXanKSqFzo_o-87gp3Qw3XzbmnfxYy7O0xJOcQGWJo,1086
11
+ gsppy-3.2.7.dist-info/RECORD,,
@@ -1,10 +0,0 @@
1
- gsppy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- gsppy/accelerate.py,sha256=YO3YQFzo2VAC6IXOTnQnOajkZO7SabkieGb1IPgWdSI,10407
3
- gsppy/cli.py,sha256=wsGoc_utxpRfgCF9vPOAyLDTOJZ8NaiwiUny5VyIYvQ,6567
4
- gsppy/gsp.py,sha256=GCHFhOu-DyHEPsse_OXzf9IaZoigF8ouRqgn_OsZBvA,14855
5
- gsppy/utils.py,sha256=KtjfDgsTwvwxIyA2KCQmgu8cFkBqQvMZN8Ct5NB60Tc,3952
6
- gsppy-3.1.1.dist-info/METADATA,sha256=uN-rN-CzsrwW_uh4s60DUevIKjm5CuiYyHRh5cgyKqQ,19819
7
- gsppy-3.1.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
8
- gsppy-3.1.1.dist-info/entry_points.txt,sha256=smvmcIWk424ARIGKOC_BM42hpT_SptKPcIeqs-8u8lM,41
9
- gsppy-3.1.1.dist-info/licenses/LICENSE,sha256=AlXanKSqFzo_o-87gp3Qw3XzbmnfxYy7O0xJOcQGWJo,1086
10
- gsppy-3.1.1.dist-info/RECORD,,
File without changes