bblean 0.6.0b1__tar.gz → 0.6.1b0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {bblean-0.6.0b1 → bblean-0.6.1b0}/.github/workflows/ci-cpp.yaml +23 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/.github/workflows/upload-to-pypi.yaml +7 -5
- {bblean-0.6.0b1 → bblean-0.6.1b0}/PKG-INFO +9 -3
- {bblean-0.6.0b1 → bblean-0.6.1b0}/README.md +8 -2
- {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean/_legacy/bb_int64.py +2 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean/_version.py +2 -2
- {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean/bitbirch.py +36 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean/cli.py +17 -18
- {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean/csrc/similarity.cpp +4 -4
- {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean/multiround.py +2 -7
- {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean/plotting.py +7 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean/sklearn.py +1 -2
- {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean.egg-info/PKG-INFO +9 -3
- {bblean-0.6.0b1 → bblean-0.6.1b0}/setup.py +3 -1
- {bblean-0.6.0b1 → bblean-0.6.1b0}/tests/test_cli.py +13 -1
- {bblean-0.6.0b1 → bblean-0.6.1b0}/tests/test_global_clustering.py +36 -30
- {bblean-0.6.0b1 → bblean-0.6.1b0}/tests/test_regression.py +1 -1
- {bblean-0.6.0b1 → bblean-0.6.1b0}/.cruft.json +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/.flake8 +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/.github/CODEOWNERS +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/.github/workflows/ci.yaml +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/.gitignore +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/.pre-commit-config.yaml +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/LICENSE +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/LICENSES/BSD-3-Clause.txt +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/LICENSES/GPL-3.0-only.txt +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean/__init__.py +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean/_config.py +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean/_console.py +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean/_legacy/__init__.py +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean/_legacy/bb_uint8.py +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean/_memory.py +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean/_merges.py +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean/_py_similarity.py +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean/_timer.py +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean/analysis.py +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean/csrc/README.md +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean/fingerprints.py +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean/metrics.py +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean/similarity.py +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean/smiles.py +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean/utils.py +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean-demo-v2.gif +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean-demo.cast +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean.egg-info/SOURCES.txt +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean.egg-info/dependency_links.txt +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean.egg-info/entry_points.txt +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean.egg-info/requires.txt +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean.egg-info/top_level.txt +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/docs/src/_static/api.svg +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/docs/src/_static/installing.svg +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/docs/src/_static/logo-dark-bw.svg +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/docs/src/_static/logo-light-bw.svg +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/docs/src/_static/publications.svg +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/docs/src/_static/style.css +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/docs/src/_static/user-guide.svg +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/docs/src/_templates/module.rst +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/docs/src/api-reference.rst +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/docs/src/conf.py +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/docs/src/index.rst +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/docs/src/installing.rst +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/docs/src/publications.rst +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/docs/src/user-guide/linux_memory_setup.rst +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/docs/src/user-guide/notebooks/bitbirch_best_practices.ipynb +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/docs/src/user-guide/notebooks/bitbirch_quickstart.ipynb +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/docs/src/user-guide/parameters.rst +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/docs/src/user-guide.rst +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/environment.yaml +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/examples/biogen_logS.csv +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/examples/bitbirch_best_practices.ipynb +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/examples/bitbirch_quickstart.ipynb +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/examples/chembl-33-natural-products-subset.smi +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/examples/dataset_splitting.ipynb +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/pyproject.toml +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/setup.cfg +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/tests/chembl-sample-3k.smi +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/tests/chembl-sample-bad.smi +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/tests/legacy_merges.py +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/tests/legacy_metrics.py +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/tests/test_bb_consistency.py +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/tests/test_fake_fps.py +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/tests/test_fingerprints.py +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/tests/test_import_bblean.py +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/tests/test_merges.py +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/tests/test_metrics.py +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/tests/test_multiround.py +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/tests/test_refine.py +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/tests/test_sampling.py +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/tests/test_similarity.py +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/tests/test_simple.py +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/tests/test_sklearn.py +0 -0
- {bblean-0.6.0b1 → bblean-0.6.1b0}/tests/test_utils.py +0 -0
|
@@ -36,6 +36,29 @@ jobs:
|
|
|
36
36
|
- name: run-pytest
|
|
37
37
|
run: BITBIRCH_CANT_SKIP_CPP_TESTS=1 pytest -s ./tests/test_similarity.py
|
|
38
38
|
|
|
39
|
+
cpp-tests-win:
|
|
40
|
+
runs-on: windows-latest
|
|
41
|
+
steps:
|
|
42
|
+
- name: checkout-repo
|
|
43
|
+
uses: actions/checkout@v5
|
|
44
|
+
|
|
45
|
+
- name: setup-python
|
|
46
|
+
uses: actions/setup-python@v6
|
|
47
|
+
with:
|
|
48
|
+
python-version: ${{ env.PYTHON_VERSION }}
|
|
49
|
+
|
|
50
|
+
- name: install-repo
|
|
51
|
+
run: |
|
|
52
|
+
python -m pip install --upgrade pip
|
|
53
|
+
pip install pytest pytest-subtests inline-snapshot
|
|
54
|
+
$env:BITBIRCH_BUILD_CPP="1"
|
|
55
|
+
pip install --verbose .
|
|
56
|
+
|
|
57
|
+
- name: run-pytest
|
|
58
|
+
run: |
|
|
59
|
+
$env:BITBIRCH_CANT_SKIP_CPP_TESTS="1"
|
|
60
|
+
pytest -s ./tests/test_similarity.py
|
|
61
|
+
|
|
39
62
|
|
|
40
63
|
# Ubuntu 24.04 for reproducibility in regression tests only
|
|
41
64
|
# coverage makes tests slightly slower, so regression tests should not run with
|
|
@@ -15,15 +15,17 @@ on:
|
|
|
15
15
|
required: false
|
|
16
16
|
default: false
|
|
17
17
|
type: boolean
|
|
18
|
+
release:
|
|
19
|
+
types: [published]
|
|
18
20
|
|
|
19
21
|
env:
|
|
20
22
|
PYTHON_VERSION: '3.11'
|
|
21
|
-
SETUPTOOLS_SCM_PRETEND_VERSION: ${{ github.event.inputs.version }}
|
|
23
|
+
SETUPTOOLS_SCM_PRETEND_VERSION: ${{ github.event_name == 'release' && github.event.release.tag_name || github.event.inputs.version }}
|
|
22
24
|
# cibuildwheel configuration:
|
|
23
25
|
# Skip py 3.14, 32 bit and musllinux (Alpine) wheels
|
|
24
26
|
CIBW_SKIP: "cp314-* cp314t-* *-manylinux_i686 *-win32 *-musllinux_*"
|
|
25
27
|
CIBW_TEST_REQUIRES: pytest pytest-subtests inline-snapshot
|
|
26
|
-
CIBW_TEST_COMMAND: pytest -k
|
|
28
|
+
CIBW_TEST_COMMAND: pytest -k "not regression" {project}/tests
|
|
27
29
|
CIBW_PRERELEASE_PYTHONS: False
|
|
28
30
|
CIBW_ENVIRONMENT: "SETUPTOOLS_SCM_PRETEND_VERSION=${{ github.event.inputs.version }} BITBIRCH_BUILD_CPP=1"
|
|
29
31
|
# Build wheels that support both aarch64 and x86_64 on macOS
|
|
@@ -63,7 +65,7 @@ jobs:
|
|
|
63
65
|
build_wheels:
|
|
64
66
|
strategy:
|
|
65
67
|
matrix:
|
|
66
|
-
os: [ubuntu-latest, macos-latest]
|
|
68
|
+
os: [ubuntu-latest, macos-latest, windows-latest]
|
|
67
69
|
name: build-${{ matrix.os }}-wheels
|
|
68
70
|
runs-on: ${{ matrix.os }}
|
|
69
71
|
steps:
|
|
@@ -93,7 +95,7 @@ jobs:
|
|
|
93
95
|
publish_to_testpypi:
|
|
94
96
|
needs: [build_wheels, make_sdist]
|
|
95
97
|
runs-on: ubuntu-latest
|
|
96
|
-
if: ${{ github.event.inputs.upload-testpypi
|
|
98
|
+
if: ${{ github.event_name != 'release' && github.event.inputs.upload-testpypi }}
|
|
97
99
|
environment:
|
|
98
100
|
name: testpypi
|
|
99
101
|
url: https://test.pypi.org/p/bblean
|
|
@@ -115,7 +117,7 @@ jobs:
|
|
|
115
117
|
publish_to_pypi:
|
|
116
118
|
needs: [build_wheels, make_sdist]
|
|
117
119
|
runs-on: ubuntu-latest
|
|
118
|
-
if: ${{ github.event.inputs.upload-pypi
|
|
120
|
+
if: ${{ github.event_name == 'release' || github.event.inputs.upload-pypi }}
|
|
119
121
|
environment:
|
|
120
122
|
name: pypi
|
|
121
123
|
url: https://pypi.org/p/bblean
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: bblean
|
|
3
|
-
Version: 0.6.0b1
|
|
3
|
+
Version: 0.6.1b0
|
|
4
4
|
Summary: BitBirch-Lean Python package
|
|
5
5
|
Author: The Miranda-Quintana Lab and other BitBirch developers
|
|
6
6
|
Author-email: Ramon Alain Miranda Quintana <quintana@chem.ufl.edu>, Krisztina Zsigmond <kzsigmond@ufl.edu>, Ignacio Pickering <ipickering@ufl.edu>, Kenneth Lopez Perez <klopezperez@chem.ufl.edu>, Miroslav Lzicar <miroslav.lzicar@deepmedchem.com>
|
|
@@ -85,14 +85,20 @@ tuning](https://mqcomplab.github.io/bblean/devdocs/user-guide/parameters.html) g
|
|
|
85
85
|
|
|
86
86
|
## Installation
|
|
87
87
|
|
|
88
|
-
BitBIRCH-Lean requires Python 3.11 or higher, and can be installed in Linux or
|
|
89
|
-
|
|
88
|
+
BitBIRCH-Lean requires Python 3.11 or higher, and can be installed in Windows, Linux or
|
|
89
|
+
macOS via pip, which automatically includes C++ extensions:
|
|
90
90
|
|
|
91
91
|
```bash
|
|
92
92
|
pip install bblean
|
|
93
|
+
# Alternatively you can use 'uv pip install'
|
|
94
|
+
bb --help
|
|
93
95
|
```
|
|
96
|
+
|
|
94
97
|
We recommend installing `bblean` in a conda environment or a `venv`.
|
|
95
98
|
|
|
99
|
+
Memory usage and C++ extensions are most optimized for Linux / macOS. We support windows
|
|
100
|
+
on a best-effort basis, some releases may not have Windows support.
|
|
101
|
+
|
|
96
102
|
### From source
|
|
97
103
|
|
|
98
104
|
To build from source instead (editable mode):
|
|
@@ -42,14 +42,20 @@ tuning](https://mqcomplab.github.io/bblean/devdocs/user-guide/parameters.html) g
|
|
|
42
42
|
|
|
43
43
|
## Installation
|
|
44
44
|
|
|
45
|
-
BitBIRCH-Lean requires Python 3.11 or higher, and can be installed in Linux or
|
|
46
|
-
|
|
45
|
+
BitBIRCH-Lean requires Python 3.11 or higher, and can be installed in Windows, Linux or
|
|
46
|
+
macOS via pip, which automatically includes C++ extensions:
|
|
47
47
|
|
|
48
48
|
```bash
|
|
49
49
|
pip install bblean
|
|
50
|
+
# Alternatively you can use 'uv pip install'
|
|
51
|
+
bb --help
|
|
50
52
|
```
|
|
53
|
+
|
|
51
54
|
We recommend installing `bblean` in a conda environment or a `venv`.
|
|
52
55
|
|
|
56
|
+
Memory usage and C++ extensions are most optimized for Linux / macOS. We support windows
|
|
57
|
+
on a best-effort basis, some releases may not have Windows support.
|
|
58
|
+
|
|
53
59
|
### From source
|
|
54
60
|
|
|
55
61
|
To build from source instead (editable mode):
|
|
@@ -633,6 +633,7 @@ class BitBirch:
|
|
|
633
633
|
X = X[:max_fps]
|
|
634
634
|
threshold = self.threshold
|
|
635
635
|
branching_factor = self.branching_factor
|
|
636
|
+
|
|
636
637
|
n_features = _validate_n_features(X, input_is_packed, n_features)
|
|
637
638
|
d_type = X.dtype
|
|
638
639
|
|
|
@@ -718,6 +719,7 @@ class BitBirch:
|
|
|
718
719
|
"""
|
|
719
720
|
threshold = self.threshold
|
|
720
721
|
branching_factor = self.branching_factor
|
|
722
|
+
|
|
721
723
|
n_features = _validate_n_features(X, input_is_packed, n_features)
|
|
722
724
|
d_type = X.dtype
|
|
723
725
|
|
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '0.6.0b1'
|
|
32
|
-
__version_tuple__ = version_tuple = (0, 6, 0, 'b1')
|
|
31
|
+
__version__ = version = '0.6.1b0'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 6, 1, 'b0')
|
|
33
33
|
|
|
34
34
|
__commit_id__ = commit_id = None
|
|
@@ -47,6 +47,8 @@
|
|
|
47
47
|
# ./LICENSES/GPL-3.0-only.txt. If not, see <http://www.gnu.org/licenses/gpl-3.0.html>.
|
|
48
48
|
r"""BitBirch 'Lean' class for fast, memory-efficient O(N) clustering"""
|
|
49
49
|
from __future__ import annotations # Stringize type annotations for no runtime overhead
|
|
50
|
+
import pickle
|
|
51
|
+
import sys
|
|
50
52
|
import typing_extensions as tpx
|
|
51
53
|
import os
|
|
52
54
|
import random
|
|
@@ -1316,6 +1318,40 @@ class BitBirch:
|
|
|
1316
1318
|
parts.append(f"tolerance={self.tolerance}")
|
|
1317
1319
|
return f"{self.__class__.__name__}({', '.join(parts)})"
|
|
1318
1320
|
|
|
1321
|
+
def save(self, path: Path | str) -> None:
|
|
1322
|
+
r""":meta private:"""
|
|
1323
|
+
# TODO: BitBIRCH is highly recursive. pickling may crash python,
|
|
1324
|
+
# an alternative solution would be better
|
|
1325
|
+
msg = (
|
|
1326
|
+
"Saving large BitBIRCH trees may result in large memory peaks."
|
|
1327
|
+
" An alternative serialization method may be implemented in the future"
|
|
1328
|
+
)
|
|
1329
|
+
warnings.warn(msg)
|
|
1330
|
+
_old_limit = sys.getrecursionlimit()
|
|
1331
|
+
sys.setrecursionlimit(1_000_000_000)
|
|
1332
|
+
with open(path, mode="wb") as f:
|
|
1333
|
+
pickle.dump(self, f)
|
|
1334
|
+
sys.setrecursionlimit(_old_limit)
|
|
1335
|
+
|
|
1336
|
+
@classmethod
|
|
1337
|
+
def load(cls, path: Path | str) -> tpx.Self:
|
|
1338
|
+
r""":meta private:"""
|
|
1339
|
+
# TODO: BitBIRCH is highly recursive. pickling may crash python,
|
|
1340
|
+
# an alternative solution would be better
|
|
1341
|
+
msg = (
|
|
1342
|
+
"Loading large BitBIRCH trees may result in large memory peaks."
|
|
1343
|
+
" An alternative serialization method may be implemented in the future"
|
|
1344
|
+
)
|
|
1345
|
+
warnings.warn(msg)
|
|
1346
|
+
_old_limit = sys.getrecursionlimit()
|
|
1347
|
+
sys.setrecursionlimit(1_000_000_000)
|
|
1348
|
+
with open(path, mode="rb") as f:
|
|
1349
|
+
tree = pickle.load(f)
|
|
1350
|
+
sys.setrecursionlimit(_old_limit)
|
|
1351
|
+
if not isinstance(tree, cls):
|
|
1352
|
+
raise ValueError("Path does not contain a bitbirch object")
|
|
1353
|
+
return tree
|
|
1354
|
+
|
|
1319
1355
|
def global_clustering(
|
|
1320
1356
|
self,
|
|
1321
1357
|
n_clusters: int,
|
|
@@ -957,7 +957,6 @@ def _run(
|
|
|
957
957
|
bool,
|
|
958
958
|
Option(
|
|
959
959
|
"--monitor-mem/--no-monitor-mem",
|
|
960
|
-
"--monitor-rss/--no-monitor-rss",
|
|
961
960
|
help="Monitor RAM used by all processes",
|
|
962
961
|
rich_help_panel="Advanced",
|
|
963
962
|
),
|
|
@@ -966,7 +965,6 @@ def _run(
|
|
|
966
965
|
float,
|
|
967
966
|
Option(
|
|
968
967
|
"--monitor-mem-seconds",
|
|
969
|
-
"--monitor-rss-seconds",
|
|
970
968
|
help="Interval in seconds for RAM monitoring",
|
|
971
969
|
rich_help_panel="Debug",
|
|
972
970
|
hidden=True,
|
|
@@ -1098,26 +1096,29 @@ def _run(
|
|
|
1098
1096
|
|
|
1099
1097
|
timer.end_timing("total", console, indent=False)
|
|
1100
1098
|
console.print_peak_mem(out_dir, indent=False)
|
|
1099
|
+
if save_tree:
|
|
1100
|
+
if variant != "lean":
|
|
1101
|
+
console.print("Can't save tree for non-lean variants", style="red")
|
|
1102
|
+
else:
|
|
1103
|
+
# TODO: Find alternative solution
|
|
1104
|
+
tree.save_pickle(out_dir / "bitbirch.pkl")
|
|
1101
1105
|
if variant == "lean":
|
|
1102
|
-
if save_tree:
|
|
1103
|
-
# TODO: BitBIRCH is highly recursive. pickling may crash python,
|
|
1104
|
-
# an alternative solution would be better
|
|
1105
|
-
_old_limit = sys.getrecursionlimit()
|
|
1106
|
-
sys.setrecursionlimit(100_000)
|
|
1107
|
-
with open(out_dir / "bitbirch.pkl", mode="wb") as f:
|
|
1108
|
-
pickle.dump(tree, f)
|
|
1109
|
-
sys.setrecursionlimit(_old_limit)
|
|
1110
1106
|
tree.delete_internal_nodes()
|
|
1111
|
-
|
|
1112
|
-
|
|
1107
|
+
# Dump outputs (peak memory, timings, config, cluster ids)
|
|
1108
|
+
if save_centroids:
|
|
1109
|
+
if variant != "lean":
|
|
1110
|
+
console.print("Can't save centroids for non-lean variants", style="red")
|
|
1111
|
+
with open(out_dir / "clusters.pkl", mode="wb") as f:
|
|
1112
|
+
pickle.dump(tree.get_cluster_mol_ids(), f)
|
|
1113
|
+
else:
|
|
1113
1114
|
output = tree.get_centroids_mol_ids()
|
|
1114
1115
|
with open(out_dir / "clusters.pkl", mode="wb") as f:
|
|
1115
1116
|
pickle.dump(output["mol_ids"], f)
|
|
1116
1117
|
with open(out_dir / "cluster-centroids-packed.pkl", mode="wb") as f:
|
|
1117
1118
|
pickle.dump(output["centroids"], f)
|
|
1118
|
-
|
|
1119
|
-
|
|
1120
|
-
|
|
1119
|
+
else:
|
|
1120
|
+
with open(out_dir / "clusters.pkl", mode="wb") as f:
|
|
1121
|
+
pickle.dump(tree.get_cluster_mol_ids(), f)
|
|
1121
1122
|
|
|
1122
1123
|
collect_system_specs_and_dump_config(ctx.params)
|
|
1123
1124
|
timer.dump(out_dir / "timings.json")
|
|
@@ -1284,8 +1285,7 @@ def _multiround(
|
|
|
1284
1285
|
monitor_rss: Annotated[
|
|
1285
1286
|
bool,
|
|
1286
1287
|
Option(
|
|
1287
|
-
"--monitor-mem",
|
|
1288
|
-
"--monitor-rss",
|
|
1288
|
+
"--monitor-mem/--no-monitor-mem",
|
|
1289
1289
|
help="Monitor RAM used by all processes",
|
|
1290
1290
|
rich_help_panel="Advanced",
|
|
1291
1291
|
),
|
|
@@ -1294,7 +1294,6 @@ def _multiround(
|
|
|
1294
1294
|
float,
|
|
1295
1295
|
Option(
|
|
1296
1296
|
"--monitor-mem-seconds",
|
|
1297
|
-
"--monitor-rss-seconds",
|
|
1298
1297
|
help="Interval in seconds for RAM monitoring",
|
|
1299
1298
|
rich_help_panel="Debug",
|
|
1300
1299
|
hidden=True,
|
|
@@ -69,7 +69,7 @@ uint32_t _popcount_1d(const py::array_t<uint8_t>& arr) {
|
|
|
69
69
|
#endif
|
|
70
70
|
uint32_t count{0}; // Output scalar
|
|
71
71
|
py::ssize_t steps = arr.shape(0);
|
|
72
|
-
if (is_8byte_aligned(arr)
|
|
72
|
+
if (is_8byte_aligned(arr) && (steps % 64 == 0)) {
|
|
73
73
|
#ifdef DEBUG_LOGS
|
|
74
74
|
py::print("DEBUG: _popcount_1d fn triggered uint64 + popcount 64");
|
|
75
75
|
#endif
|
|
@@ -110,7 +110,7 @@ py::array_t<uint32_t> _popcount_2d(const CArrayForcecast<uint8_t>& arr) {
|
|
|
110
110
|
print_8byte_alignment_check(arr);
|
|
111
111
|
#endif
|
|
112
112
|
py::ssize_t steps = arr.shape(1);
|
|
113
|
-
if (is_8byte_aligned(arr)
|
|
113
|
+
if (is_8byte_aligned(arr) && (steps % 64 == 0)) {
|
|
114
114
|
#ifdef DEBUG_LOGS
|
|
115
115
|
py::print("DEBUG: _popcount_2d fn triggered uint64 + popcount 64");
|
|
116
116
|
#endif
|
|
@@ -243,7 +243,7 @@ py::array_t<uint8_t> centroid_from_sum(const CArrayForcecast<T>& linear_sum,
|
|
|
243
243
|
}
|
|
244
244
|
}
|
|
245
245
|
|
|
246
|
-
if (
|
|
246
|
+
if (!pack) {
|
|
247
247
|
return centroid_unpacked;
|
|
248
248
|
}
|
|
249
249
|
|
|
@@ -351,7 +351,7 @@ py::array_t<double> jt_sim_packed_precalc_cardinalities(
|
|
|
351
351
|
}
|
|
352
352
|
auto out = py::array_t<double>(n_samples);
|
|
353
353
|
|
|
354
|
-
if (is_8byte_aligned(arr)
|
|
354
|
+
if (is_8byte_aligned(arr) && is_8byte_aligned(vec) &&
|
|
355
355
|
(n_features % 64 == 0)) {
|
|
356
356
|
#ifdef DEBUG_LOGS
|
|
357
357
|
py::print("DEBUG: jt_sim_packed fn triggered uint64 + popcount 64");
|
|
@@ -298,13 +298,8 @@ class _FinalTreeMergingRound(_TreeMergingRound):
|
|
|
298
298
|
|
|
299
299
|
# Save clusters and exit
|
|
300
300
|
if self.save_tree:
|
|
301
|
-
# TODO:
|
|
302
|
-
|
|
303
|
-
_old_limit = sys.getrecursionlimit()
|
|
304
|
-
sys.setrecursionlimit(100_000)
|
|
305
|
-
with open(self.out_dir / "bitbirch.pkl", mode="wb") as f:
|
|
306
|
-
pickle.dump(tree, f)
|
|
307
|
-
sys.setrecursionlimit(_old_limit)
|
|
301
|
+
# TODO: Find alternative solution
|
|
302
|
+
tree.save_pickle(self.out_dir / "bitbirch.pkl")
|
|
308
303
|
tree.delete_internal_nodes()
|
|
309
304
|
if self.save_centroids:
|
|
310
305
|
output = tree.get_centroids_mol_ids()
|
|
@@ -399,13 +399,17 @@ def dump_mol_images(
|
|
|
399
399
|
clusters: list[list[int]],
|
|
400
400
|
cluster_idx: int = 0,
|
|
401
401
|
batch_size: int = 30,
|
|
402
|
+
limit: int = -1,
|
|
402
403
|
) -> None:
|
|
403
404
|
r"""Dump smiles associated with a specific cluster as ``*.png`` image files"""
|
|
404
405
|
if isinstance(smiles, str):
|
|
405
406
|
smiles = [smiles]
|
|
406
407
|
smiles = np.asarray(smiles)
|
|
407
408
|
idxs = clusters[cluster_idx]
|
|
409
|
+
num = 0
|
|
408
410
|
for i, idx_seq in enumerate(batched(idxs, batch_size)):
|
|
411
|
+
if num + len(idx_seq) > limit:
|
|
412
|
+
idx_seq = idx_seq[: num + len(idx_seq) - limit]
|
|
409
413
|
mols = []
|
|
410
414
|
for smi in smiles[list(idx_seq)]:
|
|
411
415
|
mol = Chem.MolFromSmiles(smi)
|
|
@@ -415,6 +419,9 @@ def dump_mol_images(
|
|
|
415
419
|
img = Draw.MolsToGridImage(mols, molsPerRow=5)
|
|
416
420
|
with open(f"cluster_{cluster_idx}_{i}.png", "wb") as f:
|
|
417
421
|
f.write(img.data)
|
|
422
|
+
num += len(idx_seq)
|
|
423
|
+
if num >= limit:
|
|
424
|
+
break
|
|
418
425
|
|
|
419
426
|
|
|
420
427
|
# For internal use, dispatches a visualization workflow and optionally saves
|
|
@@ -131,8 +131,7 @@ class BitBirch(
|
|
|
131
131
|
.astype(np.uint8, copy=False)
|
|
132
132
|
.view(np.bool)
|
|
133
133
|
)
|
|
134
|
-
# TODO:
|
|
135
|
-
# I believe this may be a sklearn bug
|
|
134
|
+
# TODO: Due to a sklearn bug this performs unnecessary casts
|
|
136
135
|
centers = self.subcluster_centers_.astype(np.uint8, copy=False).view(np.bool)
|
|
137
136
|
argmin = pairwise_distances_argmin(X, centers, metric="jaccard")
|
|
138
137
|
return self.subcluster_labels_[argmin]
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: bblean
|
|
3
|
-
Version: 0.6.0b1
|
|
3
|
+
Version: 0.6.1b0
|
|
4
4
|
Summary: BitBirch-Lean Python package
|
|
5
5
|
Author: The Miranda-Quintana Lab and other BitBirch developers
|
|
6
6
|
Author-email: Ramon Alain Miranda Quintana <quintana@chem.ufl.edu>, Krisztina Zsigmond <kzsigmond@ufl.edu>, Ignacio Pickering <ipickering@ufl.edu>, Kenneth Lopez Perez <klopezperez@chem.ufl.edu>, Miroslav Lzicar <miroslav.lzicar@deepmedchem.com>
|
|
@@ -85,14 +85,20 @@ tuning](https://mqcomplab.github.io/bblean/devdocs/user-guide/parameters.html) g
|
|
|
85
85
|
|
|
86
86
|
## Installation
|
|
87
87
|
|
|
88
|
-
BitBIRCH-Lean requires Python 3.11 or higher, and can be installed in Linux or
|
|
89
|
-
|
|
88
|
+
BitBIRCH-Lean requires Python 3.11 or higher, and can be installed in Windows, Linux or
|
|
89
|
+
macOS via pip, which automatically includes C++ extensions:
|
|
90
90
|
|
|
91
91
|
```bash
|
|
92
92
|
pip install bblean
|
|
93
|
+
# Alternatively you can use 'uv pip install'
|
|
94
|
+
bb --help
|
|
93
95
|
```
|
|
96
|
+
|
|
94
97
|
We recommend installing `bblean` in a conda environment or a `venv`.
|
|
95
98
|
|
|
99
|
+
Memory usage and C++ extensions are most optimized for Linux / macOS. We support windows
|
|
100
|
+
on a best-effort basis, some releases may not have Windows support.
|
|
101
|
+
|
|
96
102
|
### From source
|
|
97
103
|
|
|
98
104
|
To build from source instead (editable mode):
|
|
@@ -23,13 +23,15 @@ APPLE_SILICON = platform.machine().lower() in ["arm64", "aarch64"]
|
|
|
23
23
|
ARM = platform.machine().lower().startswith("arm") and not APPLE_SILICON
|
|
24
24
|
|
|
25
25
|
# Build C++ extensions (recommended)
|
|
26
|
+
extra_compile_args = []
|
|
26
27
|
if os.getenv("BITBIRCH_BUILD_CPP"):
|
|
27
28
|
import pybind11
|
|
28
29
|
from pybind11.setup_helpers import Pybind11Extension, WIN
|
|
29
30
|
|
|
30
31
|
# setuptools paths must be relative
|
|
31
32
|
ext_sources = [str((Path(name) / "csrc" / "similarity.cpp"))]
|
|
32
|
-
|
|
33
|
+
if not WIN:
|
|
34
|
+
extra_compile_args.append("-O3") # -O3 includes -ftree-vectorize
|
|
33
35
|
if not WIN:
|
|
34
36
|
if X86:
|
|
35
37
|
if os.getenv("BITBIRCH_BUILD_NATIVE"):
|
|
@@ -254,6 +254,7 @@ def test_multiround() -> None:
|
|
|
254
254
|
"--no-verbose",
|
|
255
255
|
"--set-mid-merge",
|
|
256
256
|
"tolerance-legacy",
|
|
257
|
+
"--no-monitor-mem",
|
|
257
258
|
],
|
|
258
259
|
)
|
|
259
260
|
with open(out_dir / "clusters.pkl", mode="rb") as f:
|
|
@@ -288,7 +289,18 @@ def test_run() -> None:
|
|
|
288
289
|
np.save(dir / "fingerprints.npy", fps)
|
|
289
290
|
out_dir = dir / "output"
|
|
290
291
|
result = runner.invoke(
|
|
291
|
-
app,
|
|
292
|
+
app,
|
|
293
|
+
[
|
|
294
|
+
"run",
|
|
295
|
+
str(dir),
|
|
296
|
+
"-o",
|
|
297
|
+
str(out_dir),
|
|
298
|
+
"-b",
|
|
299
|
+
"50",
|
|
300
|
+
"-t",
|
|
301
|
+
"0.65",
|
|
302
|
+
"--no-monitor-mem",
|
|
303
|
+
],
|
|
292
304
|
)
|
|
293
305
|
with open(out_dir / "clusters.pkl", mode="rb") as f:
|
|
294
306
|
obj = pickle.load(f)
|
|
@@ -1,11 +1,19 @@
|
|
|
1
1
|
import sys
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
2
4
|
import numpy as np
|
|
5
|
+
|
|
3
6
|
from bblean.bitbirch import BitBirch
|
|
4
7
|
from bblean.fingerprints import make_fake_fingerprints, unpack_fingerprints
|
|
5
8
|
from inline_snapshot import snapshot
|
|
6
9
|
|
|
7
10
|
|
|
8
11
|
def test_random_fps_consistency() -> None:
|
|
12
|
+
# TODO For some strage reason this test *fails on macOS and Windows*
|
|
13
|
+
# The kmeans implementation of sklearn seems to work different in linux and macOS
|
|
14
|
+
if sys.platform != "linux":
|
|
15
|
+
pytest.skip("Currently global clustering is non-deterministic on mac / windows")
|
|
16
|
+
|
|
9
17
|
fps = make_fake_fingerprints(3000, n_features=2048, seed=126205095409235, pack=True)
|
|
10
18
|
tree = BitBirch(branching_factor=50, threshold=0.65, merge_criterion="diameter")
|
|
11
19
|
tree.fit(fps, n_features=2048)
|
|
@@ -29,33 +37,31 @@ def test_random_fps_consistency() -> None:
|
|
|
29
37
|
[235, 255, 123, 255, 255],
|
|
30
38
|
]
|
|
31
39
|
)
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
[
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
[
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
]
|
|
61
|
-
)
|
|
40
|
+
|
|
41
|
+
tree.global_clustering(
|
|
42
|
+
20,
|
|
43
|
+
method="kmeans",
|
|
44
|
+
n_init=1,
|
|
45
|
+
init=unpack_fingerprints(np.vstack(output_cent))[::2][:20],
|
|
46
|
+
max_iter=10,
|
|
47
|
+
)
|
|
48
|
+
output_mol_ids = tree.get_cluster_mol_ids(global_clusters=True, sort=False)
|
|
49
|
+
output_med = tree.get_medoids(fps, global_clusters=True, sort=False)
|
|
50
|
+
assert [o[:5] for o in output_mol_ids[:5]] == snapshot(
|
|
51
|
+
[
|
|
52
|
+
[16, 1023, 1793, 2, 15],
|
|
53
|
+
[1873, 1882, 1912, 1954, 1970],
|
|
54
|
+
[12, 1877, 1861, 2068, 2012],
|
|
55
|
+
[1560, 1901, 2065, 2037, 2396],
|
|
56
|
+
[62, 73, 75, 87, 121],
|
|
57
|
+
]
|
|
58
|
+
)
|
|
59
|
+
assert output_med[:5, :5].tolist() == snapshot(
|
|
60
|
+
[
|
|
61
|
+
[255, 127, 252, 111, 223],
|
|
62
|
+
[255, 255, 95, 255, 239],
|
|
63
|
+
[123, 239, 238, 135, 126],
|
|
64
|
+
[223, 14, 207, 187, 104],
|
|
65
|
+
[255, 255, 255, 247, 255],
|
|
66
|
+
]
|
|
67
|
+
)
|
|
@@ -77,7 +77,7 @@ def test_speed_regression(subtests) -> None:
|
|
|
77
77
|
# all_max_allowed_ns = [1_200_000_000, 1_900_000_000, 2_500_000_000]
|
|
78
78
|
# For the ubuntu-24.04 in gh CI the following are required:
|
|
79
79
|
if CSIM_AVAIL:
|
|
80
|
-
all_max_allowed_ns = [900_000_000,
|
|
80
|
+
all_max_allowed_ns = [900_000_000, 1_500_000_000, 2_000_000_000]
|
|
81
81
|
else:
|
|
82
82
|
all_max_allowed_ns = [1_700_000_000, 2_600_000_000, 3_600_000_000]
|
|
83
83
|
for fps_num, max_allowed_ns in zip(all_fps_nums, all_max_allowed_ns):
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{bblean-0.6.0b1 → bblean-0.6.1b0}/docs/src/user-guide/notebooks/bitbirch_best_practices.ipynb
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|