bblean 0.6.0b1__tar.gz → 0.6.1b0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. {bblean-0.6.0b1 → bblean-0.6.1b0}/.github/workflows/ci-cpp.yaml +23 -0
  2. {bblean-0.6.0b1 → bblean-0.6.1b0}/.github/workflows/upload-to-pypi.yaml +7 -5
  3. {bblean-0.6.0b1 → bblean-0.6.1b0}/PKG-INFO +9 -3
  4. {bblean-0.6.0b1 → bblean-0.6.1b0}/README.md +8 -2
  5. {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean/_legacy/bb_int64.py +2 -0
  6. {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean/_version.py +2 -2
  7. {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean/bitbirch.py +36 -0
  8. {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean/cli.py +17 -18
  9. {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean/csrc/similarity.cpp +4 -4
  10. {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean/multiround.py +2 -7
  11. {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean/plotting.py +7 -0
  12. {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean/sklearn.py +1 -2
  13. {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean.egg-info/PKG-INFO +9 -3
  14. {bblean-0.6.0b1 → bblean-0.6.1b0}/setup.py +3 -1
  15. {bblean-0.6.0b1 → bblean-0.6.1b0}/tests/test_cli.py +13 -1
  16. {bblean-0.6.0b1 → bblean-0.6.1b0}/tests/test_global_clustering.py +36 -30
  17. {bblean-0.6.0b1 → bblean-0.6.1b0}/tests/test_regression.py +1 -1
  18. {bblean-0.6.0b1 → bblean-0.6.1b0}/.cruft.json +0 -0
  19. {bblean-0.6.0b1 → bblean-0.6.1b0}/.flake8 +0 -0
  20. {bblean-0.6.0b1 → bblean-0.6.1b0}/.github/CODEOWNERS +0 -0
  21. {bblean-0.6.0b1 → bblean-0.6.1b0}/.github/workflows/ci.yaml +0 -0
  22. {bblean-0.6.0b1 → bblean-0.6.1b0}/.gitignore +0 -0
  23. {bblean-0.6.0b1 → bblean-0.6.1b0}/.pre-commit-config.yaml +0 -0
  24. {bblean-0.6.0b1 → bblean-0.6.1b0}/LICENSE +0 -0
  25. {bblean-0.6.0b1 → bblean-0.6.1b0}/LICENSES/BSD-3-Clause.txt +0 -0
  26. {bblean-0.6.0b1 → bblean-0.6.1b0}/LICENSES/GPL-3.0-only.txt +0 -0
  27. {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean/__init__.py +0 -0
  28. {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean/_config.py +0 -0
  29. {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean/_console.py +0 -0
  30. {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean/_legacy/__init__.py +0 -0
  31. {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean/_legacy/bb_uint8.py +0 -0
  32. {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean/_memory.py +0 -0
  33. {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean/_merges.py +0 -0
  34. {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean/_py_similarity.py +0 -0
  35. {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean/_timer.py +0 -0
  36. {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean/analysis.py +0 -0
  37. {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean/csrc/README.md +0 -0
  38. {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean/fingerprints.py +0 -0
  39. {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean/metrics.py +0 -0
  40. {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean/similarity.py +0 -0
  41. {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean/smiles.py +0 -0
  42. {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean/utils.py +0 -0
  43. {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean-demo-v2.gif +0 -0
  44. {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean-demo.cast +0 -0
  45. {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean.egg-info/SOURCES.txt +0 -0
  46. {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean.egg-info/dependency_links.txt +0 -0
  47. {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean.egg-info/entry_points.txt +0 -0
  48. {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean.egg-info/requires.txt +0 -0
  49. {bblean-0.6.0b1 → bblean-0.6.1b0}/bblean.egg-info/top_level.txt +0 -0
  50. {bblean-0.6.0b1 → bblean-0.6.1b0}/docs/src/_static/api.svg +0 -0
  51. {bblean-0.6.0b1 → bblean-0.6.1b0}/docs/src/_static/installing.svg +0 -0
  52. {bblean-0.6.0b1 → bblean-0.6.1b0}/docs/src/_static/logo-dark-bw.svg +0 -0
  53. {bblean-0.6.0b1 → bblean-0.6.1b0}/docs/src/_static/logo-light-bw.svg +0 -0
  54. {bblean-0.6.0b1 → bblean-0.6.1b0}/docs/src/_static/publications.svg +0 -0
  55. {bblean-0.6.0b1 → bblean-0.6.1b0}/docs/src/_static/style.css +0 -0
  56. {bblean-0.6.0b1 → bblean-0.6.1b0}/docs/src/_static/user-guide.svg +0 -0
  57. {bblean-0.6.0b1 → bblean-0.6.1b0}/docs/src/_templates/module.rst +0 -0
  58. {bblean-0.6.0b1 → bblean-0.6.1b0}/docs/src/api-reference.rst +0 -0
  59. {bblean-0.6.0b1 → bblean-0.6.1b0}/docs/src/conf.py +0 -0
  60. {bblean-0.6.0b1 → bblean-0.6.1b0}/docs/src/index.rst +0 -0
  61. {bblean-0.6.0b1 → bblean-0.6.1b0}/docs/src/installing.rst +0 -0
  62. {bblean-0.6.0b1 → bblean-0.6.1b0}/docs/src/publications.rst +0 -0
  63. {bblean-0.6.0b1 → bblean-0.6.1b0}/docs/src/user-guide/linux_memory_setup.rst +0 -0
  64. {bblean-0.6.0b1 → bblean-0.6.1b0}/docs/src/user-guide/notebooks/bitbirch_best_practices.ipynb +0 -0
  65. {bblean-0.6.0b1 → bblean-0.6.1b0}/docs/src/user-guide/notebooks/bitbirch_quickstart.ipynb +0 -0
  66. {bblean-0.6.0b1 → bblean-0.6.1b0}/docs/src/user-guide/parameters.rst +0 -0
  67. {bblean-0.6.0b1 → bblean-0.6.1b0}/docs/src/user-guide.rst +0 -0
  68. {bblean-0.6.0b1 → bblean-0.6.1b0}/environment.yaml +0 -0
  69. {bblean-0.6.0b1 → bblean-0.6.1b0}/examples/biogen_logS.csv +0 -0
  70. {bblean-0.6.0b1 → bblean-0.6.1b0}/examples/bitbirch_best_practices.ipynb +0 -0
  71. {bblean-0.6.0b1 → bblean-0.6.1b0}/examples/bitbirch_quickstart.ipynb +0 -0
  72. {bblean-0.6.0b1 → bblean-0.6.1b0}/examples/chembl-33-natural-products-subset.smi +0 -0
  73. {bblean-0.6.0b1 → bblean-0.6.1b0}/examples/dataset_splitting.ipynb +0 -0
  74. {bblean-0.6.0b1 → bblean-0.6.1b0}/pyproject.toml +0 -0
  75. {bblean-0.6.0b1 → bblean-0.6.1b0}/setup.cfg +0 -0
  76. {bblean-0.6.0b1 → bblean-0.6.1b0}/tests/chembl-sample-3k.smi +0 -0
  77. {bblean-0.6.0b1 → bblean-0.6.1b0}/tests/chembl-sample-bad.smi +0 -0
  78. {bblean-0.6.0b1 → bblean-0.6.1b0}/tests/legacy_merges.py +0 -0
  79. {bblean-0.6.0b1 → bblean-0.6.1b0}/tests/legacy_metrics.py +0 -0
  80. {bblean-0.6.0b1 → bblean-0.6.1b0}/tests/test_bb_consistency.py +0 -0
  81. {bblean-0.6.0b1 → bblean-0.6.1b0}/tests/test_fake_fps.py +0 -0
  82. {bblean-0.6.0b1 → bblean-0.6.1b0}/tests/test_fingerprints.py +0 -0
  83. {bblean-0.6.0b1 → bblean-0.6.1b0}/tests/test_import_bblean.py +0 -0
  84. {bblean-0.6.0b1 → bblean-0.6.1b0}/tests/test_merges.py +0 -0
  85. {bblean-0.6.0b1 → bblean-0.6.1b0}/tests/test_metrics.py +0 -0
  86. {bblean-0.6.0b1 → bblean-0.6.1b0}/tests/test_multiround.py +0 -0
  87. {bblean-0.6.0b1 → bblean-0.6.1b0}/tests/test_refine.py +0 -0
  88. {bblean-0.6.0b1 → bblean-0.6.1b0}/tests/test_sampling.py +0 -0
  89. {bblean-0.6.0b1 → bblean-0.6.1b0}/tests/test_similarity.py +0 -0
  90. {bblean-0.6.0b1 → bblean-0.6.1b0}/tests/test_simple.py +0 -0
  91. {bblean-0.6.0b1 → bblean-0.6.1b0}/tests/test_sklearn.py +0 -0
  92. {bblean-0.6.0b1 → bblean-0.6.1b0}/tests/test_utils.py +0 -0
@@ -36,6 +36,29 @@ jobs:
36
36
  - name: run-pytest
37
37
  run: BITBIRCH_CANT_SKIP_CPP_TESTS=1 pytest -s ./tests/test_similarity.py
38
38
 
39
+ cpp-tests-win:
40
+ runs-on: windows-latest
41
+ steps:
42
+ - name: checkout-repo
43
+ uses: actions/checkout@v5
44
+
45
+ - name: setup-python
46
+ uses: actions/setup-python@v6
47
+ with:
48
+ python-version: ${{ env.PYTHON_VERSION }}
49
+
50
+ - name: install-repo
51
+ run: |
52
+ python -m pip install --upgrade pip
53
+ pip install pytest pytest-subtests inline-snapshot
54
+ $env:BITBIRCH_BUILD_CPP="1"
55
+ pip install --verbose .
56
+
57
+ - name: run-pytest
58
+ run: |
59
+ $env:BITBIRCH_CANT_SKIP_CPP_TESTS="1"
60
+ pytest -s ./tests/test_similarity.py
61
+
39
62
 
40
63
  # Ubuntu 24.04 for reproducibility in regression tests only
41
64
  # coverage makes tests slightly slower, so regression tests should not run with
@@ -15,15 +15,17 @@ on:
15
15
  required: false
16
16
  default: false
17
17
  type: boolean
18
+ release:
19
+ types: [published]
18
20
 
19
21
  env:
20
22
  PYTHON_VERSION: '3.11'
21
- SETUPTOOLS_SCM_PRETEND_VERSION: ${{ github.event.inputs.version }}
23
+ SETUPTOOLS_SCM_PRETEND_VERSION: ${{ github.event_name == 'release' && github.event.release.tag_name || github.event.inputs.version }}
22
24
  # cibuildwheel configuration:
23
25
  # Skip py 3.14, 32 bit and musllinux (Alpine) wheels
24
26
  CIBW_SKIP: "cp314-* cp314t-* *-manylinux_i686 *-win32 *-musllinux_*"
25
27
  CIBW_TEST_REQUIRES: pytest pytest-subtests inline-snapshot
26
- CIBW_TEST_COMMAND: pytest -k 'not regression' {project}/tests
28
+ CIBW_TEST_COMMAND: pytest -k "not regression" {project}/tests
27
29
  CIBW_PRERELEASE_PYTHONS: False
28
30
  CIBW_ENVIRONMENT: "SETUPTOOLS_SCM_PRETEND_VERSION=${{ github.event.inputs.version }} BITBIRCH_BUILD_CPP=1"
29
31
  # Build wheels that support both aarch64 and x86_64 on macOS
@@ -63,7 +65,7 @@ jobs:
63
65
  build_wheels:
64
66
  strategy:
65
67
  matrix:
66
- os: [ubuntu-latest, macos-latest]
68
+ os: [ubuntu-latest, macos-latest, windows-latest]
67
69
  name: build-${{ matrix.os }}-wheels
68
70
  runs-on: ${{ matrix.os }}
69
71
  steps:
@@ -93,7 +95,7 @@ jobs:
93
95
  publish_to_testpypi:
94
96
  needs: [build_wheels, make_sdist]
95
97
  runs-on: ubuntu-latest
96
- if: ${{ github.event.inputs.upload-testpypi == 'true' }}
98
+ if: ${{ github.event_name != 'release' && github.event.inputs.upload-testpypi }}
97
99
  environment:
98
100
  name: testpypi
99
101
  url: https://test.pypi.org/p/bblean
@@ -115,7 +117,7 @@ jobs:
115
117
  publish_to_pypi:
116
118
  needs: [build_wheels, make_sdist]
117
119
  runs-on: ubuntu-latest
118
- if: ${{ github.event.inputs.upload-pypi == 'true' }}
120
+ if: ${{ github.event_name == 'release' || github.event.inputs.upload-pypi }}
119
121
  environment:
120
122
  name: pypi
121
123
  url: https://pypi.org/p/bblean
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: bblean
3
- Version: 0.6.0b1
3
+ Version: 0.6.1b0
4
4
  Summary: BitBirch-Lean Python package
5
5
  Author: The Miranda-Quintana Lab and other BitBirch developers
6
6
  Author-email: Ramon Alain Miranda Quintana <quintana@chem.ufl.edu>, Krisztina Zsigmond <kzsigmond@ufl.edu>, Ignacio Pickering <ipickering@ufl.edu>, Kenneth Lopez Perez <klopezperez@chem.ufl.edu>, Miroslav Lzicar <miroslav.lzicar@deepmedchem.com>
@@ -85,14 +85,20 @@ tuning](https://mqcomplab.github.io/bblean/devdocs/user-guide/parameters.html) g
85
85
 
86
86
  ## Installation
87
87
 
88
- BitBIRCH-Lean requires Python 3.11 or higher, and can be installed in Linux or macOS.
89
- Via pip, which automatically includes C++ extensions:
88
+ BitBIRCH-Lean requires Python 3.11 or higher, and can be installed in Windows, Linux or
89
+ macOS via pip, which automatically includes C++ extensions:
90
90
 
91
91
  ```bash
92
92
  pip install bblean
93
+ # Alternatively you can use 'uv pip install'
94
+ bb --help
93
95
  ```
96
+
94
97
  We recommend installing `bblean` in a conda environment or a `venv`.
95
98
 
99
+ Memory usage and C++ extensions are most optimized for Linux / macOS. We support Windows
100
+ on a best-effort basis; some releases may not have Windows support.
101
+
96
102
  ### From source
97
103
 
98
104
  To build from source instead (editable mode):
@@ -42,14 +42,20 @@ tuning](https://mqcomplab.github.io/bblean/devdocs/user-guide/parameters.html) g
42
42
 
43
43
  ## Installation
44
44
 
45
- BitBIRCH-Lean requires Python 3.11 or higher, and can be installed in Linux or macOS.
46
- Via pip, which automatically includes C++ extensions:
45
+ BitBIRCH-Lean requires Python 3.11 or higher, and can be installed in Windows, Linux or
46
+ macOS via pip, which automatically includes C++ extensions:
47
47
 
48
48
  ```bash
49
49
  pip install bblean
50
+ # Alternatively you can use 'uv pip install'
51
+ bb --help
50
52
  ```
53
+
51
54
  We recommend installing `bblean` in a conda environment or a `venv`.
52
55
 
56
+ Memory usage and C++ extensions are most optimized for Linux / macOS. We support Windows
57
+ on a best-effort basis; some releases may not have Windows support.
58
+
53
59
  ### From source
54
60
 
55
61
  To build from source instead (editable mode):
@@ -633,6 +633,7 @@ class BitBirch:
633
633
  X = X[:max_fps]
634
634
  threshold = self.threshold
635
635
  branching_factor = self.branching_factor
636
+
636
637
  n_features = _validate_n_features(X, input_is_packed, n_features)
637
638
  d_type = X.dtype
638
639
 
@@ -718,6 +719,7 @@ class BitBirch:
718
719
  """
719
720
  threshold = self.threshold
720
721
  branching_factor = self.branching_factor
722
+
721
723
  n_features = _validate_n_features(X, input_is_packed, n_features)
722
724
  d_type = X.dtype
723
725
 
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '0.6.0b1'
32
- __version_tuple__ = version_tuple = (0, 6, 0, 'b1')
31
+ __version__ = version = '0.6.1b0'
32
+ __version_tuple__ = version_tuple = (0, 6, 1, 'b0')
33
33
 
34
34
  __commit_id__ = commit_id = None
@@ -47,6 +47,8 @@
47
47
  # ./LICENSES/GPL-3.0-only.txt. If not, see <http://www.gnu.org/licenses/gpl-3.0.html>.
48
48
  r"""BitBirch 'Lean' class for fast, memory-efficient O(N) clustering"""
49
49
  from __future__ import annotations # Stringize type annotations for no runtime overhead
50
+ import pickle
51
+ import sys
50
52
  import typing_extensions as tpx
51
53
  import os
52
54
  import random
@@ -1316,6 +1318,40 @@ class BitBirch:
1316
1318
  parts.append(f"tolerance={self.tolerance}")
1317
1319
  return f"{self.__class__.__name__}({', '.join(parts)})"
1318
1320
 
1321
+ def save(self, path: Path | str) -> None:
1322
+ r""":meta private:"""
1323
+ # TODO: BitBIRCH is highly recursive. pickling may crash python,
1324
+ # an alternative solution would be better
1325
+ msg = (
1326
+ "Saving large BitBIRCH trees may result in large memory peaks."
1327
+ " An alternative serialization method may be implemented in the future"
1328
+ )
1329
+ warnings.warn(msg)
1330
+ _old_limit = sys.getrecursionlimit()
1331
+ sys.setrecursionlimit(1_000_000_000)
1332
+ with open(path, mode="wb") as f:
1333
+ pickle.dump(self, f)
1334
+ sys.setrecursionlimit(_old_limit)
1335
+
1336
+ @classmethod
1337
+ def load(cls, path: Path | str) -> tpx.Self:
1338
+ r""":meta private:"""
1339
+ # TODO: BitBIRCH is highly recursive. pickling may crash python,
1340
+ # an alternative solution would be better
1341
+ msg = (
1342
+ "Loading large BitBIRCH trees may result in large memory peaks."
1343
+ " An alternative serialization method may be implemented in the future"
1344
+ )
1345
+ warnings.warn(msg)
1346
+ _old_limit = sys.getrecursionlimit()
1347
+ sys.setrecursionlimit(1_000_000_000)
1348
+ with open(path, mode="rb") as f:
1349
+ tree = pickle.load(f)
1350
+ sys.setrecursionlimit(_old_limit)
1351
+ if not isinstance(tree, cls):
1352
+ raise ValueError("Path does not contain a bitbirch object")
1353
+ return tree
1354
+
1319
1355
  def global_clustering(
1320
1356
  self,
1321
1357
  n_clusters: int,
@@ -957,7 +957,6 @@ def _run(
957
957
  bool,
958
958
  Option(
959
959
  "--monitor-mem/--no-monitor-mem",
960
- "--monitor-rss/--no-monitor-rss",
961
960
  help="Monitor RAM used by all processes",
962
961
  rich_help_panel="Advanced",
963
962
  ),
@@ -966,7 +965,6 @@ def _run(
966
965
  float,
967
966
  Option(
968
967
  "--monitor-mem-seconds",
969
- "--monitor-rss-seconds",
970
968
  help="Interval in seconds for RAM monitoring",
971
969
  rich_help_panel="Debug",
972
970
  hidden=True,
@@ -1098,26 +1096,29 @@ def _run(
1098
1096
 
1099
1097
  timer.end_timing("total", console, indent=False)
1100
1098
  console.print_peak_mem(out_dir, indent=False)
1099
+ if save_tree:
1100
+ if variant != "lean":
1101
+ console.print("Can't save tree for non-lean variants", style="red")
1102
+ else:
1103
+ # TODO: Find alternative solution
1104
+ tree.save_pickle(out_dir / "bitbirch.pkl")
1101
1105
  if variant == "lean":
1102
- if save_tree:
1103
- # TODO: BitBIRCH is highly recursive. pickling may crash python,
1104
- # an alternative solution would be better
1105
- _old_limit = sys.getrecursionlimit()
1106
- sys.setrecursionlimit(100_000)
1107
- with open(out_dir / "bitbirch.pkl", mode="wb") as f:
1108
- pickle.dump(tree, f)
1109
- sys.setrecursionlimit(_old_limit)
1110
1106
  tree.delete_internal_nodes()
1111
- # Dump outputs (peak memory, timings, config, cluster ids)
1112
- if save_centroids:
1107
+ # Dump outputs (peak memory, timings, config, cluster ids)
1108
+ if save_centroids:
1109
+ if variant != "lean":
1110
+ console.print("Can't save centroids for non-lean variants", style="red")
1111
+ with open(out_dir / "clusters.pkl", mode="wb") as f:
1112
+ pickle.dump(tree.get_cluster_mol_ids(), f)
1113
+ else:
1113
1114
  output = tree.get_centroids_mol_ids()
1114
1115
  with open(out_dir / "clusters.pkl", mode="wb") as f:
1115
1116
  pickle.dump(output["mol_ids"], f)
1116
1117
  with open(out_dir / "cluster-centroids-packed.pkl", mode="wb") as f:
1117
1118
  pickle.dump(output["centroids"], f)
1118
- else:
1119
- with open(out_dir / "clusters.pkl", mode="wb") as f:
1120
- pickle.dump(tree.get_cluster_mol_ids(), f)
1119
+ else:
1120
+ with open(out_dir / "clusters.pkl", mode="wb") as f:
1121
+ pickle.dump(tree.get_cluster_mol_ids(), f)
1121
1122
 
1122
1123
  collect_system_specs_and_dump_config(ctx.params)
1123
1124
  timer.dump(out_dir / "timings.json")
@@ -1284,8 +1285,7 @@ def _multiround(
1284
1285
  monitor_rss: Annotated[
1285
1286
  bool,
1286
1287
  Option(
1287
- "--monitor-mem",
1288
- "--monitor-rss",
1288
+ "--monitor-mem/--no-monitor-mem",
1289
1289
  help="Monitor RAM used by all processes",
1290
1290
  rich_help_panel="Advanced",
1291
1291
  ),
@@ -1294,7 +1294,6 @@ def _multiround(
1294
1294
  float,
1295
1295
  Option(
1296
1296
  "--monitor-mem-seconds",
1297
- "--monitor-rss-seconds",
1298
1297
  help="Interval in seconds for RAM monitoring",
1299
1298
  rich_help_panel="Debug",
1300
1299
  hidden=True,
@@ -69,7 +69,7 @@ uint32_t _popcount_1d(const py::array_t<uint8_t>& arr) {
69
69
  #endif
70
70
  uint32_t count{0}; // Output scalar
71
71
  py::ssize_t steps = arr.shape(0);
72
- if (is_8byte_aligned(arr) and (steps % 64 == 0)) {
72
+ if (is_8byte_aligned(arr) && (steps % 64 == 0)) {
73
73
  #ifdef DEBUG_LOGS
74
74
  py::print("DEBUG: _popcount_1d fn triggered uint64 + popcount 64");
75
75
  #endif
@@ -110,7 +110,7 @@ py::array_t<uint32_t> _popcount_2d(const CArrayForcecast<uint8_t>& arr) {
110
110
  print_8byte_alignment_check(arr);
111
111
  #endif
112
112
  py::ssize_t steps = arr.shape(1);
113
- if (is_8byte_aligned(arr) and (steps % 64 == 0)) {
113
+ if (is_8byte_aligned(arr) && (steps % 64 == 0)) {
114
114
  #ifdef DEBUG_LOGS
115
115
  py::print("DEBUG: _popcount_2d fn triggered uint64 + popcount 64");
116
116
  #endif
@@ -243,7 +243,7 @@ py::array_t<uint8_t> centroid_from_sum(const CArrayForcecast<T>& linear_sum,
243
243
  }
244
244
  }
245
245
 
246
- if (not pack) {
246
+ if (!pack) {
247
247
  return centroid_unpacked;
248
248
  }
249
249
 
@@ -351,7 +351,7 @@ py::array_t<double> jt_sim_packed_precalc_cardinalities(
351
351
  }
352
352
  auto out = py::array_t<double>(n_samples);
353
353
 
354
- if (is_8byte_aligned(arr) and is_8byte_aligned(vec) and
354
+ if (is_8byte_aligned(arr) && is_8byte_aligned(vec) &&
355
355
  (n_features % 64 == 0)) {
356
356
  #ifdef DEBUG_LOGS
357
357
  py::print("DEBUG: jt_sim_packed fn triggered uint64 + popcount 64");
@@ -298,13 +298,8 @@ class _FinalTreeMergingRound(_TreeMergingRound):
298
298
 
299
299
  # Save clusters and exit
300
300
  if self.save_tree:
301
- # TODO: BitBIRCH is highly recursive. pickling may crash python,
302
- # an alternative solution would be better
303
- _old_limit = sys.getrecursionlimit()
304
- sys.setrecursionlimit(100_000)
305
- with open(self.out_dir / "bitbirch.pkl", mode="wb") as f:
306
- pickle.dump(tree, f)
307
- sys.setrecursionlimit(_old_limit)
301
+ # TODO: Find alternative solution
302
+ tree.save_pickle(self.out_dir / "bitbirch.pkl")
308
303
  tree.delete_internal_nodes()
309
304
  if self.save_centroids:
310
305
  output = tree.get_centroids_mol_ids()
@@ -399,13 +399,17 @@ def dump_mol_images(
399
399
  clusters: list[list[int]],
400
400
  cluster_idx: int = 0,
401
401
  batch_size: int = 30,
402
+ limit: int = -1,
402
403
  ) -> None:
403
404
  r"""Dump smiles associated with a specific cluster as ``*.png`` image files"""
404
405
  if isinstance(smiles, str):
405
406
  smiles = [smiles]
406
407
  smiles = np.asarray(smiles)
407
408
  idxs = clusters[cluster_idx]
409
+ num = 0
408
410
  for i, idx_seq in enumerate(batched(idxs, batch_size)):
411
+ if num + len(idx_seq) > limit:
412
+ idx_seq = idx_seq[: num + len(idx_seq) - limit]
409
413
  mols = []
410
414
  for smi in smiles[list(idx_seq)]:
411
415
  mol = Chem.MolFromSmiles(smi)
@@ -415,6 +419,9 @@ def dump_mol_images(
415
419
  img = Draw.MolsToGridImage(mols, molsPerRow=5)
416
420
  with open(f"cluster_{cluster_idx}_{i}.png", "wb") as f:
417
421
  f.write(img.data)
422
+ num += len(idx_seq)
423
+ if num >= limit:
424
+ break
418
425
 
419
426
 
420
427
  # For internal use, dispatches a visualization workflow and optionally saves
@@ -131,8 +131,7 @@ class BitBirch(
131
131
  .astype(np.uint8, copy=False)
132
132
  .view(np.bool)
133
133
  )
134
- # TODO: Even when both inputs are bool, this function warns for some reason
135
- # I believe this may be a sklearn bug
134
+ # TODO: Due to a sklearn bug this performs unnecessary casts
136
135
  centers = self.subcluster_centers_.astype(np.uint8, copy=False).view(np.bool)
137
136
  argmin = pairwise_distances_argmin(X, centers, metric="jaccard")
138
137
  return self.subcluster_labels_[argmin]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: bblean
3
- Version: 0.6.0b1
3
+ Version: 0.6.1b0
4
4
  Summary: BitBirch-Lean Python package
5
5
  Author: The Miranda-Quintana Lab and other BitBirch developers
6
6
  Author-email: Ramon Alain Miranda Quintana <quintana@chem.ufl.edu>, Krisztina Zsigmond <kzsigmond@ufl.edu>, Ignacio Pickering <ipickering@ufl.edu>, Kenneth Lopez Perez <klopezperez@chem.ufl.edu>, Miroslav Lzicar <miroslav.lzicar@deepmedchem.com>
@@ -85,14 +85,20 @@ tuning](https://mqcomplab.github.io/bblean/devdocs/user-guide/parameters.html) g
85
85
 
86
86
  ## Installation
87
87
 
88
- BitBIRCH-Lean requires Python 3.11 or higher, and can be installed in Linux or macOS.
89
- Via pip, which automatically includes C++ extensions:
88
+ BitBIRCH-Lean requires Python 3.11 or higher, and can be installed in Windows, Linux or
89
+ macOS via pip, which automatically includes C++ extensions:
90
90
 
91
91
  ```bash
92
92
  pip install bblean
93
+ # Alternatively you can use 'uv pip install'
94
+ bb --help
93
95
  ```
96
+
94
97
  We recommend installing `bblean` in a conda environment or a `venv`.
95
98
 
99
+ Memory usage and C++ extensions are most optimized for Linux / macOS. We support Windows
100
+ on a best-effort basis; some releases may not have Windows support.
101
+
96
102
  ### From source
97
103
 
98
104
  To build from source instead (editable mode):
@@ -23,13 +23,15 @@ APPLE_SILICON = platform.machine().lower() in ["arm64", "aarch64"]
23
23
  ARM = platform.machine().lower().startswith("arm") and not APPLE_SILICON
24
24
 
25
25
  # Build C++ extensions (recommended)
26
+ extra_compile_args = []
26
27
  if os.getenv("BITBIRCH_BUILD_CPP"):
27
28
  import pybind11
28
29
  from pybind11.setup_helpers import Pybind11Extension, WIN
29
30
 
30
31
  # setuptools paths must be relative
31
32
  ext_sources = [str((Path(name) / "csrc" / "similarity.cpp"))]
32
- extra_compile_args = ["-O3"] # -O3 includes -ftree-vectorize
33
+ if not WIN:
34
+ extra_compile_args.append("-O3") # -O3 includes -ftree-vectorize
33
35
  if not WIN:
34
36
  if X86:
35
37
  if os.getenv("BITBIRCH_BUILD_NATIVE"):
@@ -254,6 +254,7 @@ def test_multiround() -> None:
254
254
  "--no-verbose",
255
255
  "--set-mid-merge",
256
256
  "tolerance-legacy",
257
+ "--no-monitor-mem",
257
258
  ],
258
259
  )
259
260
  with open(out_dir / "clusters.pkl", mode="rb") as f:
@@ -288,7 +289,18 @@ def test_run() -> None:
288
289
  np.save(dir / "fingerprints.npy", fps)
289
290
  out_dir = dir / "output"
290
291
  result = runner.invoke(
291
- app, ["run", str(dir), "-o", str(out_dir), "-b", "50", "-t", "0.65"]
292
+ app,
293
+ [
294
+ "run",
295
+ str(dir),
296
+ "-o",
297
+ str(out_dir),
298
+ "-b",
299
+ "50",
300
+ "-t",
301
+ "0.65",
302
+ "--no-monitor-mem",
303
+ ],
292
304
  )
293
305
  with open(out_dir / "clusters.pkl", mode="rb") as f:
294
306
  obj = pickle.load(f)
@@ -1,11 +1,19 @@
1
1
  import sys
2
+
3
+ import pytest
2
4
  import numpy as np
5
+
3
6
  from bblean.bitbirch import BitBirch
4
7
  from bblean.fingerprints import make_fake_fingerprints, unpack_fingerprints
5
8
  from inline_snapshot import snapshot
6
9
 
7
10
 
8
11
  def test_random_fps_consistency() -> None:
12
+ # TODO For some strange reason this test *fails on macOS and Windows*
13
+ # The kmeans implementation of sklearn seems to work differently in linux and macOS
14
+ if sys.platform != "linux":
15
+ pytest.skip("Currently global clustering is non-deterministic on mac / windows")
16
+
9
17
  fps = make_fake_fingerprints(3000, n_features=2048, seed=126205095409235, pack=True)
10
18
  tree = BitBirch(branching_factor=50, threshold=0.65, merge_criterion="diameter")
11
19
  tree.fit(fps, n_features=2048)
@@ -29,33 +37,31 @@ def test_random_fps_consistency() -> None:
29
37
  [235, 255, 123, 255, 255],
30
38
  ]
31
39
  )
32
- # TODO For some strage reason this test *fails on macOS*
33
- # The kmeans implementation of sklearn seems to work different in linux and macOS
34
- if sys.platform != "darwin":
35
- tree.global_clustering(
36
- 20,
37
- method="kmeans",
38
- n_init=1,
39
- init=unpack_fingerprints(np.vstack(output_cent))[::2][:20],
40
- max_iter=10,
41
- )
42
- output_mol_ids = tree.get_cluster_mol_ids(global_clusters=True, sort=False)
43
- output_med = tree.get_medoids(fps, global_clusters=True, sort=False)
44
- assert [o[:5] for o in output_mol_ids[:5]] == snapshot(
45
- [
46
- [16, 1023, 1793, 2, 15],
47
- [1873, 1882, 1912, 1954, 1970],
48
- [12, 1877, 1861, 2068, 2012],
49
- [1560, 1901, 2065, 2037, 2396],
50
- [62, 73, 75, 87, 121],
51
- ]
52
- )
53
- assert output_med[:5, :5].tolist() == snapshot(
54
- [
55
- [255, 127, 252, 111, 223],
56
- [255, 255, 95, 255, 239],
57
- [123, 239, 238, 135, 126],
58
- [223, 14, 207, 187, 104],
59
- [255, 255, 255, 247, 255],
60
- ]
61
- )
40
+
41
+ tree.global_clustering(
42
+ 20,
43
+ method="kmeans",
44
+ n_init=1,
45
+ init=unpack_fingerprints(np.vstack(output_cent))[::2][:20],
46
+ max_iter=10,
47
+ )
48
+ output_mol_ids = tree.get_cluster_mol_ids(global_clusters=True, sort=False)
49
+ output_med = tree.get_medoids(fps, global_clusters=True, sort=False)
50
+ assert [o[:5] for o in output_mol_ids[:5]] == snapshot(
51
+ [
52
+ [16, 1023, 1793, 2, 15],
53
+ [1873, 1882, 1912, 1954, 1970],
54
+ [12, 1877, 1861, 2068, 2012],
55
+ [1560, 1901, 2065, 2037, 2396],
56
+ [62, 73, 75, 87, 121],
57
+ ]
58
+ )
59
+ assert output_med[:5, :5].tolist() == snapshot(
60
+ [
61
+ [255, 127, 252, 111, 223],
62
+ [255, 255, 95, 255, 239],
63
+ [123, 239, 238, 135, 126],
64
+ [223, 14, 207, 187, 104],
65
+ [255, 255, 255, 247, 255],
66
+ ]
67
+ )
@@ -77,7 +77,7 @@ def test_speed_regression(subtests) -> None:
77
77
  # all_max_allowed_ns = [1_200_000_000, 1_900_000_000, 2_500_000_000]
78
78
  # For the ubuntu-24.04 in gh CI the following are required:
79
79
  if CSIM_AVAIL:
80
- all_max_allowed_ns = [900_000_000, 1_400_000_000, 2_000_000_000]
80
+ all_max_allowed_ns = [900_000_000, 1_500_000_000, 2_000_000_000]
81
81
  else:
82
82
  all_max_allowed_ns = [1_700_000_000, 2_600_000_000, 3_600_000_000]
83
83
  for fps_num, max_allowed_ns in zip(all_fps_nums, all_max_allowed_ns):
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes