bblean 0.7.5__tar.gz → 0.7.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97)
  1. {bblean-0.7.5 → bblean-0.7.7}/PKG-INFO +1 -1
  2. {bblean-0.7.5 → bblean-0.7.7}/bblean/_version.py +2 -2
  3. {bblean-0.7.5 → bblean-0.7.7}/bblean/bitbirch.py +18 -7
  4. {bblean-0.7.5 → bblean-0.7.7}/bblean/cli.py +10 -2
  5. {bblean-0.7.5 → bblean-0.7.7}/bblean/smiles.py +25 -4
  6. {bblean-0.7.5 → bblean-0.7.7}/bblean.egg-info/PKG-INFO +1 -1
  7. {bblean-0.7.5 → bblean-0.7.7}/.cruft.json +0 -0
  8. {bblean-0.7.5 → bblean-0.7.7}/.flake8 +0 -0
  9. {bblean-0.7.5 → bblean-0.7.7}/.github/CODEOWNERS +0 -0
  10. {bblean-0.7.5 → bblean-0.7.7}/.github/workflows/ci-cpp.yaml +0 -0
  11. {bblean-0.7.5 → bblean-0.7.7}/.github/workflows/ci.yaml +0 -0
  12. {bblean-0.7.5 → bblean-0.7.7}/.github/workflows/upload-to-pypi.yaml +0 -0
  13. {bblean-0.7.5 → bblean-0.7.7}/.gitignore +0 -0
  14. {bblean-0.7.5 → bblean-0.7.7}/.pre-commit-config.yaml +0 -0
  15. {bblean-0.7.5 → bblean-0.7.7}/LICENSE +0 -0
  16. {bblean-0.7.5 → bblean-0.7.7}/LICENSES/BSD-3-Clause.txt +0 -0
  17. {bblean-0.7.5 → bblean-0.7.7}/LICENSES/GPL-3.0-only.txt +0 -0
  18. {bblean-0.7.5 → bblean-0.7.7}/README.md +0 -0
  19. {bblean-0.7.5 → bblean-0.7.7}/bblean/__init__.py +0 -0
  20. {bblean-0.7.5 → bblean-0.7.7}/bblean/_config.py +0 -0
  21. {bblean-0.7.5 → bblean-0.7.7}/bblean/_console.py +0 -0
  22. {bblean-0.7.5 → bblean-0.7.7}/bblean/_legacy/__init__.py +0 -0
  23. {bblean-0.7.5 → bblean-0.7.7}/bblean/_legacy/bb_int64.py +0 -0
  24. {bblean-0.7.5 → bblean-0.7.7}/bblean/_legacy/bb_uint8.py +0 -0
  25. {bblean-0.7.5 → bblean-0.7.7}/bblean/_memory.py +0 -0
  26. {bblean-0.7.5 → bblean-0.7.7}/bblean/_merges.py +0 -0
  27. {bblean-0.7.5 → bblean-0.7.7}/bblean/_py_similarity.py +0 -0
  28. {bblean-0.7.5 → bblean-0.7.7}/bblean/_timer.py +0 -0
  29. {bblean-0.7.5 → bblean-0.7.7}/bblean/analysis.py +0 -0
  30. {bblean-0.7.5 → bblean-0.7.7}/bblean/csrc/README.md +0 -0
  31. {bblean-0.7.5 → bblean-0.7.7}/bblean/csrc/similarity.cpp +0 -0
  32. {bblean-0.7.5 → bblean-0.7.7}/bblean/fingerprints.py +0 -0
  33. {bblean-0.7.5 → bblean-0.7.7}/bblean/metrics.py +0 -0
  34. {bblean-0.7.5 → bblean-0.7.7}/bblean/multiround.py +0 -0
  35. {bblean-0.7.5 → bblean-0.7.7}/bblean/plotting.py +0 -0
  36. {bblean-0.7.5 → bblean-0.7.7}/bblean/similarity.py +0 -0
  37. {bblean-0.7.5 → bblean-0.7.7}/bblean/sklearn.py +0 -0
  38. {bblean-0.7.5 → bblean-0.7.7}/bblean/utils.py +0 -0
  39. {bblean-0.7.5 → bblean-0.7.7}/bblean-demo-v2.gif +0 -0
  40. {bblean-0.7.5 → bblean-0.7.7}/bblean-demo.cast +0 -0
  41. {bblean-0.7.5 → bblean-0.7.7}/bblean.egg-info/SOURCES.txt +0 -0
  42. {bblean-0.7.5 → bblean-0.7.7}/bblean.egg-info/dependency_links.txt +0 -0
  43. {bblean-0.7.5 → bblean-0.7.7}/bblean.egg-info/entry_points.txt +0 -0
  44. {bblean-0.7.5 → bblean-0.7.7}/bblean.egg-info/requires.txt +0 -0
  45. {bblean-0.7.5 → bblean-0.7.7}/bblean.egg-info/top_level.txt +0 -0
  46. {bblean-0.7.5 → bblean-0.7.7}/docs/src/_static/api.svg +0 -0
  47. {bblean-0.7.5 → bblean-0.7.7}/docs/src/_static/installing.svg +0 -0
  48. {bblean-0.7.5 → bblean-0.7.7}/docs/src/_static/logo-dark-bw.svg +0 -0
  49. {bblean-0.7.5 → bblean-0.7.7}/docs/src/_static/logo-light-bw.svg +0 -0
  50. {bblean-0.7.5 → bblean-0.7.7}/docs/src/_static/publications.svg +0 -0
  51. {bblean-0.7.5 → bblean-0.7.7}/docs/src/_static/style.css +0 -0
  52. {bblean-0.7.5 → bblean-0.7.7}/docs/src/_static/user-guide.svg +0 -0
  53. {bblean-0.7.5 → bblean-0.7.7}/docs/src/_templates/module.rst +0 -0
  54. {bblean-0.7.5 → bblean-0.7.7}/docs/src/api-reference.rst +0 -0
  55. {bblean-0.7.5 → bblean-0.7.7}/docs/src/conf.py +0 -0
  56. {bblean-0.7.5 → bblean-0.7.7}/docs/src/index.rst +0 -0
  57. {bblean-0.7.5 → bblean-0.7.7}/docs/src/installing.rst +0 -0
  58. {bblean-0.7.5 → bblean-0.7.7}/docs/src/publications.rst +0 -0
  59. {bblean-0.7.5 → bblean-0.7.7}/docs/src/user-guide/linux_memory_setup.rst +0 -0
  60. {bblean-0.7.5 → bblean-0.7.7}/docs/src/user-guide/notebooks/bitbirch_best_practices.ipynb +0 -0
  61. {bblean-0.7.5 → bblean-0.7.7}/docs/src/user-guide/notebooks/bitbirch_quickstart.ipynb +0 -0
  62. {bblean-0.7.5 → bblean-0.7.7}/docs/src/user-guide/parameters.rst +0 -0
  63. {bblean-0.7.5 → bblean-0.7.7}/docs/src/user-guide.rst +0 -0
  64. {bblean-0.7.5 → bblean-0.7.7}/environment.yaml +0 -0
  65. {bblean-0.7.5 → bblean-0.7.7}/examples/best_practices/best_practices_functions.py +0 -0
  66. {bblean-0.7.5 → bblean-0.7.7}/examples/best_practices/best_practices_plots.py +0 -0
  67. {bblean-0.7.5 → bblean-0.7.7}/examples/best_practices/bitbirch_best_practices.ipynb +0 -0
  68. {bblean-0.7.5 → bblean-0.7.7}/examples/best_practices/bitbirch_best_practices_RDKit.ipynb +0 -0
  69. {bblean-0.7.5 → bblean-0.7.7}/examples/best_practices/bitbirch_parameter.ipynb +0 -0
  70. {bblean-0.7.5 → bblean-0.7.7}/examples/biogen_logS.csv +0 -0
  71. {bblean-0.7.5 → bblean-0.7.7}/examples/bitbirch_best_practices.ipynb +0 -0
  72. {bblean-0.7.5 → bblean-0.7.7}/examples/bitbirch_quickstart.ipynb +0 -0
  73. {bblean-0.7.5 → bblean-0.7.7}/examples/chembl-33-natural-products-subset.smi +0 -0
  74. {bblean-0.7.5 → bblean-0.7.7}/examples/dataset_splitting.ipynb +0 -0
  75. {bblean-0.7.5 → bblean-0.7.7}/pyproject.toml +0 -0
  76. {bblean-0.7.5 → bblean-0.7.7}/setup.cfg +0 -0
  77. {bblean-0.7.5 → bblean-0.7.7}/setup.py +0 -0
  78. {bblean-0.7.5 → bblean-0.7.7}/tests/chembl-sample-3k.smi +0 -0
  79. {bblean-0.7.5 → bblean-0.7.7}/tests/chembl-sample-bad.smi +0 -0
  80. {bblean-0.7.5 → bblean-0.7.7}/tests/legacy_merges.py +0 -0
  81. {bblean-0.7.5 → bblean-0.7.7}/tests/legacy_metrics.py +0 -0
  82. {bblean-0.7.5 → bblean-0.7.7}/tests/test_bb_consistency.py +0 -0
  83. {bblean-0.7.5 → bblean-0.7.7}/tests/test_cli.py +0 -0
  84. {bblean-0.7.5 → bblean-0.7.7}/tests/test_fake_fps.py +0 -0
  85. {bblean-0.7.5 → bblean-0.7.7}/tests/test_fingerprints.py +0 -0
  86. {bblean-0.7.5 → bblean-0.7.7}/tests/test_global_clustering.py +0 -0
  87. {bblean-0.7.5 → bblean-0.7.7}/tests/test_import_bblean.py +0 -0
  88. {bblean-0.7.5 → bblean-0.7.7}/tests/test_merges.py +0 -0
  89. {bblean-0.7.5 → bblean-0.7.7}/tests/test_metrics.py +0 -0
  90. {bblean-0.7.5 → bblean-0.7.7}/tests/test_multiround.py +0 -0
  91. {bblean-0.7.5 → bblean-0.7.7}/tests/test_refine.py +0 -0
  92. {bblean-0.7.5 → bblean-0.7.7}/tests/test_regression.py +0 -0
  93. {bblean-0.7.5 → bblean-0.7.7}/tests/test_sampling.py +0 -0
  94. {bblean-0.7.5 → bblean-0.7.7}/tests/test_similarity.py +0 -0
  95. {bblean-0.7.5 → bblean-0.7.7}/tests/test_simple.py +0 -0
  96. {bblean-0.7.5 → bblean-0.7.7}/tests/test_sklearn.py +0 -0
  97. {bblean-0.7.5 → bblean-0.7.7}/tests/test_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: bblean
3
- Version: 0.7.5
3
+ Version: 0.7.7
4
4
  Summary: BitBirch-Lean Python package
5
5
  Author: The Miranda-Quintana Lab and other BitBirch developers
6
6
  Author-email: Ramon Alain Miranda Quintana <quintana@chem.ufl.edu>, Krisztina Zsigmond <kzsigmond@ufl.edu>, Ignacio Pickering <ipickering@ufl.edu>, Kenneth Lopez Perez <klopezperez@chem.ufl.edu>, Miroslav Lzicar <miroslav.lzicar@deepmedchem.com>
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '0.7.5'
32
- __version_tuple__ = version_tuple = (0, 7, 5)
31
+ __version__ = version = '0.7.7'
32
+ __version_tuple__ = version_tuple = (0, 7, 7)
33
33
 
34
34
  __commit_id__ = commit_id = None
@@ -532,6 +532,7 @@ class _CentroidsMolIds(tp.TypedDict):
532
532
 
533
533
 
534
534
  class _MedoidsMolIds(tp.TypedDict):
535
+ medoid_idxs: NDArray[np.int64]
535
536
  medoids: NDArray[np.uint8]
536
537
  mol_ids: list[list[int]]
537
538
 
@@ -926,32 +927,42 @@ class BitBirch:
926
927
  input_is_packed: bool = True,
927
928
  n_features: int | None = None,
928
929
  ) -> _MedoidsMolIds:
929
- """Get a dict with medoids and mol indices of the leaves"""
930
+ r"""Get a dict with medoid idxs, medoids and mol indices of the leaves
931
+
932
+ The medoid indices are indices into the cluster mol ids, not into the fps array
933
+ """
930
934
  cluster_members = self.get_cluster_mol_ids(
931
935
  sort=sort, global_clusters=global_clusters
932
936
  )
933
937
 
934
938
  if input_is_packed:
935
939
  fps = _unpack_fingerprints(fps, n_features=n_features)
936
- cluster_medoids = self._unpacked_medoids_from_members(fps, cluster_members)
940
+ cluster_medoid_idxs, cluster_medoids = self._unpacked_medoids_from_members(
941
+ fps, cluster_members
942
+ )
937
943
  if pack:
938
944
  cluster_medoids = pack_fingerprints(cluster_medoids)
939
- return {"medoids": cluster_medoids, "mol_ids": cluster_members}
945
+ return {
946
+ "medoid_idxs": cluster_medoid_idxs,
947
+ "medoids": cluster_medoids,
948
+ "mol_ids": cluster_members,
949
+ }
940
950
 
941
951
  @staticmethod
942
952
  def _unpacked_medoids_from_members(
943
953
  unpacked_fps: NDArray[np.uint8], cluster_members: tp.Sequence[list[int]]
944
- ) -> NDArray[np.uint8]:
954
+ ) -> tuple[NDArray[np.int64], NDArray[np.uint8]]:
945
955
  cluster_medoids = np.zeros(
946
956
  (len(cluster_members), unpacked_fps.shape[1]), dtype=np.uint8
947
957
  )
958
+ cluster_medoid_idxs = np.zeros((len(cluster_members),), dtype=np.int64)
948
959
  for idx, members in enumerate(cluster_members):
949
- cluster_medoids[idx, :] = jt_isim_medoid(
960
+ cluster_medoid_idxs[idx], cluster_medoids[idx, :] = jt_isim_medoid(
950
961
  unpacked_fps[members],
951
962
  input_is_packed=False,
952
963
  pack=False,
953
- )[1]
954
- return cluster_medoids
964
+ )
965
+ return cluster_medoid_idxs, cluster_medoids
955
966
 
956
967
  def get_medoids(
957
968
  self,
@@ -1550,6 +1550,14 @@ def _fps_from_smiles(
1550
1550
  help="Whether the smiles file has the format <smiles><tab><field><tab>...",
1551
1551
  ),
1552
1552
  ] = False,
1553
+ replace_dummy_atoms: Annotated[
1554
+ bool,
1555
+ Option(
1556
+ "--replace-dummy/--no-replace-dummy",
1557
+ help="Whether to replace dummy atoms such as [U], [Np], etc. used in synthon spaces", # noqa
1558
+ hidden=True,
1559
+ ),
1560
+ ] = False,
1553
1561
  ) -> None:
1554
1562
  r"""Generate a `*.npy` fingerprints file from one or more `*.smi` smiles files
1555
1563
 
@@ -1656,7 +1664,7 @@ def _fps_from_smiles(
1656
1664
  pool.map(
1657
1665
  create_fp_file,
1658
1666
  _iter_idxs_and_smiles_batches(
1659
- smiles_paths, num_per_batch, tab_separated
1667
+ smiles_paths, num_per_batch, tab_separated, replace_dummy_atoms
1660
1668
  ),
1661
1669
  )
1662
1670
  timer.end_timing("total", console, indent=False)
@@ -1698,7 +1706,7 @@ def _fps_from_smiles(
1698
1706
  pool.starmap(
1699
1707
  fps_array_filler,
1700
1708
  _iter_ranges_and_smiles_batches(
1701
- smiles_paths, num_per_batch, tab_separated
1709
+ smiles_paths, num_per_batch, tab_separated, replace_dummy_atoms
1702
1710
  ),
1703
1711
  )
1704
1712
  fps = np.ndarray((smiles_num, out_dim), dtype=dtype, buffer=fps_shmem.buf)
@@ -31,8 +31,12 @@ def calc_num_smiles(smiles_paths: SmilesPaths) -> int:
31
31
  return sum(1 for _ in iter_smiles_from_paths(smiles_paths))
32
32
 
33
33
 
34
+ # NOTE: replace_dummy is the procedure used in RDKit as of Dec 2024 for Synthon spaces
35
+ # synthons marked with [U], [Np], [Pu], [Am]. These need to be converted
34
36
  def iter_smiles_from_paths(
35
- smiles_paths: SmilesPaths, tab_separated: bool = False
37
+ smiles_paths: SmilesPaths,
38
+ tab_separated: bool = False,
39
+ replace_dummy_atoms: bool = False,
36
40
  ) -> tp.Iterator[str]:
37
41
  r"""Iterate over smiles in a sequence of smiles paths
38
42
 
@@ -44,10 +48,21 @@ def iter_smiles_from_paths(
44
48
  for smi_path in smiles_paths:
45
49
  with open(smi_path, mode="rt", encoding="utf-8") as f:
46
50
  for smi in f:
47
- smi = smi if not tab_separated else smi.split("\t")[0]
51
+ if tab_separated:
52
+ smi = smi.split("\t")[0]
53
+
48
54
  # Skip headers
49
55
  if smi.lower().strip() == "smiles":
50
56
  continue
57
+
58
+ # Replace 'dummy' atoms from synthon spaces
59
+ if replace_dummy_atoms:
60
+ smi = (
61
+ smi.replace("[U]", "[1*]")
62
+ .replace("[Np]", "[2*]")
63
+ .replace("[Pu]", "[3*]")
64
+ .replace("[Am]", "[4*]")
65
+ )
51
66
  yield smi
52
67
 
53
68
 
@@ -55,10 +70,12 @@ def _iter_ranges_and_smiles_batches(
55
70
  smiles_paths: SmilesPaths,
56
71
  num_per_batch: int,
57
72
  tab_separated: bool = False,
73
+ replace_dummy_atoms: bool = False,
58
74
  ) -> tp.Iterable[tuple[tuple[int, int], tuple[str, ...]]]:
59
75
  start_idx = 0
60
76
  for batch in batched(
61
- iter_smiles_from_paths(smiles_paths, tab_separated), num_per_batch
77
+ iter_smiles_from_paths(smiles_paths, tab_separated, replace_dummy_atoms),
78
+ num_per_batch,
62
79
  ):
63
80
  size = len(batch)
64
81
  end_idx = start_idx + size
@@ -70,7 +87,11 @@ def _iter_idxs_and_smiles_batches(
70
87
  smiles_paths: SmilesPaths,
71
88
  num_per_batch: int,
72
89
  tab_separated: bool = False,
90
+ replace_dummy_atoms: bool = False,
73
91
  ) -> tp.Iterable[tuple[int, tuple[str, ...]]]:
74
92
  yield from enumerate(
75
- batched(iter_smiles_from_paths(smiles_paths, tab_separated), num_per_batch)
93
+ batched(
94
+ iter_smiles_from_paths(smiles_paths, tab_separated, replace_dummy_atoms),
95
+ num_per_batch,
96
+ )
76
97
  )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: bblean
3
- Version: 0.7.5
3
+ Version: 0.7.7
4
4
  Summary: BitBirch-Lean Python package
5
5
  Author: The Miranda-Quintana Lab and other BitBirch developers
6
6
  Author-email: Ramon Alain Miranda Quintana <quintana@chem.ufl.edu>, Krisztina Zsigmond <kzsigmond@ufl.edu>, Ignacio Pickering <ipickering@ufl.edu>, Kenneth Lopez Perez <klopezperez@chem.ufl.edu>, Miroslav Lzicar <miroslav.lzicar@deepmedchem.com>
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes