bblean 0.7.5__tar.gz → 0.7.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {bblean-0.7.5 → bblean-0.7.7}/PKG-INFO +1 -1
- {bblean-0.7.5 → bblean-0.7.7}/bblean/_version.py +2 -2
- {bblean-0.7.5 → bblean-0.7.7}/bblean/bitbirch.py +18 -7
- {bblean-0.7.5 → bblean-0.7.7}/bblean/cli.py +10 -2
- {bblean-0.7.5 → bblean-0.7.7}/bblean/smiles.py +25 -4
- {bblean-0.7.5 → bblean-0.7.7}/bblean.egg-info/PKG-INFO +1 -1
- {bblean-0.7.5 → bblean-0.7.7}/.cruft.json +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/.flake8 +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/.github/CODEOWNERS +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/.github/workflows/ci-cpp.yaml +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/.github/workflows/ci.yaml +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/.github/workflows/upload-to-pypi.yaml +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/.gitignore +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/.pre-commit-config.yaml +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/LICENSE +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/LICENSES/BSD-3-Clause.txt +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/LICENSES/GPL-3.0-only.txt +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/README.md +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/bblean/__init__.py +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/bblean/_config.py +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/bblean/_console.py +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/bblean/_legacy/__init__.py +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/bblean/_legacy/bb_int64.py +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/bblean/_legacy/bb_uint8.py +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/bblean/_memory.py +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/bblean/_merges.py +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/bblean/_py_similarity.py +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/bblean/_timer.py +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/bblean/analysis.py +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/bblean/csrc/README.md +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/bblean/csrc/similarity.cpp +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/bblean/fingerprints.py +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/bblean/metrics.py +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/bblean/multiround.py +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/bblean/plotting.py +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/bblean/similarity.py +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/bblean/sklearn.py +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/bblean/utils.py +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/bblean-demo-v2.gif +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/bblean-demo.cast +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/bblean.egg-info/SOURCES.txt +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/bblean.egg-info/dependency_links.txt +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/bblean.egg-info/entry_points.txt +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/bblean.egg-info/requires.txt +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/bblean.egg-info/top_level.txt +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/docs/src/_static/api.svg +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/docs/src/_static/installing.svg +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/docs/src/_static/logo-dark-bw.svg +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/docs/src/_static/logo-light-bw.svg +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/docs/src/_static/publications.svg +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/docs/src/_static/style.css +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/docs/src/_static/user-guide.svg +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/docs/src/_templates/module.rst +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/docs/src/api-reference.rst +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/docs/src/conf.py +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/docs/src/index.rst +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/docs/src/installing.rst +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/docs/src/publications.rst +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/docs/src/user-guide/linux_memory_setup.rst +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/docs/src/user-guide/notebooks/bitbirch_best_practices.ipynb +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/docs/src/user-guide/notebooks/bitbirch_quickstart.ipynb +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/docs/src/user-guide/parameters.rst +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/docs/src/user-guide.rst +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/environment.yaml +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/examples/best_practices/best_practices_functions.py +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/examples/best_practices/best_practices_plots.py +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/examples/best_practices/bitbirch_best_practices.ipynb +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/examples/best_practices/bitbirch_best_practices_RDKit.ipynb +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/examples/best_practices/bitbirch_parameter.ipynb +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/examples/biogen_logS.csv +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/examples/bitbirch_best_practices.ipynb +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/examples/bitbirch_quickstart.ipynb +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/examples/chembl-33-natural-products-subset.smi +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/examples/dataset_splitting.ipynb +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/pyproject.toml +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/setup.cfg +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/setup.py +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/tests/chembl-sample-3k.smi +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/tests/chembl-sample-bad.smi +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/tests/legacy_merges.py +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/tests/legacy_metrics.py +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/tests/test_bb_consistency.py +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/tests/test_cli.py +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/tests/test_fake_fps.py +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/tests/test_fingerprints.py +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/tests/test_global_clustering.py +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/tests/test_import_bblean.py +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/tests/test_merges.py +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/tests/test_metrics.py +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/tests/test_multiround.py +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/tests/test_refine.py +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/tests/test_regression.py +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/tests/test_sampling.py +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/tests/test_similarity.py +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/tests/test_simple.py +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/tests/test_sklearn.py +0 -0
- {bblean-0.7.5 → bblean-0.7.7}/tests/test_utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: bblean
|
|
3
|
-
Version: 0.7.5
|
|
3
|
+
Version: 0.7.7
|
|
4
4
|
Summary: BitBirch-Lean Python package
|
|
5
5
|
Author: The Miranda-Quintana Lab and other BitBirch developers
|
|
6
6
|
Author-email: Ramon Alain Miranda Quintana <quintana@chem.ufl.edu>, Krisztina Zsigmond <kzsigmond@ufl.edu>, Ignacio Pickering <ipickering@ufl.edu>, Kenneth Lopez Perez <klopezperez@chem.ufl.edu>, Miroslav Lzicar <miroslav.lzicar@deepmedchem.com>
|
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '0.7.5'
|
|
32
|
-
__version_tuple__ = version_tuple = (0, 7, 5)
|
|
31
|
+
__version__ = version = '0.7.7'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 7, 7)
|
|
33
33
|
|
|
34
34
|
__commit_id__ = commit_id = None
|
|
@@ -532,6 +532,7 @@ class _CentroidsMolIds(tp.TypedDict):
|
|
|
532
532
|
|
|
533
533
|
|
|
534
534
|
class _MedoidsMolIds(tp.TypedDict):
|
|
535
|
+
medoid_idxs: NDArray[np.int64]
|
|
535
536
|
medoids: NDArray[np.uint8]
|
|
536
537
|
mol_ids: list[list[int]]
|
|
537
538
|
|
|
@@ -926,32 +927,42 @@ class BitBirch:
|
|
|
926
927
|
input_is_packed: bool = True,
|
|
927
928
|
n_features: int | None = None,
|
|
928
929
|
) -> _MedoidsMolIds:
|
|
929
|
-
"""Get a dict with medoids and mol indices of the leaves
|
|
930
|
+
r"""Get a dict with medoid idxs, medoids and mol indices of the leaves
|
|
931
|
+
|
|
932
|
+
The medoid indices are indices into the cluster mol ids, not into the fps array
|
|
933
|
+
"""
|
|
930
934
|
cluster_members = self.get_cluster_mol_ids(
|
|
931
935
|
sort=sort, global_clusters=global_clusters
|
|
932
936
|
)
|
|
933
937
|
|
|
934
938
|
if input_is_packed:
|
|
935
939
|
fps = _unpack_fingerprints(fps, n_features=n_features)
|
|
936
|
-
cluster_medoids = self._unpacked_medoids_from_members(fps, cluster_members)
|
|
940
|
+
cluster_medoid_idxs, cluster_medoids = self._unpacked_medoids_from_members(
|
|
941
|
+
fps, cluster_members
|
|
942
|
+
)
|
|
937
943
|
if pack:
|
|
938
944
|
cluster_medoids = pack_fingerprints(cluster_medoids)
|
|
939
|
-
return {"medoids": cluster_medoids, "mol_ids": cluster_members}
|
|
945
|
+
return {
|
|
946
|
+
"medoid_idxs": cluster_medoid_idxs,
|
|
947
|
+
"medoids": cluster_medoids,
|
|
948
|
+
"mol_ids": cluster_members,
|
|
949
|
+
}
|
|
940
950
|
|
|
941
951
|
@staticmethod
|
|
942
952
|
def _unpacked_medoids_from_members(
|
|
943
953
|
unpacked_fps: NDArray[np.uint8], cluster_members: tp.Sequence[list[int]]
|
|
944
|
-
) -> NDArray[np.uint8]:
|
|
954
|
+
) -> tuple[NDArray[np.int64], NDArray[np.uint8]]:
|
|
945
955
|
cluster_medoids = np.zeros(
|
|
946
956
|
(len(cluster_members), unpacked_fps.shape[1]), dtype=np.uint8
|
|
947
957
|
)
|
|
958
|
+
cluster_medoid_idxs = np.zeros((len(cluster_members),), dtype=np.int64)
|
|
948
959
|
for idx, members in enumerate(cluster_members):
|
|
949
|
-
cluster_medoids[idx, :] = jt_isim_medoid(
|
|
960
|
+
cluster_medoid_idxs[idx], cluster_medoids[idx, :] = jt_isim_medoid(
|
|
950
961
|
unpacked_fps[members],
|
|
951
962
|
input_is_packed=False,
|
|
952
963
|
pack=False,
|
|
953
|
-
)
|
|
954
|
-
return cluster_medoids
|
|
964
|
+
)
|
|
965
|
+
return cluster_medoid_idxs, cluster_medoids
|
|
955
966
|
|
|
956
967
|
def get_medoids(
|
|
957
968
|
self,
|
|
@@ -1550,6 +1550,14 @@ def _fps_from_smiles(
|
|
|
1550
1550
|
help="Whether the smiles file has the format <smiles><tab><field><tab>...",
|
|
1551
1551
|
),
|
|
1552
1552
|
] = False,
|
|
1553
|
+
replace_dummy_atoms: Annotated[
|
|
1554
|
+
bool,
|
|
1555
|
+
Option(
|
|
1556
|
+
"--replace-dummy/--no-replace-dummy",
|
|
1557
|
+
help="Whether to replace dummy atoms such as [U], [Np], etc. used in synthon spaces", # noqa
|
|
1558
|
+
hidden=True,
|
|
1559
|
+
),
|
|
1560
|
+
] = False,
|
|
1553
1561
|
) -> None:
|
|
1554
1562
|
r"""Generate a `*.npy` fingerprints file from one or more `*.smi` smiles files
|
|
1555
1563
|
|
|
@@ -1656,7 +1664,7 @@ def _fps_from_smiles(
|
|
|
1656
1664
|
pool.map(
|
|
1657
1665
|
create_fp_file,
|
|
1658
1666
|
_iter_idxs_and_smiles_batches(
|
|
1659
|
-
smiles_paths, num_per_batch, tab_separated
|
|
1667
|
+
smiles_paths, num_per_batch, tab_separated, replace_dummy_atoms
|
|
1660
1668
|
),
|
|
1661
1669
|
)
|
|
1662
1670
|
timer.end_timing("total", console, indent=False)
|
|
@@ -1698,7 +1706,7 @@ def _fps_from_smiles(
|
|
|
1698
1706
|
pool.starmap(
|
|
1699
1707
|
fps_array_filler,
|
|
1700
1708
|
_iter_ranges_and_smiles_batches(
|
|
1701
|
-
smiles_paths, num_per_batch, tab_separated
|
|
1709
|
+
smiles_paths, num_per_batch, tab_separated, replace_dummy_atoms
|
|
1702
1710
|
),
|
|
1703
1711
|
)
|
|
1704
1712
|
fps = np.ndarray((smiles_num, out_dim), dtype=dtype, buffer=fps_shmem.buf)
|
|
@@ -31,8 +31,12 @@ def calc_num_smiles(smiles_paths: SmilesPaths) -> int:
|
|
|
31
31
|
return sum(1 for _ in iter_smiles_from_paths(smiles_paths))
|
|
32
32
|
|
|
33
33
|
|
|
34
|
+
# NOTE: replace_dummy is the procedure used in RDKit as of Dec 2024 for Synthon spaces
|
|
35
|
+
# synthons marked with [U], [Np], [Pu], [Am]. These need to be converted
|
|
34
36
|
def iter_smiles_from_paths(
|
|
35
|
-
smiles_paths: SmilesPaths,
|
|
37
|
+
smiles_paths: SmilesPaths,
|
|
38
|
+
tab_separated: bool = False,
|
|
39
|
+
replace_dummy_atoms: bool = False,
|
|
36
40
|
) -> tp.Iterator[str]:
|
|
37
41
|
r"""Iterate over smiles in a sequence of smiles paths
|
|
38
42
|
|
|
@@ -44,10 +48,21 @@ def iter_smiles_from_paths(
|
|
|
44
48
|
for smi_path in smiles_paths:
|
|
45
49
|
with open(smi_path, mode="rt", encoding="utf-8") as f:
|
|
46
50
|
for smi in f:
|
|
47
|
-
|
|
51
|
+
if tab_separated:
|
|
52
|
+
smi = smi.split("\t")[0]
|
|
53
|
+
|
|
48
54
|
# Skip headers
|
|
49
55
|
if smi.lower().strip() == "smiles":
|
|
50
56
|
continue
|
|
57
|
+
|
|
58
|
+
# Replace 'dummy' atoms from synthon spaces
|
|
59
|
+
if replace_dummy_atoms:
|
|
60
|
+
smi = (
|
|
61
|
+
smi.replace("[U]", "[1*]")
|
|
62
|
+
.replace("[Np]", "[2*]")
|
|
63
|
+
.replace("[Pu]", "[3*]")
|
|
64
|
+
.replace("[Am]", "[4*]")
|
|
65
|
+
)
|
|
51
66
|
yield smi
|
|
52
67
|
|
|
53
68
|
|
|
@@ -55,10 +70,12 @@ def _iter_ranges_and_smiles_batches(
|
|
|
55
70
|
smiles_paths: SmilesPaths,
|
|
56
71
|
num_per_batch: int,
|
|
57
72
|
tab_separated: bool = False,
|
|
73
|
+
replace_dummy_atoms: bool = False,
|
|
58
74
|
) -> tp.Iterable[tuple[tuple[int, int], tuple[str, ...]]]:
|
|
59
75
|
start_idx = 0
|
|
60
76
|
for batch in batched(
|
|
61
|
-
iter_smiles_from_paths(smiles_paths, tab_separated),
|
|
77
|
+
iter_smiles_from_paths(smiles_paths, tab_separated, replace_dummy_atoms),
|
|
78
|
+
num_per_batch,
|
|
62
79
|
):
|
|
63
80
|
size = len(batch)
|
|
64
81
|
end_idx = start_idx + size
|
|
@@ -70,7 +87,11 @@ def _iter_idxs_and_smiles_batches(
|
|
|
70
87
|
smiles_paths: SmilesPaths,
|
|
71
88
|
num_per_batch: int,
|
|
72
89
|
tab_separated: bool = False,
|
|
90
|
+
replace_dummy_atoms: bool = False,
|
|
73
91
|
) -> tp.Iterable[tuple[int, tuple[str, ...]]]:
|
|
74
92
|
yield from enumerate(
|
|
75
|
-
batched(
|
|
93
|
+
batched(
|
|
94
|
+
iter_smiles_from_paths(smiles_paths, tab_separated, replace_dummy_atoms),
|
|
95
|
+
num_per_batch,
|
|
96
|
+
)
|
|
76
97
|
)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: bblean
|
|
3
|
-
Version: 0.7.5
|
|
3
|
+
Version: 0.7.7
|
|
4
4
|
Summary: BitBirch-Lean Python package
|
|
5
5
|
Author: The Miranda-Quintana Lab and other BitBirch developers
|
|
6
6
|
Author-email: Ramon Alain Miranda Quintana <quintana@chem.ufl.edu>, Krisztina Zsigmond <kzsigmond@ufl.edu>, Ignacio Pickering <ipickering@ufl.edu>, Kenneth Lopez Perez <klopezperez@chem.ufl.edu>, Miroslav Lzicar <miroslav.lzicar@deepmedchem.com>
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|