PyPI - bblean - Versions diffs - 0.7.6__tar.gz → 0.7.8__tar.gz - Mend

bblean 0.7.6.tar.gz → 0.7.8.tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (97)

{bblean-0.7.6 → bblean-0.7.8}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: bblean
-Version: 0.7.6
+Version: 0.7.8
 Summary: BitBirch-Lean Python package
 Author: The Miranda-Quintana Lab and other BitBirch developers
 Author-email: Ramon Alain Miranda Quintana <quintana@chem.ufl.edu>, Krisztina Zsigmond <kzsigmond@ufl.edu>, Ignacio Pickering <ipickering@ufl.edu>, Kenneth Lopez Perez <klopezperez@chem.ufl.edu>, Miroslav Lzicar <miroslav.lzicar@deepmedchem.com>

{bblean-0.7.6 → bblean-0.7.8}/bblean/_version.py RENAMED Viewed

@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID
-__version__ = version = '0.7.6'
-__version_tuple__ = version_tuple = (0, 7, 6)
+__version__ = version = '0.7.8'
+__version_tuple__ = version_tuple = (0, 7, 8)
 __commit_id__ = commit_id = None

{bblean-0.7.6 → bblean-0.7.8}/bblean/cli.py RENAMED Viewed

@@ -76,6 +76,85 @@ def _main(
     pass
+@app.command("compare", rich_help_panel="Analysis", hidden=True)
+def _compare(
+    clusters_a_path: Annotated[Path, Argument()],
+    clusters_b_path: Annotated[Path, Argument()],
+    ari: Annotated[
+        bool,
+        Option("--ari/--no-ari", help="Adjusted Rand index"),
+    ] = True,
+    ami: Annotated[
+        bool,
+        Option("--ami/--no-ami", help="Adjusted mutual information (slow)"),
+    ] = True,
+    top: Annotated[
+        int,
+        Option("-t", "--top"),
+    ] = 30,
+    use_first_clustering_indices: Annotated[
+        bool,
+        Option("--use-first-clustering-indices/--no-use-first-clustering-indices"),
+    ] = False,
+    verbose: Annotated[
+        bool,
+        Option("-v/-V", "--verbose/--no-verbose"),
+    ] = True,
+) -> None:
+    r"""Compare two clusterings of the same data, using different metrics"""
+    import pickle
+    import numpy as np
+    from sklearn.metrics import adjusted_mutual_info_score, adjusted_rand_score
+    from bblean._console import get_console
+    console = get_console(silent=not verbose)
+    if clusters_a_path.is_dir():
+        clusters_a_path = clusters_a_path / "clusters.pkl"
+    if clusters_b_path.is_dir():
+        clusters_b_path = clusters_b_path / "clusters.pkl"
+    with console.status("[italic]Collecting labels...[/italic]", spinner="dots"):
+        with open(clusters_a_path, mode="rb") as f:
+            clusters = pickle.load(f)
+            total = sum(len(c) for c in clusters)
+            true_labels = np.empty(total, dtype=np.uint64)
+            for i, mol_ids in enumerate(clusters):
+                true_labels[mol_ids] = i
+            idxs_a = np.concatenate(clusters[:top])
+        with open(clusters_b_path, mode="rb") as f:
+            clusters = pickle.load(f)
+            total = sum(len(c) for c in clusters)
+            pred_labels = np.empty(total, dtype=np.uint64)
+            for i, mol_ids in enumerate(clusters):
+                pred_labels[mol_ids] = i
+            idxs_b = np.concatenate(clusters[:top])
+    if use_first_clustering_indices:
+        idxs = idxs_a
+    else:
+        idxs = np.unique(np.concatenate((idxs_a, idxs_b)))
+    true_labels = true_labels[idxs]
+    pred_labels = pred_labels[idxs]
+    timer = Timer()
+    timer.init_timing("total")
+    if ami:
+        with console.status("[italic]Calc. AMI score...[/italic]", spinner="dots"):
+            ami_score = adjusted_mutual_info_score(true_labels, pred_labels)
+        console.print(f"Adjusted Mutual Information (AMI): {ami_score:.4f}")
+    if ari:
+        with console.status("[italic]Calc. ARI score...[/italic]", spinner="dots"):
+            ari_score = adjusted_rand_score(true_labels, pred_labels)
+        console.print(f"Adjusted Rand Index (ARI): {ari_score:.4f}")
+    timer.end_timing("total", console, indent=False)
 @app.command("summary", rich_help_panel="Analysis")
 def _table_summary(
     clusters_path: Annotated[
@@ -1550,6 +1629,14 @@ def _fps_from_smiles(
             help="Whether the smiles file has the format <smiles><tab><field><tab>...",
         ),
     ] = False,
+    replace_dummy_atoms: Annotated[
+        bool,
+        Option(
+            "--replace-dummy/--no-replace-dummy",
+            help="Whether to replace dummy atoms such as [U], [Np], etc. used in synthon spaces",  # noqa
+            hidden=True,
+        ),
+    ] = False,
 ) -> None:
     r"""Generate a `*.npy` fingerprints file from one or more `*.smi` smiles files
@@ -1656,7 +1743,7 @@ def _fps_from_smiles(
                 pool.map(
                     create_fp_file,
                     _iter_idxs_and_smiles_batches(
-                        smiles_paths, num_per_batch, tab_separated
+                        smiles_paths, num_per_batch, tab_separated, replace_dummy_atoms
                     ),
                 )
         timer.end_timing("total", console, indent=False)
@@ -1698,7 +1785,7 @@ def _fps_from_smiles(
             pool.starmap(
                 fps_array_filler,
                 _iter_ranges_and_smiles_batches(
-                    smiles_paths, num_per_batch, tab_separated
+                    smiles_paths, num_per_batch, tab_separated, replace_dummy_atoms
                 ),
             )
         fps = np.ndarray((smiles_num, out_dim), dtype=dtype, buffer=fps_shmem.buf)

{bblean-0.7.6 → bblean-0.7.8}/bblean/smiles.py RENAMED Viewed

@@ -31,8 +31,12 @@ def calc_num_smiles(smiles_paths: SmilesPaths) -> int:
     return sum(1 for _ in iter_smiles_from_paths(smiles_paths))
+# NOTE: replace_dummy is the procedure used in RDKit as of Dec 2024 for Synthon spaces
+# synthons marked with [U], [Np], [Pu], [Am]. These need to be converted
 def iter_smiles_from_paths(
-    smiles_paths: SmilesPaths, tab_separated: bool = False
+    smiles_paths: SmilesPaths,
+    tab_separated: bool = False,
+    replace_dummy_atoms: bool = False,
 ) -> tp.Iterator[str]:
     r"""Iterate over smiles in a sequence of smiles paths
@@ -44,10 +48,21 @@ def iter_smiles_from_paths(
     for smi_path in smiles_paths:
         with open(smi_path, mode="rt", encoding="utf-8") as f:
             for smi in f:
-                smi = smi if not tab_separated else smi.split("\t")[0]
+                if tab_separated:
+                    smi = smi.split("\t")[0]
                 # Skip headers
                 if smi.lower().strip() == "smiles":
                     continue
+                # Replace 'dummy' atoms from synthon spaces
+                if replace_dummy_atoms:
+                    smi = (
+                        smi.replace("[U]", "[1*]")
+                        .replace("[Np]", "[2*]")
+                        .replace("[Pu]", "[3*]")
+                        .replace("[Am]", "[4*]")
+                    )
                 yield smi
@@ -55,10 +70,12 @@ def _iter_ranges_and_smiles_batches(
     smiles_paths: SmilesPaths,
     num_per_batch: int,
     tab_separated: bool = False,
+    replace_dummy_atoms: bool = False,
 ) -> tp.Iterable[tuple[tuple[int, int], tuple[str, ...]]]:
     start_idx = 0
     for batch in batched(
-        iter_smiles_from_paths(smiles_paths, tab_separated), num_per_batch
+        iter_smiles_from_paths(smiles_paths, tab_separated, replace_dummy_atoms),
+        num_per_batch,
     ):
         size = len(batch)
         end_idx = start_idx + size
@@ -70,7 +87,11 @@ def _iter_idxs_and_smiles_batches(
     smiles_paths: SmilesPaths,
     num_per_batch: int,
     tab_separated: bool = False,
+    replace_dummy_atoms: bool = False,
 ) -> tp.Iterable[tuple[int, tuple[str, ...]]]:
     yield from enumerate(
-        batched(iter_smiles_from_paths(smiles_paths, tab_separated), num_per_batch)
+        batched(
+            iter_smiles_from_paths(smiles_paths, tab_separated, replace_dummy_atoms),
+            num_per_batch,
+        )
     )

{bblean-0.7.6 → bblean-0.7.8}/bblean.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: bblean
-Version: 0.7.6
+Version: 0.7.8
 Summary: BitBirch-Lean Python package
 Author: The Miranda-Quintana Lab and other BitBirch developers
 Author-email: Ramon Alain Miranda Quintana <quintana@chem.ufl.edu>, Krisztina Zsigmond <kzsigmond@ufl.edu>, Ignacio Pickering <ipickering@ufl.edu>, Kenneth Lopez Perez <klopezperez@chem.ufl.edu>, Miroslav Lzicar <miroslav.lzicar@deepmedchem.com>