bblean 0.7.6__tar.gz → 0.7.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97)
  1. {bblean-0.7.6 → bblean-0.7.8}/PKG-INFO +1 -1
  2. {bblean-0.7.6 → bblean-0.7.8}/bblean/_version.py +2 -2
  3. {bblean-0.7.6 → bblean-0.7.8}/bblean/cli.py +89 -2
  4. {bblean-0.7.6 → bblean-0.7.8}/bblean/smiles.py +25 -4
  5. {bblean-0.7.6 → bblean-0.7.8}/bblean.egg-info/PKG-INFO +1 -1
  6. {bblean-0.7.6 → bblean-0.7.8}/.cruft.json +0 -0
  7. {bblean-0.7.6 → bblean-0.7.8}/.flake8 +0 -0
  8. {bblean-0.7.6 → bblean-0.7.8}/.github/CODEOWNERS +0 -0
  9. {bblean-0.7.6 → bblean-0.7.8}/.github/workflows/ci-cpp.yaml +0 -0
  10. {bblean-0.7.6 → bblean-0.7.8}/.github/workflows/ci.yaml +0 -0
  11. {bblean-0.7.6 → bblean-0.7.8}/.github/workflows/upload-to-pypi.yaml +0 -0
  12. {bblean-0.7.6 → bblean-0.7.8}/.gitignore +0 -0
  13. {bblean-0.7.6 → bblean-0.7.8}/.pre-commit-config.yaml +0 -0
  14. {bblean-0.7.6 → bblean-0.7.8}/LICENSE +0 -0
  15. {bblean-0.7.6 → bblean-0.7.8}/LICENSES/BSD-3-Clause.txt +0 -0
  16. {bblean-0.7.6 → bblean-0.7.8}/LICENSES/GPL-3.0-only.txt +0 -0
  17. {bblean-0.7.6 → bblean-0.7.8}/README.md +0 -0
  18. {bblean-0.7.6 → bblean-0.7.8}/bblean/__init__.py +0 -0
  19. {bblean-0.7.6 → bblean-0.7.8}/bblean/_config.py +0 -0
  20. {bblean-0.7.6 → bblean-0.7.8}/bblean/_console.py +0 -0
  21. {bblean-0.7.6 → bblean-0.7.8}/bblean/_legacy/__init__.py +0 -0
  22. {bblean-0.7.6 → bblean-0.7.8}/bblean/_legacy/bb_int64.py +0 -0
  23. {bblean-0.7.6 → bblean-0.7.8}/bblean/_legacy/bb_uint8.py +0 -0
  24. {bblean-0.7.6 → bblean-0.7.8}/bblean/_memory.py +0 -0
  25. {bblean-0.7.6 → bblean-0.7.8}/bblean/_merges.py +0 -0
  26. {bblean-0.7.6 → bblean-0.7.8}/bblean/_py_similarity.py +0 -0
  27. {bblean-0.7.6 → bblean-0.7.8}/bblean/_timer.py +0 -0
  28. {bblean-0.7.6 → bblean-0.7.8}/bblean/analysis.py +0 -0
  29. {bblean-0.7.6 → bblean-0.7.8}/bblean/bitbirch.py +0 -0
  30. {bblean-0.7.6 → bblean-0.7.8}/bblean/csrc/README.md +0 -0
  31. {bblean-0.7.6 → bblean-0.7.8}/bblean/csrc/similarity.cpp +0 -0
  32. {bblean-0.7.6 → bblean-0.7.8}/bblean/fingerprints.py +0 -0
  33. {bblean-0.7.6 → bblean-0.7.8}/bblean/metrics.py +0 -0
  34. {bblean-0.7.6 → bblean-0.7.8}/bblean/multiround.py +0 -0
  35. {bblean-0.7.6 → bblean-0.7.8}/bblean/plotting.py +0 -0
  36. {bblean-0.7.6 → bblean-0.7.8}/bblean/similarity.py +0 -0
  37. {bblean-0.7.6 → bblean-0.7.8}/bblean/sklearn.py +0 -0
  38. {bblean-0.7.6 → bblean-0.7.8}/bblean/utils.py +0 -0
  39. {bblean-0.7.6 → bblean-0.7.8}/bblean-demo-v2.gif +0 -0
  40. {bblean-0.7.6 → bblean-0.7.8}/bblean-demo.cast +0 -0
  41. {bblean-0.7.6 → bblean-0.7.8}/bblean.egg-info/SOURCES.txt +0 -0
  42. {bblean-0.7.6 → bblean-0.7.8}/bblean.egg-info/dependency_links.txt +0 -0
  43. {bblean-0.7.6 → bblean-0.7.8}/bblean.egg-info/entry_points.txt +0 -0
  44. {bblean-0.7.6 → bblean-0.7.8}/bblean.egg-info/requires.txt +0 -0
  45. {bblean-0.7.6 → bblean-0.7.8}/bblean.egg-info/top_level.txt +0 -0
  46. {bblean-0.7.6 → bblean-0.7.8}/docs/src/_static/api.svg +0 -0
  47. {bblean-0.7.6 → bblean-0.7.8}/docs/src/_static/installing.svg +0 -0
  48. {bblean-0.7.6 → bblean-0.7.8}/docs/src/_static/logo-dark-bw.svg +0 -0
  49. {bblean-0.7.6 → bblean-0.7.8}/docs/src/_static/logo-light-bw.svg +0 -0
  50. {bblean-0.7.6 → bblean-0.7.8}/docs/src/_static/publications.svg +0 -0
  51. {bblean-0.7.6 → bblean-0.7.8}/docs/src/_static/style.css +0 -0
  52. {bblean-0.7.6 → bblean-0.7.8}/docs/src/_static/user-guide.svg +0 -0
  53. {bblean-0.7.6 → bblean-0.7.8}/docs/src/_templates/module.rst +0 -0
  54. {bblean-0.7.6 → bblean-0.7.8}/docs/src/api-reference.rst +0 -0
  55. {bblean-0.7.6 → bblean-0.7.8}/docs/src/conf.py +0 -0
  56. {bblean-0.7.6 → bblean-0.7.8}/docs/src/index.rst +0 -0
  57. {bblean-0.7.6 → bblean-0.7.8}/docs/src/installing.rst +0 -0
  58. {bblean-0.7.6 → bblean-0.7.8}/docs/src/publications.rst +0 -0
  59. {bblean-0.7.6 → bblean-0.7.8}/docs/src/user-guide/linux_memory_setup.rst +0 -0
  60. {bblean-0.7.6 → bblean-0.7.8}/docs/src/user-guide/notebooks/bitbirch_best_practices.ipynb +0 -0
  61. {bblean-0.7.6 → bblean-0.7.8}/docs/src/user-guide/notebooks/bitbirch_quickstart.ipynb +0 -0
  62. {bblean-0.7.6 → bblean-0.7.8}/docs/src/user-guide/parameters.rst +0 -0
  63. {bblean-0.7.6 → bblean-0.7.8}/docs/src/user-guide.rst +0 -0
  64. {bblean-0.7.6 → bblean-0.7.8}/environment.yaml +0 -0
  65. {bblean-0.7.6 → bblean-0.7.8}/examples/best_practices/best_practices_functions.py +0 -0
  66. {bblean-0.7.6 → bblean-0.7.8}/examples/best_practices/best_practices_plots.py +0 -0
  67. {bblean-0.7.6 → bblean-0.7.8}/examples/best_practices/bitbirch_best_practices.ipynb +0 -0
  68. {bblean-0.7.6 → bblean-0.7.8}/examples/best_practices/bitbirch_best_practices_RDKit.ipynb +0 -0
  69. {bblean-0.7.6 → bblean-0.7.8}/examples/best_practices/bitbirch_parameter.ipynb +0 -0
  70. {bblean-0.7.6 → bblean-0.7.8}/examples/biogen_logS.csv +0 -0
  71. {bblean-0.7.6 → bblean-0.7.8}/examples/bitbirch_best_practices.ipynb +0 -0
  72. {bblean-0.7.6 → bblean-0.7.8}/examples/bitbirch_quickstart.ipynb +0 -0
  73. {bblean-0.7.6 → bblean-0.7.8}/examples/chembl-33-natural-products-subset.smi +0 -0
  74. {bblean-0.7.6 → bblean-0.7.8}/examples/dataset_splitting.ipynb +0 -0
  75. {bblean-0.7.6 → bblean-0.7.8}/pyproject.toml +0 -0
  76. {bblean-0.7.6 → bblean-0.7.8}/setup.cfg +0 -0
  77. {bblean-0.7.6 → bblean-0.7.8}/setup.py +0 -0
  78. {bblean-0.7.6 → bblean-0.7.8}/tests/chembl-sample-3k.smi +0 -0
  79. {bblean-0.7.6 → bblean-0.7.8}/tests/chembl-sample-bad.smi +0 -0
  80. {bblean-0.7.6 → bblean-0.7.8}/tests/legacy_merges.py +0 -0
  81. {bblean-0.7.6 → bblean-0.7.8}/tests/legacy_metrics.py +0 -0
  82. {bblean-0.7.6 → bblean-0.7.8}/tests/test_bb_consistency.py +0 -0
  83. {bblean-0.7.6 → bblean-0.7.8}/tests/test_cli.py +0 -0
  84. {bblean-0.7.6 → bblean-0.7.8}/tests/test_fake_fps.py +0 -0
  85. {bblean-0.7.6 → bblean-0.7.8}/tests/test_fingerprints.py +0 -0
  86. {bblean-0.7.6 → bblean-0.7.8}/tests/test_global_clustering.py +0 -0
  87. {bblean-0.7.6 → bblean-0.7.8}/tests/test_import_bblean.py +0 -0
  88. {bblean-0.7.6 → bblean-0.7.8}/tests/test_merges.py +0 -0
  89. {bblean-0.7.6 → bblean-0.7.8}/tests/test_metrics.py +0 -0
  90. {bblean-0.7.6 → bblean-0.7.8}/tests/test_multiround.py +0 -0
  91. {bblean-0.7.6 → bblean-0.7.8}/tests/test_refine.py +0 -0
  92. {bblean-0.7.6 → bblean-0.7.8}/tests/test_regression.py +0 -0
  93. {bblean-0.7.6 → bblean-0.7.8}/tests/test_sampling.py +0 -0
  94. {bblean-0.7.6 → bblean-0.7.8}/tests/test_similarity.py +0 -0
  95. {bblean-0.7.6 → bblean-0.7.8}/tests/test_simple.py +0 -0
  96. {bblean-0.7.6 → bblean-0.7.8}/tests/test_sklearn.py +0 -0
  97. {bblean-0.7.6 → bblean-0.7.8}/tests/test_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: bblean
3
- Version: 0.7.6
3
+ Version: 0.7.8
4
4
  Summary: BitBirch-Lean Python package
5
5
  Author: The Miranda-Quintana Lab and other BitBirch developers
6
6
  Author-email: Ramon Alain Miranda Quintana <quintana@chem.ufl.edu>, Krisztina Zsigmond <kzsigmond@ufl.edu>, Ignacio Pickering <ipickering@ufl.edu>, Kenneth Lopez Perez <klopezperez@chem.ufl.edu>, Miroslav Lzicar <miroslav.lzicar@deepmedchem.com>
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '0.7.6'
32
- __version_tuple__ = version_tuple = (0, 7, 6)
31
+ __version__ = version = '0.7.8'
32
+ __version_tuple__ = version_tuple = (0, 7, 8)
33
33
 
34
34
  __commit_id__ = commit_id = None
@@ -76,6 +76,85 @@ def _main(
76
76
  pass
77
77
 
78
78
 
79
@app.command("compare", rich_help_panel="Analysis", hidden=True)
def _compare(
    clusters_a_path: Annotated[Path, Argument()],
    clusters_b_path: Annotated[Path, Argument()],
    ari: Annotated[
        bool,
        Option("--ari/--no-ari", help="Adjusted Rand index"),
    ] = True,
    ami: Annotated[
        bool,
        Option("--ami/--no-ami", help="Adjusted mutual information (slow)"),
    ] = True,
    top: Annotated[
        int,
        Option("-t", "--top"),
    ] = 30,
    use_first_clustering_indices: Annotated[
        bool,
        Option("--use-first-clustering-indices/--no-use-first-clustering-indices"),
    ] = False,
    verbose: Annotated[
        bool,
        Option("-v/-V", "--verbose/--no-verbose"),
    ] = True,
) -> None:
    r"""Compare two clusterings of the same data, using different metrics"""
    # Parameters:
    #   clusters_a_path / clusters_b_path: pickled clusterings (or directories
    #       that contain a "clusters.pkl"). The first is used as the "true"
    #       labelling and the second as the "predicted" one.
    #   ari / ami: which scores to compute and print.
    #   top: only molecules that belong to the first `top` clusters of each
    #       clustering take part in the comparison.
    #   use_first_clustering_indices: if set, restrict the comparison to the
    #       molecules in the first `top` clusters of clustering A only, instead
    #       of the union of the top clusters of A and B.
    #   verbose: when False the console is created in silent mode.
    # Raises:
    #   ValueError: if `top` is not positive, or if the two clusterings do not
    #       contain the same number of molecules.
    import pickle
    import numpy as np

    from sklearn.metrics import adjusted_mutual_info_score, adjusted_rand_score

    from bblean._console import get_console

    if top < 1:
        # A non-positive value would either crash inside np.concatenate or
        # (for negative values) silently select "all but the last N" clusters
        raise ValueError(f"--top must be a positive integer, got {top}")

    console = get_console(silent=not verbose)

    def _resolve(path: Path) -> Path:
        # Directories are assumed to hold the clustering under "clusters.pkl"
        return path / "clusters.pkl" if path.is_dir() else path

    def _labels_and_top_idxs(path: Path):  # type: ignore
        # Load one clustering and return (per-molecule labels, indices of the
        # molecules that belong to the first `top` clusters)
        #
        # NOTE(security): pickle can execute arbitrary code when loading, so
        # only clusterings from trusted sources should be passed to this command
        with open(path, mode="rb") as f:
            clusters = pickle.load(f)
        total = sum(len(c) for c in clusters)
        # Every slot is overwritten below as long as the clusters partition
        # range(total), which is presumably guaranteed by the producer of the
        # file -- TODO confirm
        labels = np.empty(total, dtype=np.uint64)
        for label, mol_ids in enumerate(clusters):
            labels[mol_ids] = label
        top_clusters = clusters[:top]
        if not top_clusters:
            raise ValueError(f"No clusters found in {path}")
        return labels, np.concatenate(top_clusters)

    with console.status("[italic]Collecting labels...[/italic]", spinner="dots"):
        true_labels, idxs_a = _labels_and_top_idxs(_resolve(clusters_a_path))
        pred_labels, idxs_b = _labels_and_top_idxs(_resolve(clusters_b_path))

        if true_labels.size != pred_labels.size:
            # Without this check the fancy-indexing below fails with an opaque
            # out-of-bounds IndexError
            raise ValueError(
                "Both clusterings must be over the same data, but they contain"
                f" {true_labels.size} and {pred_labels.size} molecules"
            )

        if use_first_clustering_indices:
            idxs = idxs_a
        else:
            idxs = np.unique(np.concatenate((idxs_a, idxs_b)))

        true_labels = true_labels[idxs]
        pred_labels = pred_labels[idxs]

    # Only the score calculation is timed, not the label collection.
    # NOTE(review): Timer is not imported in this function, it is expected to be
    # available at module level -- confirm
    timer = Timer()
    timer.init_timing("total")
    if ami:
        with console.status("[italic]Calc. AMI score...[/italic]", spinner="dots"):
            ami_score = adjusted_mutual_info_score(true_labels, pred_labels)
        console.print(f"Adjusted Mutual Information (AMI): {ami_score:.4f}")

    if ari:
        with console.status("[italic]Calc. ARI score...[/italic]", spinner="dots"):
            ari_score = adjusted_rand_score(true_labels, pred_labels)
        console.print(f"Adjusted Rand Index (ARI): {ari_score:.4f}")
    timer.end_timing("total", console, indent=False)
156
+
157
+
79
158
  @app.command("summary", rich_help_panel="Analysis")
80
159
  def _table_summary(
81
160
  clusters_path: Annotated[
@@ -1550,6 +1629,14 @@ def _fps_from_smiles(
1550
1629
  help="Whether the smiles file has the format <smiles><tab><field><tab>...",
1551
1630
  ),
1552
1631
  ] = False,
1632
+ replace_dummy_atoms: Annotated[
1633
+ bool,
1634
+ Option(
1635
+ "--replace-dummy/--no-replace-dummy",
1636
+ help="Whether to replace dummy atoms such as [U], [Np], etc. used in synthon spaces", # noqa
1637
+ hidden=True,
1638
+ ),
1639
+ ] = False,
1553
1640
  ) -> None:
1554
1641
  r"""Generate a `*.npy` fingerprints file from one or more `*.smi` smiles files
1555
1642
 
@@ -1656,7 +1743,7 @@ def _fps_from_smiles(
1656
1743
  pool.map(
1657
1744
  create_fp_file,
1658
1745
  _iter_idxs_and_smiles_batches(
1659
- smiles_paths, num_per_batch, tab_separated
1746
+ smiles_paths, num_per_batch, tab_separated, replace_dummy_atoms
1660
1747
  ),
1661
1748
  )
1662
1749
  timer.end_timing("total", console, indent=False)
@@ -1698,7 +1785,7 @@ def _fps_from_smiles(
1698
1785
  pool.starmap(
1699
1786
  fps_array_filler,
1700
1787
  _iter_ranges_and_smiles_batches(
1701
- smiles_paths, num_per_batch, tab_separated
1788
+ smiles_paths, num_per_batch, tab_separated, replace_dummy_atoms
1702
1789
  ),
1703
1790
  )
1704
1791
  fps = np.ndarray((smiles_num, out_dim), dtype=dtype, buffer=fps_shmem.buf)
@@ -31,8 +31,12 @@ def calc_num_smiles(smiles_paths: SmilesPaths) -> int:
31
31
  return sum(1 for _ in iter_smiles_from_paths(smiles_paths))
32
32
 
33
33
 
34
+ # NOTE: replace_dummy is the procedure used in RDKit as of Dec 2024 for Synthon spaces
35
+ # synthons marked with [U], [Np], [Pu], [Am]. These need to be converted to [1*], [2*], [3*], [4*]
34
36
  def iter_smiles_from_paths(
35
- smiles_paths: SmilesPaths, tab_separated: bool = False
37
+ smiles_paths: SmilesPaths,
38
+ tab_separated: bool = False,
39
+ replace_dummy_atoms: bool = False,
36
40
  ) -> tp.Iterator[str]:
37
41
  r"""Iterate over smiles in a sequence of smiles paths
38
42
 
@@ -44,10 +48,21 @@ def iter_smiles_from_paths(
44
48
  for smi_path in smiles_paths:
45
49
  with open(smi_path, mode="rt", encoding="utf-8") as f:
46
50
  for smi in f:
47
- smi = smi if not tab_separated else smi.split("\t")[0]
51
+ if tab_separated:
52
+ smi = smi.split("\t")[0]
53
+
48
54
  # Skip headers
49
55
  if smi.lower().strip() == "smiles":
50
56
  continue
57
+
58
+ # Replace 'dummy' atoms from synthon spaces
59
+ if replace_dummy_atoms:
60
+ smi = (
61
+ smi.replace("[U]", "[1*]")
62
+ .replace("[Np]", "[2*]")
63
+ .replace("[Pu]", "[3*]")
64
+ .replace("[Am]", "[4*]")
65
+ )
51
66
  yield smi
52
67
 
53
68
 
@@ -55,10 +70,12 @@ def _iter_ranges_and_smiles_batches(
55
70
  smiles_paths: SmilesPaths,
56
71
  num_per_batch: int,
57
72
  tab_separated: bool = False,
73
+ replace_dummy_atoms: bool = False,
58
74
  ) -> tp.Iterable[tuple[tuple[int, int], tuple[str, ...]]]:
59
75
  start_idx = 0
60
76
  for batch in batched(
61
- iter_smiles_from_paths(smiles_paths, tab_separated), num_per_batch
77
+ iter_smiles_from_paths(smiles_paths, tab_separated, replace_dummy_atoms),
78
+ num_per_batch,
62
79
  ):
63
80
  size = len(batch)
64
81
  end_idx = start_idx + size
@@ -70,7 +87,11 @@ def _iter_idxs_and_smiles_batches(
70
87
  smiles_paths: SmilesPaths,
71
88
  num_per_batch: int,
72
89
  tab_separated: bool = False,
90
+ replace_dummy_atoms: bool = False,
73
91
  ) -> tp.Iterable[tuple[int, tuple[str, ...]]]:
74
92
  yield from enumerate(
75
- batched(iter_smiles_from_paths(smiles_paths, tab_separated), num_per_batch)
93
+ batched(
94
+ iter_smiles_from_paths(smiles_paths, tab_separated, replace_dummy_atoms),
95
+ num_per_batch,
96
+ )
76
97
  )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: bblean
3
- Version: 0.7.6
3
+ Version: 0.7.8
4
4
  Summary: BitBirch-Lean Python package
5
5
  Author: The Miranda-Quintana Lab and other BitBirch developers
6
6
  Author-email: Ramon Alain Miranda Quintana <quintana@chem.ufl.edu>, Krisztina Zsigmond <kzsigmond@ufl.edu>, Ignacio Pickering <ipickering@ufl.edu>, Kenneth Lopez Perez <klopezperez@chem.ufl.edu>, Miroslav Lzicar <miroslav.lzicar@deepmedchem.com>
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes