bblean 0.6.1b0__tar.gz → 0.7.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {bblean-0.6.1b0 → bblean-0.7.3}/.github/workflows/upload-to-pypi.yaml +1 -1
- {bblean-0.6.1b0 → bblean-0.7.3}/PKG-INFO +2 -2
- {bblean-0.6.1b0 → bblean-0.7.3}/README.md +1 -1
- {bblean-0.6.1b0 → bblean-0.7.3}/bblean/_py_similarity.py +1 -9
- {bblean-0.6.1b0 → bblean-0.7.3}/bblean/_version.py +2 -2
- {bblean-0.6.1b0 → bblean-0.7.3}/bblean/bitbirch.py +6 -6
- {bblean-0.6.1b0 → bblean-0.7.3}/bblean/cli.py +53 -3
- {bblean-0.6.1b0 → bblean-0.7.3}/bblean/csrc/similarity.cpp +77 -26
- {bblean-0.6.1b0 → bblean-0.7.3}/bblean/fingerprints.py +5 -1
- {bblean-0.6.1b0 → bblean-0.7.3}/bblean/multiround.py +30 -10
- {bblean-0.6.1b0 → bblean-0.7.3}/bblean/similarity.py +70 -15
- {bblean-0.6.1b0 → bblean-0.7.3}/bblean/smiles.py +20 -5
- {bblean-0.6.1b0 → bblean-0.7.3}/bblean.egg-info/PKG-INFO +2 -2
- {bblean-0.6.1b0 → bblean-0.7.3}/bblean.egg-info/SOURCES.txt +5 -2
- {bblean-0.6.1b0/examples → bblean-0.7.3/docs/src/user-guide/notebooks}/bitbirch_quickstart.ipynb +1 -1
- bblean-0.7.3/examples/best_practices/best_practices_functions.py +188 -0
- bblean-0.7.3/examples/best_practices/best_practices_plots.py +465 -0
- bblean-0.7.3/examples/best_practices/bitbirch_best_practices.ipynb +601 -0
- bblean-0.7.3/examples/best_practices/bitbirch_best_practices_RDKit.ipynb +571 -0
- bblean-0.7.3/examples/best_practices/bitbirch_parameter.ipynb +1755 -0
- {bblean-0.6.1b0/docs/src/user-guide/notebooks → bblean-0.7.3/examples}/bitbirch_quickstart.ipynb +1 -1
- {bblean-0.6.1b0 → bblean-0.7.3}/tests/test_cli.py +60 -5
- {bblean-0.6.1b0 → bblean-0.7.3}/tests/test_global_clustering.py +5 -25
- {bblean-0.6.1b0 → bblean-0.7.3}/tests/test_similarity.py +12 -1
- bblean-0.6.1b0/docs/src/user-guide/notebooks/bitbirch_best_practices.ipynb +0 -526
- bblean-0.6.1b0/examples/bitbirch_best_practices.ipynb +0 -526
- {bblean-0.6.1b0 → bblean-0.7.3}/.cruft.json +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/.flake8 +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/.github/CODEOWNERS +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/.github/workflows/ci-cpp.yaml +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/.github/workflows/ci.yaml +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/.gitignore +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/.pre-commit-config.yaml +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/LICENSE +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/LICENSES/BSD-3-Clause.txt +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/LICENSES/GPL-3.0-only.txt +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/bblean/__init__.py +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/bblean/_config.py +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/bblean/_console.py +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/bblean/_legacy/__init__.py +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/bblean/_legacy/bb_int64.py +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/bblean/_legacy/bb_uint8.py +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/bblean/_memory.py +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/bblean/_merges.py +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/bblean/_timer.py +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/bblean/analysis.py +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/bblean/csrc/README.md +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/bblean/metrics.py +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/bblean/plotting.py +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/bblean/sklearn.py +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/bblean/utils.py +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/bblean-demo-v2.gif +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/bblean-demo.cast +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/bblean.egg-info/dependency_links.txt +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/bblean.egg-info/entry_points.txt +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/bblean.egg-info/requires.txt +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/bblean.egg-info/top_level.txt +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/docs/src/_static/api.svg +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/docs/src/_static/installing.svg +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/docs/src/_static/logo-dark-bw.svg +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/docs/src/_static/logo-light-bw.svg +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/docs/src/_static/publications.svg +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/docs/src/_static/style.css +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/docs/src/_static/user-guide.svg +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/docs/src/_templates/module.rst +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/docs/src/api-reference.rst +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/docs/src/conf.py +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/docs/src/index.rst +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/docs/src/installing.rst +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/docs/src/publications.rst +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/docs/src/user-guide/linux_memory_setup.rst +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/docs/src/user-guide/parameters.rst +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/docs/src/user-guide.rst +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/environment.yaml +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/examples/biogen_logS.csv +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/examples/chembl-33-natural-products-subset.smi +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/examples/dataset_splitting.ipynb +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/pyproject.toml +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/setup.cfg +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/setup.py +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/tests/chembl-sample-3k.smi +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/tests/chembl-sample-bad.smi +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/tests/legacy_merges.py +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/tests/legacy_metrics.py +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/tests/test_bb_consistency.py +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/tests/test_fake_fps.py +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/tests/test_fingerprints.py +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/tests/test_import_bblean.py +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/tests/test_merges.py +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/tests/test_metrics.py +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/tests/test_multiround.py +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/tests/test_refine.py +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/tests/test_regression.py +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/tests/test_sampling.py +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/tests/test_simple.py +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/tests/test_sklearn.py +0 -0
- {bblean-0.6.1b0 → bblean-0.7.3}/tests/test_utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: bblean
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.7.3
|
|
4
4
|
Summary: BitBirch-Lean Python package
|
|
5
5
|
Author: The Miranda-Quintana Lab and other BitBirch developers
|
|
6
6
|
Author-email: Ramon Alain Miranda Quintana <quintana@chem.ufl.edu>, Krisztina Zsigmond <kzsigmond@ufl.edu>, Ignacio Pickering <ipickering@ufl.edu>, Kenneth Lopez Perez <klopezperez@chem.ufl.edu>, Miroslav Lzicar <miroslav.lzicar@deepmedchem.com>
|
|
@@ -236,7 +236,7 @@ tree = bblean.BitBirch(branching_factor=50, threshold=0.65, merge_criterion="dia
|
|
|
236
236
|
tree.fit(fps)
|
|
237
237
|
|
|
238
238
|
# Refine the tree (if needed)
|
|
239
|
-
tree.set_merge(
|
|
239
|
+
tree.set_merge("tolerance-diameter", tolerance=0.0)
|
|
240
240
|
tree.refine_inplace(fps)
|
|
241
241
|
|
|
242
242
|
# Visualize the results
|
|
@@ -193,7 +193,7 @@ tree = bblean.BitBirch(branching_factor=50, threshold=0.65, merge_criterion="dia
|
|
|
193
193
|
tree.fit(fps)
|
|
194
194
|
|
|
195
195
|
# Refine the tree (if needed)
|
|
196
|
-
tree.set_merge(
|
|
196
|
+
tree.set_merge("tolerance-diameter", tolerance=0.0)
|
|
197
197
|
tree.refine_inplace(fps)
|
|
198
198
|
|
|
199
199
|
# Visualize the results
|
|
@@ -76,18 +76,10 @@ def jt_compl_isim(
|
|
|
76
76
|
warnings.warn(msg, RuntimeWarning, stacklevel=2)
|
|
77
77
|
return np.full(len(fps), fill_value=np.nan, dtype=np.float64)
|
|
78
78
|
linear_sum = np.sum(fps, axis=0)
|
|
79
|
-
n_objects = len(fps) - 1
|
|
80
79
|
comp_sims = [jt_isim_from_sum(linear_sum - fp, n_objects) for fp in fps]
|
|
81
|
-
|
|
82
80
|
return np.array(comp_sims, dtype=np.float64)
|
|
83
81
|
|
|
84
82
|
|
|
85
|
-
def _jt_isim_medoid_index(
|
|
86
|
-
fps: NDArray[np.uint8], input_is_packed: bool = True, n_features: int | None = None
|
|
87
|
-
) -> int:
|
|
88
|
-
return np.argmin(jt_compl_isim(fps, input_is_packed, n_features)).item()
|
|
89
|
-
|
|
90
|
-
|
|
91
83
|
def jt_isim_medoid(
|
|
92
84
|
fps: NDArray[np.uint8],
|
|
93
85
|
input_is_packed: bool = True,
|
|
@@ -110,7 +102,7 @@ def jt_isim_medoid(
|
|
|
110
102
|
if len(fps) < 3:
|
|
111
103
|
idx = 0 # Medoid undefined for sets of fewer than 3 fingerprints
|
|
112
104
|
else:
|
|
113
|
-
idx =
|
|
105
|
+
idx = np.argmin(jt_compl_isim(fps, input_is_packed, n_features)).item()
|
|
114
106
|
m = fps[idx]
|
|
115
107
|
if pack:
|
|
116
108
|
return idx, pack_fingerprints(m)
|
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '0.
|
|
32
|
-
__version_tuple__ = version_tuple = (0,
|
|
31
|
+
__version__ = version = '0.7.3'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 7, 3)
|
|
33
33
|
|
|
34
34
|
__commit_id__ = commit_id = None
|
|
@@ -648,7 +648,7 @@ class BitBirch:
|
|
|
648
648
|
|
|
649
649
|
@merge_criterion.setter
|
|
650
650
|
def merge_criterion(self, value: str) -> None:
|
|
651
|
-
self.set_merge(
|
|
651
|
+
self.set_merge(merge_criterion=value)
|
|
652
652
|
|
|
653
653
|
@property
|
|
654
654
|
def tolerance(self) -> float | None:
|
|
@@ -673,7 +673,7 @@ class BitBirch:
|
|
|
673
673
|
|
|
674
674
|
def set_merge(
|
|
675
675
|
self,
|
|
676
|
-
|
|
676
|
+
merge_criterion: str | MergeAcceptFunction | None = None,
|
|
677
677
|
*,
|
|
678
678
|
tolerance: float | None = None,
|
|
679
679
|
threshold: float | None = None,
|
|
@@ -689,10 +689,10 @@ class BitBirch:
|
|
|
689
689
|
"the global set_merge() function has *not* been used"
|
|
690
690
|
)
|
|
691
691
|
_tolerance = 0.05 if tolerance is None else tolerance
|
|
692
|
-
if isinstance(
|
|
693
|
-
self._merge_accept_fn =
|
|
694
|
-
elif isinstance(
|
|
695
|
-
self._merge_accept_fn = get_merge_accept_fn(
|
|
692
|
+
if isinstance(merge_criterion, MergeAcceptFunction):
|
|
693
|
+
self._merge_accept_fn = merge_criterion
|
|
694
|
+
elif isinstance(merge_criterion, str):
|
|
695
|
+
self._merge_accept_fn = get_merge_accept_fn(merge_criterion, _tolerance)
|
|
696
696
|
if hasattr(self._merge_accept_fn, "tolerance"):
|
|
697
697
|
self._merge_accept_fn.tolerance = _tolerance
|
|
698
698
|
elif tolerance is not None:
|
|
@@ -1101,7 +1101,7 @@ def _run(
|
|
|
1101
1101
|
console.print("Can't save tree for non-lean variants", style="red")
|
|
1102
1102
|
else:
|
|
1103
1103
|
# TODO: Find alternative solution
|
|
1104
|
-
tree.
|
|
1104
|
+
tree.save(out_dir / "bitbirch.pkl")
|
|
1105
1105
|
if variant == "lean":
|
|
1106
1106
|
tree.delete_internal_nodes()
|
|
1107
1107
|
# Dump outputs (peak memory, timings, config, cluster ids)
|
|
@@ -1196,6 +1196,14 @@ def _multiround(
|
|
|
1196
1196
|
bool,
|
|
1197
1197
|
Option("--save-centroids/--no-save-centroids", rich_help_panel="Advanced"),
|
|
1198
1198
|
] = True,
|
|
1199
|
+
sort_fps: Annotated[
|
|
1200
|
+
bool,
|
|
1201
|
+
Option(
|
|
1202
|
+
"--sort-fps/--no-sort-fps",
|
|
1203
|
+
help="Sort the fingerprints by popcount before launching the initial round",
|
|
1204
|
+
rich_help_panel="Advanced",
|
|
1205
|
+
),
|
|
1206
|
+
] = False,
|
|
1199
1207
|
mid_merge_criterion: Annotated[
|
|
1200
1208
|
str,
|
|
1201
1209
|
Option(
|
|
@@ -1389,6 +1397,7 @@ def _multiround(
|
|
|
1389
1397
|
midsection_threshold_change=mid_threshold_change,
|
|
1390
1398
|
tolerance=tolerance,
|
|
1391
1399
|
# Advanced
|
|
1400
|
+
sort_fps=sort_fps,
|
|
1392
1401
|
save_tree=save_tree,
|
|
1393
1402
|
save_centroids=save_centroids,
|
|
1394
1403
|
bin_size=bin_size,
|
|
@@ -1529,6 +1538,13 @@ def _fps_from_smiles(
|
|
|
1529
1538
|
),
|
|
1530
1539
|
),
|
|
1531
1540
|
] = False,
|
|
1541
|
+
tab_separated: Annotated[
|
|
1542
|
+
bool,
|
|
1543
|
+
Option(
|
|
1544
|
+
"--tab-sep/--no-tab-sep",
|
|
1545
|
+
help="Whether the smiles file has the format <smiles><tab><field><tab>...",
|
|
1546
|
+
),
|
|
1547
|
+
] = False,
|
|
1532
1548
|
) -> None:
|
|
1533
1549
|
r"""Generate a `*.npy` fingerprints file from one or more `*.smi` smiles files
|
|
1534
1550
|
|
|
@@ -1634,7 +1650,9 @@ def _fps_from_smiles(
|
|
|
1634
1650
|
with mp_context.Pool(processes=num_ps) as pool:
|
|
1635
1651
|
pool.map(
|
|
1636
1652
|
create_fp_file,
|
|
1637
|
-
_iter_idxs_and_smiles_batches(
|
|
1653
|
+
_iter_idxs_and_smiles_batches(
|
|
1654
|
+
smiles_paths, num_per_batch, tab_separated
|
|
1655
|
+
),
|
|
1638
1656
|
)
|
|
1639
1657
|
timer.end_timing("total", console, indent=False)
|
|
1640
1658
|
stem = out_name.split(".")[0]
|
|
@@ -1674,7 +1692,9 @@ def _fps_from_smiles(
|
|
|
1674
1692
|
with mp_context.Pool(processes=num_ps) as pool:
|
|
1675
1693
|
pool.starmap(
|
|
1676
1694
|
fps_array_filler,
|
|
1677
|
-
_iter_ranges_and_smiles_batches(
|
|
1695
|
+
_iter_ranges_and_smiles_batches(
|
|
1696
|
+
smiles_paths, num_per_batch, tab_separated
|
|
1697
|
+
),
|
|
1678
1698
|
)
|
|
1679
1699
|
fps = np.ndarray((smiles_num, out_dim), dtype=dtype, buffer=fps_shmem.buf)
|
|
1680
1700
|
mask = np.ndarray((smiles_num,), dtype=np.bool, buffer=invalid_mask_shmem.buf)
|
|
@@ -1851,3 +1871,33 @@ def _merge_fps(
|
|
|
1851
1871
|
return
|
|
1852
1872
|
np.save(out_dir / stem, np.concatenate(arrays))
|
|
1853
1873
|
console.print(f"Finished. Outputs written to {str(out_dir / stem)}.npy")
|
|
1874
|
+
|
|
1875
|
+
|
|
1876
|
+
@app.command("fps-sort", rich_help_panel="Fingerprints")
|
|
1877
|
+
def _sort_fps(
|
|
1878
|
+
in_file: Annotated[
|
|
1879
|
+
Path,
|
|
1880
|
+
Argument(help="`*.npy` file with packed fingerprints"),
|
|
1881
|
+
],
|
|
1882
|
+
out_dir: Annotated[
|
|
1883
|
+
Path | None,
|
|
1884
|
+
Option("-o", "--out-dir", show_default=False),
|
|
1885
|
+
] = None,
|
|
1886
|
+
seed: Annotated[
|
|
1887
|
+
int | None,
|
|
1888
|
+
Option("--seed", hidden=True, rich_help_panel="Debug"),
|
|
1889
|
+
] = None,
|
|
1890
|
+
) -> None:
|
|
1891
|
+
import numpy as np
|
|
1892
|
+
from bblean._py_similarity import _popcount
|
|
1893
|
+
|
|
1894
|
+
fps = np.load(in_file)
|
|
1895
|
+
stem = in_file.stem
|
|
1896
|
+
counts = _popcount(fps)
|
|
1897
|
+
sort_idxs = np.argsort(counts)
|
|
1898
|
+
fps = fps[sort_idxs]
|
|
1899
|
+
if out_dir is None:
|
|
1900
|
+
out_dir = Path.cwd()
|
|
1901
|
+
out_dir.mkdir(exist_ok=True)
|
|
1902
|
+
out_dir = out_dir.resolve()
|
|
1903
|
+
np.save(out_dir / f"sorted-{stem}.npy", fps)
|
|
@@ -300,6 +300,75 @@ double jt_isim_from_sum(const CArrayForcecast<uint64_t>& linear_sum,
|
|
|
300
300
|
return a / ((a + (n_objects * sum_kq)) - sum_kqsq);
|
|
301
301
|
}
|
|
302
302
|
|
|
303
|
+
// NOTE: This is only *slightly* faster for C++ than numpy, **only if the
|
|
304
|
+
// array is uint8_t** if the array is uint64 already, it is slower
|
|
305
|
+
template <typename T>
|
|
306
|
+
py::array_t<uint64_t> add_rows(const CArrayForcecast<T>& arr) {
|
|
307
|
+
if (arr.ndim() != 2) {
|
|
308
|
+
throw std::runtime_error("Input array must be 2-dimensional");
|
|
309
|
+
}
|
|
310
|
+
auto arr_ptr = arr.data();
|
|
311
|
+
auto out = py::array_t<uint64_t>(arr.shape(1));
|
|
312
|
+
auto out_ptr = out.mutable_data();
|
|
313
|
+
std::memset(out_ptr, 0, out.nbytes());
|
|
314
|
+
py::ssize_t n_samples = arr.shape(0);
|
|
315
|
+
py::ssize_t n_features = arr.shape(1);
|
|
316
|
+
// Check GCC / CLang vectorize this
|
|
317
|
+
for (py::ssize_t i = 0; i < n_samples; ++i) {
|
|
318
|
+
const uint8_t* arr_row_ptr = arr_ptr + i * n_features;
|
|
319
|
+
for (py::ssize_t j = 0; j < n_features; ++j) {
|
|
320
|
+
out_ptr[j] += static_cast<uint64_t>(arr_row_ptr[j]);
|
|
321
|
+
}
|
|
322
|
+
}
|
|
323
|
+
return out;
|
|
324
|
+
}
|
|
325
|
+
py::array_t<double> _nochecks_jt_compl_isim_unpacked_u8(
|
|
326
|
+
const py::array_t<uint8_t, py::array::c_style>& fps) {
|
|
327
|
+
py::ssize_t n_objects = fps.shape(0);
|
|
328
|
+
py::ssize_t n_features = fps.shape(1);
|
|
329
|
+
auto out = py::array_t<double>(n_objects);
|
|
330
|
+
auto out_ptr = out.mutable_data();
|
|
331
|
+
|
|
332
|
+
if (n_objects < 3) {
|
|
333
|
+
PyErr_WarnEx(PyExc_RuntimeWarning,
|
|
334
|
+
"Invalid num fps in compl_isim. Expected n_objects >= 3",
|
|
335
|
+
1);
|
|
336
|
+
for (py::ssize_t i{0}; i != n_objects; ++i) {
|
|
337
|
+
out_ptr[i] = std::numeric_limits<double>::quiet_NaN();
|
|
338
|
+
}
|
|
339
|
+
return out;
|
|
340
|
+
}
|
|
341
|
+
|
|
342
|
+
auto linear_sum = add_rows<uint8_t>(fps);
|
|
343
|
+
auto ls_cptr = linear_sum.data();
|
|
344
|
+
|
|
345
|
+
py::array_t<uint64_t> shifted_linear_sum(n_features);
|
|
346
|
+
auto shifted_ls_ptr = shifted_linear_sum.mutable_data();
|
|
347
|
+
|
|
348
|
+
auto in_cptr = fps.data();
|
|
349
|
+
for (py::ssize_t i{0}; i != n_objects; ++i) {
|
|
350
|
+
for (py::ssize_t j{0}; j != n_features; ++j) {
|
|
351
|
+
shifted_ls_ptr[j] = ls_cptr[j] - in_cptr[i * n_features + j];
|
|
352
|
+
}
|
|
353
|
+
// For all compl isim N is n_objects - 1
|
|
354
|
+
out_ptr[i] = jt_isim_from_sum(shifted_linear_sum, n_objects - 1);
|
|
355
|
+
}
|
|
356
|
+
return out;
|
|
357
|
+
}
|
|
358
|
+
|
|
359
|
+
py::array_t<double> jt_compl_isim(
|
|
360
|
+
const CArrayForcecast<uint8_t>& fps, bool input_is_packed = true,
|
|
361
|
+
std::optional<py::ssize_t> n_features_opt = std::nullopt) {
|
|
362
|
+
if (fps.ndim() != 2) {
|
|
363
|
+
throw std::runtime_error("fps arr must be 2D");
|
|
364
|
+
}
|
|
365
|
+
if (input_is_packed) {
|
|
366
|
+
return _nochecks_jt_compl_isim_unpacked_u8(
|
|
367
|
+
_nochecks_unpack_fingerprints_2d(fps, n_features_opt));
|
|
368
|
+
}
|
|
369
|
+
return _nochecks_jt_compl_isim_unpacked_u8(fps);
|
|
370
|
+
}
|
|
371
|
+
|
|
303
372
|
// Constraint: T must be uint64_t or uint8_t
|
|
304
373
|
template <typename T>
|
|
305
374
|
void _calc_arr_vec_jt(const py::array_t<uint8_t>& arr,
|
|
@@ -372,33 +441,10 @@ py::array_t<double> jt_sim_packed_precalc_cardinalities(
|
|
|
372
441
|
}
|
|
373
442
|
|
|
374
443
|
py::array_t<double> _jt_sim_arr_vec_packed(const py::array_t<uint8_t>& arr,
|
|
375
|
-
|
|
444
|
+
const py::array_t<uint8_t>& vec) {
|
|
376
445
|
return jt_sim_packed_precalc_cardinalities(arr, vec, _popcount_2d(arr));
|
|
377
446
|
}
|
|
378
447
|
|
|
379
|
-
// NOTE: This is only *slightly* faster for C++ than numpy, **only if the
|
|
380
|
-
// array is uint8_t** if the array is uint64 already, it is slower
|
|
381
|
-
template <typename T>
|
|
382
|
-
py::array_t<uint64_t> add_rows(const CArrayForcecast<T>& arr) {
|
|
383
|
-
if (arr.ndim() != 2) {
|
|
384
|
-
throw std::runtime_error("Input array must be 2-dimensional");
|
|
385
|
-
}
|
|
386
|
-
auto arr_ptr = arr.data();
|
|
387
|
-
auto out = py::array_t<uint64_t>(arr.shape(1));
|
|
388
|
-
auto out_ptr = out.mutable_data();
|
|
389
|
-
std::memset(out_ptr, 0, out.nbytes());
|
|
390
|
-
py::ssize_t n_samples = arr.shape(0);
|
|
391
|
-
py::ssize_t n_features = arr.shape(1);
|
|
392
|
-
// Check GCC / CLang vectorize this
|
|
393
|
-
for (py::ssize_t i = 0; i < n_samples; ++i) {
|
|
394
|
-
const uint8_t* arr_row_ptr = arr_ptr + i * n_features;
|
|
395
|
-
for (py::ssize_t j = 0; j < n_features; ++j) {
|
|
396
|
-
out_ptr[j] += static_cast<uint64_t>(arr_row_ptr[j]);
|
|
397
|
-
}
|
|
398
|
-
}
|
|
399
|
-
return out;
|
|
400
|
-
}
|
|
401
|
-
|
|
402
448
|
double jt_isim_unpacked_u8(const CArrayForcecast<uint8_t>& arr) {
|
|
403
449
|
return jt_isim_from_sum(add_rows<uint8_t>(arr), arr.shape(0));
|
|
404
450
|
}
|
|
@@ -406,8 +452,9 @@ double jt_isim_unpacked_u8(const CArrayForcecast<uint8_t>& arr) {
|
|
|
406
452
|
double jt_isim_packed_u8(
|
|
407
453
|
const CArrayForcecast<uint8_t>& arr,
|
|
408
454
|
std::optional<py::ssize_t> n_features_opt = std::nullopt) {
|
|
409
|
-
return jt_isim_from_sum(
|
|
410
|
-
|
|
455
|
+
return jt_isim_from_sum(
|
|
456
|
+
add_rows<uint8_t>(unpack_fingerprints(arr, n_features_opt)),
|
|
457
|
+
arr.shape(0));
|
|
411
458
|
}
|
|
412
459
|
|
|
413
460
|
py::tuple jt_most_dissimilar_packed(
|
|
@@ -510,6 +557,10 @@ PYBIND11_MODULE(_cpp_similarity, m) {
|
|
|
510
557
|
m.def("jt_isim_unpacked_u8", &jt_isim_unpacked_u8,
|
|
511
558
|
"iSIM Tanimoto calculation", py::arg("arr"));
|
|
512
559
|
|
|
560
|
+
m.def("jt_compl_isim", &jt_compl_isim, "Complementary iSIM tanimoto",
|
|
561
|
+
py::arg("fps"), py::arg("input_is_packed") = true,
|
|
562
|
+
py::arg("n_features") = std::nullopt);
|
|
563
|
+
|
|
513
564
|
m.def("_jt_sim_arr_vec_packed", &_jt_sim_arr_vec_packed,
|
|
514
565
|
"Tanimoto similarity between a matrix of packed fps and a single "
|
|
515
566
|
"packed fp",
|
|
@@ -115,7 +115,11 @@ def _get_generator(kind: str, n_features: int) -> tp.Any:
|
|
|
115
115
|
return rdFingerprintGenerator.GetMorganGenerator(radius=2, fpSize=n_features)
|
|
116
116
|
elif kind == "ecfp6":
|
|
117
117
|
return rdFingerprintGenerator.GetMorganGenerator(radius=3, fpSize=n_features)
|
|
118
|
-
|
|
118
|
+
elif kind == "topological":
|
|
119
|
+
return rdFingerprintGenerator.GetTopologicalTorsionGenerator(fpSize=n_features)
|
|
120
|
+
elif kind == "ap":
|
|
121
|
+
return rdFingerprintGenerator.GetAtomPairGenerator(fpSize=n_features)
|
|
122
|
+
raise ValueError(f"Unknown kind {kind}. Use 'rdkit|ecfp4|ecfp6|topological|ap'")
|
|
119
123
|
|
|
120
124
|
|
|
121
125
|
def _get_sanitize_flags(sanitize: str) -> tp.Any:
|
|
@@ -65,6 +65,7 @@ from bblean._config import DEFAULTS
|
|
|
65
65
|
from bblean.utils import batched
|
|
66
66
|
from bblean.bitbirch import BitBirch
|
|
67
67
|
from bblean.fingerprints import _get_fps_file_num
|
|
68
|
+
from bblean._py_similarity import _popcount
|
|
68
69
|
|
|
69
70
|
__all__ = ["run_multiround_bitbirch"]
|
|
70
71
|
|
|
@@ -157,6 +158,7 @@ class _InitialRound:
|
|
|
157
158
|
max_fps: int | None = None,
|
|
158
159
|
merge_criterion: str = DEFAULTS.merge_criterion,
|
|
159
160
|
input_is_packed: bool = True,
|
|
161
|
+
sort_fps: bool = False,
|
|
160
162
|
) -> None:
|
|
161
163
|
self.n_features = n_features
|
|
162
164
|
self.refinement_before_midsection = refinement_before_midsection
|
|
@@ -171,6 +173,7 @@ class _InitialRound:
|
|
|
171
173
|
self.refine_merge_criterion = refine_merge_criterion
|
|
172
174
|
self.input_is_packed = input_is_packed
|
|
173
175
|
self.refine_threshold_change = refine_threshold_change
|
|
176
|
+
self._sort_fps = sort_fps
|
|
174
177
|
|
|
175
178
|
def __call__(self, file_info: tuple[str, Path, int, int]) -> None:
|
|
176
179
|
file_label, fp_file, start_idx, end_idx = file_info
|
|
@@ -182,6 +185,14 @@ class _InitialRound:
|
|
|
182
185
|
threshold=self.threshold,
|
|
183
186
|
merge_criterion=self.merge_criterion,
|
|
184
187
|
)
|
|
188
|
+
if self._sort_fps:
|
|
189
|
+
fp_input = np.load(fp_file)
|
|
190
|
+
counts = _popcount(fp_input)
|
|
191
|
+
sort_idxs = np.argsort(counts)
|
|
192
|
+
fp_input = fp_input[sort_idxs]
|
|
193
|
+
else:
|
|
194
|
+
fp_input = fp_file
|
|
195
|
+
|
|
185
196
|
range_ = range(start_idx, end_idx)
|
|
186
197
|
tree.fit(
|
|
187
198
|
fp_file,
|
|
@@ -201,7 +212,7 @@ class _InitialRound:
|
|
|
201
212
|
# Finish the first refinement step internally in this round
|
|
202
213
|
tree.reset()
|
|
203
214
|
tree.set_merge(
|
|
204
|
-
self.refine_merge_criterion,
|
|
215
|
+
merge_criterion=self.refine_merge_criterion,
|
|
205
216
|
tolerance=self.tolerance,
|
|
206
217
|
threshold=self.threshold + self.refine_threshold_change,
|
|
207
218
|
)
|
|
@@ -225,7 +236,7 @@ class _TreeMergingRound:
|
|
|
225
236
|
round_idx: int,
|
|
226
237
|
out_dir: Path | str,
|
|
227
238
|
split_largest_cluster: bool,
|
|
228
|
-
|
|
239
|
+
merge_criterion: str,
|
|
229
240
|
all_fp_paths: tp.Sequence[Path] = (),
|
|
230
241
|
) -> None:
|
|
231
242
|
self.all_fp_paths = list(all_fp_paths)
|
|
@@ -235,14 +246,14 @@ class _TreeMergingRound:
|
|
|
235
246
|
self.round_idx = round_idx
|
|
236
247
|
self.out_dir = Path(out_dir)
|
|
237
248
|
self.split_largest_cluster = split_largest_cluster
|
|
238
|
-
self.
|
|
249
|
+
self.merge_criterion = merge_criterion
|
|
239
250
|
|
|
240
251
|
def __call__(self, batch_info: tuple[str, tp.Sequence[tuple[Path, Path]]]) -> None:
|
|
241
252
|
batch_label, batch_path_pairs = batch_info
|
|
242
253
|
tree = BitBirch(
|
|
243
254
|
branching_factor=self.branching_factor,
|
|
244
255
|
threshold=self.threshold,
|
|
245
|
-
merge_criterion=self.
|
|
256
|
+
merge_criterion=self.merge_criterion,
|
|
246
257
|
tolerance=self.tolerance,
|
|
247
258
|
)
|
|
248
259
|
# Rebuild a tree, inserting all BitFeatures from the corresponding batch
|
|
@@ -270,13 +281,20 @@ class _FinalTreeMergingRound(_TreeMergingRound):
|
|
|
270
281
|
branching_factor: int,
|
|
271
282
|
threshold: float,
|
|
272
283
|
tolerance: float,
|
|
273
|
-
|
|
284
|
+
merge_criterion: str,
|
|
274
285
|
out_dir: Path | str,
|
|
275
286
|
save_tree: bool,
|
|
276
287
|
save_centroids: bool,
|
|
277
288
|
) -> None:
|
|
278
289
|
super().__init__(
|
|
279
|
-
branching_factor,
|
|
290
|
+
branching_factor,
|
|
291
|
+
threshold,
|
|
292
|
+
tolerance,
|
|
293
|
+
-1,
|
|
294
|
+
out_dir,
|
|
295
|
+
False,
|
|
296
|
+
merge_criterion,
|
|
297
|
+
(),
|
|
280
298
|
)
|
|
281
299
|
self.save_tree = save_tree
|
|
282
300
|
self.save_centroids = save_centroids
|
|
@@ -286,7 +304,7 @@ class _FinalTreeMergingRound(_TreeMergingRound):
|
|
|
286
304
|
tree = BitBirch(
|
|
287
305
|
branching_factor=self.branching_factor,
|
|
288
306
|
threshold=self.threshold,
|
|
289
|
-
merge_criterion=self.
|
|
307
|
+
merge_criterion=self.merge_criterion,
|
|
290
308
|
tolerance=self.tolerance,
|
|
291
309
|
)
|
|
292
310
|
# Rebuild a tree, inserting all BitFeatures from the corresponding batch
|
|
@@ -299,7 +317,7 @@ class _FinalTreeMergingRound(_TreeMergingRound):
|
|
|
299
317
|
# Save clusters and exit
|
|
300
318
|
if self.save_tree:
|
|
301
319
|
# TODO: Find alternative solution
|
|
302
|
-
tree.
|
|
320
|
+
tree.save(self.out_dir / "bitbirch.pkl")
|
|
303
321
|
tree.delete_internal_nodes()
|
|
304
322
|
if self.save_centroids:
|
|
305
323
|
output = tree.get_centroids_mol_ids()
|
|
@@ -353,6 +371,7 @@ def run_multiround_bitbirch(
|
|
|
353
371
|
mp_context: tp.Any = None,
|
|
354
372
|
save_tree: bool = False,
|
|
355
373
|
save_centroids: bool = True,
|
|
374
|
+
sort_fps: bool = False,
|
|
356
375
|
# Debug
|
|
357
376
|
max_fps: int | None = None,
|
|
358
377
|
verbose: bool = False,
|
|
@@ -399,6 +418,7 @@ def run_multiround_bitbirch(
|
|
|
399
418
|
console.print(f"(Initial) Round {round_idx}: Cluster initial batch of fingerprints")
|
|
400
419
|
|
|
401
420
|
initial_fn = _InitialRound(
|
|
421
|
+
sort_fps=sort_fps,
|
|
402
422
|
n_features=n_features,
|
|
403
423
|
refinement_before_midsection=refinement_before_midsection,
|
|
404
424
|
max_fps=max_fps,
|
|
@@ -436,7 +456,7 @@ def run_multiround_bitbirch(
|
|
|
436
456
|
round_idx=round_idx,
|
|
437
457
|
all_fp_paths=input_files,
|
|
438
458
|
split_largest_cluster=split_largest_after_each_midsection_round,
|
|
439
|
-
|
|
459
|
+
merge_criterion=midsection_merge_criterion,
|
|
440
460
|
threshold=threshold + midsection_threshold_change,
|
|
441
461
|
**common_kwargs,
|
|
442
462
|
)
|
|
@@ -464,7 +484,7 @@ def run_multiround_bitbirch(
|
|
|
464
484
|
final_fn = _FinalTreeMergingRound(
|
|
465
485
|
save_tree=save_tree,
|
|
466
486
|
save_centroids=save_centroids,
|
|
467
|
-
|
|
487
|
+
merge_criterion=final_merge_criterion,
|
|
468
488
|
threshold=threshold + midsection_threshold_change,
|
|
469
489
|
**common_kwargs,
|
|
470
490
|
)
|
|
@@ -34,12 +34,8 @@ __all__ = [
|
|
|
34
34
|
"jt_sim_matrix_packed",
|
|
35
35
|
]
|
|
36
36
|
|
|
37
|
-
from bblean._py_similarity import
|
|
38
|
-
|
|
39
|
-
centroid,
|
|
40
|
-
jt_compl_isim,
|
|
41
|
-
jt_isim_medoid,
|
|
42
|
-
)
|
|
37
|
+
from bblean._py_similarity import centroid_from_sum, centroid
|
|
38
|
+
from bblean.fingerprints import pack_fingerprints, unpack_fingerprints
|
|
43
39
|
|
|
44
40
|
# jt_isim_packed and jt_isim_unpacked are not exposed, only used within functions for
|
|
45
41
|
# speed
|
|
@@ -49,6 +45,7 @@ if os.getenv("BITBIRCH_NO_EXTENSIONS"):
|
|
|
49
45
|
jt_isim_from_sum,
|
|
50
46
|
jt_isim_unpacked,
|
|
51
47
|
jt_isim_packed,
|
|
48
|
+
jt_compl_isim,
|
|
52
49
|
_jt_sim_arr_vec_packed,
|
|
53
50
|
jt_most_dissimilar_packed,
|
|
54
51
|
)
|
|
@@ -56,11 +53,13 @@ else:
|
|
|
56
53
|
try:
|
|
57
54
|
from bblean._cpp_similarity import ( # type: ignore
|
|
58
55
|
jt_isim_from_sum,
|
|
59
|
-
_jt_sim_arr_vec_packed,
|
|
60
56
|
jt_isim_unpacked_u8,
|
|
61
57
|
jt_isim_packed_u8,
|
|
58
|
+
jt_compl_isim, # TODO: Does it need wrappers for non-uint8?
|
|
59
|
+
_jt_sim_arr_vec_packed,
|
|
62
60
|
jt_most_dissimilar_packed,
|
|
63
|
-
|
|
61
|
+
# Needed for wrappers
|
|
62
|
+
unpack_fingerprints as _unpack_fingerprints,
|
|
64
63
|
)
|
|
65
64
|
|
|
66
65
|
# Wrap these two since doing
|
|
@@ -80,7 +79,7 @@ else:
|
|
|
80
79
|
if arr.dtype == np.uint64:
|
|
81
80
|
return jt_isim_from_sum(
|
|
82
81
|
np.sum(
|
|
83
|
-
|
|
82
|
+
_unpack_fingerprints(arr, n_features), # type: ignore
|
|
84
83
|
axis=0,
|
|
85
84
|
dtype=np.uint64,
|
|
86
85
|
),
|
|
@@ -93,6 +92,7 @@ else:
|
|
|
93
92
|
jt_isim_from_sum,
|
|
94
93
|
jt_isim_unpacked,
|
|
95
94
|
jt_isim_packed,
|
|
95
|
+
jt_compl_isim,
|
|
96
96
|
_jt_sim_arr_vec_packed,
|
|
97
97
|
jt_most_dissimilar_packed,
|
|
98
98
|
)
|
|
@@ -103,6 +103,35 @@ else:
|
|
|
103
103
|
)
|
|
104
104
|
|
|
105
105
|
|
|
106
|
+
def jt_isim_medoid(
|
|
107
|
+
fps: NDArray[np.uint8],
|
|
108
|
+
input_is_packed: bool = True,
|
|
109
|
+
n_features: int | None = None,
|
|
110
|
+
pack: bool = True,
|
|
111
|
+
) -> tuple[int, NDArray[np.uint8]]:
|
|
112
|
+
r"""Calculate the (Tanimoto) medoid of a set of fingerprints, using iSIM
|
|
113
|
+
|
|
114
|
+
Returns both the index of the medoid in the input array and the medoid itself
|
|
115
|
+
|
|
116
|
+
.. note::
|
|
117
|
+
Returns the first (or only) fingerprint for arrays of size 2 and 1, respectively.
|
|
118
|
+
Raises ValueError for arrays of size 0
|
|
119
|
+
|
|
120
|
+
"""
|
|
121
|
+
if not fps.size:
|
|
122
|
+
raise ValueError("Size of fingerprints set must be > 0")
|
|
123
|
+
if input_is_packed:
|
|
124
|
+
fps = unpack_fingerprints(fps, n_features)
|
|
125
|
+
if len(fps) < 3:
|
|
126
|
+
idx = 0 # Medoid undefined for sets of fewer than 3 fingerprints
|
|
127
|
+
else:
|
|
128
|
+
idx = np.argmin(jt_compl_isim(fps, input_is_packed, n_features)).item()
|
|
129
|
+
m = fps[idx]
|
|
130
|
+
if pack:
|
|
131
|
+
return idx, pack_fingerprints(m)
|
|
132
|
+
return idx, m
|
|
133
|
+
|
|
134
|
+
|
|
106
135
|
def jt_isim(
|
|
107
136
|
fps: NDArray[np.integer],
|
|
108
137
|
input_is_packed: bool = True,
|
|
@@ -149,7 +178,11 @@ def jt_isim_diameter(
|
|
|
149
178
|
r"""Calculate the Tanimoto diameter of a set of fingerprints"""
|
|
150
179
|
return jt_isim_diameter_from_sum(
|
|
151
180
|
np.sum(
|
|
152
|
-
|
|
181
|
+
(
|
|
182
|
+
unpack_fingerprints(arr.astype(np.uint8, copy=False), n_features)
|
|
183
|
+
if input_is_packed
|
|
184
|
+
else arr
|
|
185
|
+
),
|
|
153
186
|
axis=0,
|
|
154
187
|
dtype=np.uint64,
|
|
155
188
|
), # type: ignore
|
|
@@ -165,7 +198,11 @@ def jt_isim_radius(
|
|
|
165
198
|
r"""Calculate the Tanimoto radius of a set of fingerprints"""
|
|
166
199
|
return jt_isim_radius_from_sum(
|
|
167
200
|
np.sum(
|
|
168
|
-
|
|
201
|
+
(
|
|
202
|
+
unpack_fingerprints(arr.astype(np.uint8, copy=False), n_features)
|
|
203
|
+
if input_is_packed
|
|
204
|
+
else arr
|
|
205
|
+
),
|
|
169
206
|
axis=0,
|
|
170
207
|
dtype=np.uint64,
|
|
171
208
|
), # type: ignore
|
|
@@ -181,7 +218,11 @@ def jt_isim_radius_compl(
|
|
|
181
218
|
r"""Calculate the complement of the Tanimoto radius of a set of fingerprints"""
|
|
182
219
|
return jt_isim_radius_compl_from_sum(
|
|
183
220
|
np.sum(
|
|
184
|
-
|
|
221
|
+
(
|
|
222
|
+
unpack_fingerprints(arr.astype(np.uint8, copy=False), n_features)
|
|
223
|
+
if input_is_packed
|
|
224
|
+
else arr
|
|
225
|
+
),
|
|
185
226
|
axis=0,
|
|
186
227
|
dtype=np.uint64,
|
|
187
228
|
), # type: ignore
|
|
@@ -252,14 +293,28 @@ def estimate_jt_std(
|
|
|
252
293
|
n_samples: int | None = None,
|
|
253
294
|
input_is_packed: bool = True,
|
|
254
295
|
n_features: int | None = None,
|
|
296
|
+
min_samples: int = 1_000_000,
|
|
255
297
|
) -> float:
|
|
256
|
-
r"""Estimate std of
|
|
298
|
+
r"""Estimate the std of all pairwise Tanimoto.
|
|
299
|
+
|
|
300
|
+
Returns
|
|
301
|
+
-------
|
|
302
|
+
std : float
|
|
303
|
+
The standard deviation of all pairwise Tanimoto among the sampled fingerprints.
|
|
304
|
+
"""
|
|
257
305
|
num_fps = len(fps)
|
|
306
|
+
if num_fps > min_samples:
|
|
307
|
+
np.random.seed(42)
|
|
308
|
+
random_choices = np.random.choice(num_fps, size=min_samples, replace=False)
|
|
309
|
+
fps = fps[random_choices]
|
|
310
|
+
num_fps = len(fps)
|
|
258
311
|
if n_samples is None:
|
|
259
|
-
|
|
312
|
+
# Heuristic: use at least 50 samples, or 1 per 10,000 fingerprints,
|
|
313
|
+
# to balance statistical representativeness and computational efficiency
|
|
314
|
+
n_samples = max(num_fps // 10_000, 50)
|
|
260
315
|
sample_idxs = jt_stratified_sampling(fps, n_samples, input_is_packed, n_features)
|
|
261
316
|
|
|
262
|
-
# Work with
|
|
317
|
+
# Work with only the sampled fingerprints
|
|
263
318
|
fps = fps[sample_idxs]
|
|
264
319
|
num_fps = len(fps)
|
|
265
320
|
pairs = np.empty(num_fps * (num_fps - 1) // 2, dtype=np.float64)
|