bblean 0.7.8__tar.gz → 0.8.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {bblean-0.7.8 → bblean-0.8.0}/PKG-INFO +1 -1
- {bblean-0.7.8 → bblean-0.8.0}/bblean/_version.py +2 -2
- {bblean-0.7.8 → bblean-0.8.0}/bblean/bitbirch.py +59 -58
- {bblean-0.7.8 → bblean-0.8.0}/bblean.egg-info/PKG-INFO +1 -1
- {bblean-0.7.8 → bblean-0.8.0}/tests/test_simple.py +18 -2
- {bblean-0.7.8 → bblean-0.8.0}/.cruft.json +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/.flake8 +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/.github/CODEOWNERS +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/.github/workflows/ci-cpp.yaml +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/.github/workflows/ci.yaml +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/.github/workflows/upload-to-pypi.yaml +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/.gitignore +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/.pre-commit-config.yaml +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/LICENSE +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/LICENSES/BSD-3-Clause.txt +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/LICENSES/GPL-3.0-only.txt +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/README.md +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/bblean/__init__.py +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/bblean/_config.py +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/bblean/_console.py +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/bblean/_legacy/__init__.py +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/bblean/_legacy/bb_int64.py +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/bblean/_legacy/bb_uint8.py +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/bblean/_memory.py +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/bblean/_merges.py +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/bblean/_py_similarity.py +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/bblean/_timer.py +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/bblean/analysis.py +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/bblean/cli.py +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/bblean/csrc/README.md +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/bblean/csrc/similarity.cpp +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/bblean/fingerprints.py +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/bblean/metrics.py +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/bblean/multiround.py +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/bblean/plotting.py +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/bblean/similarity.py +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/bblean/sklearn.py +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/bblean/smiles.py +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/bblean/utils.py +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/bblean-demo-v2.gif +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/bblean-demo.cast +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/bblean.egg-info/SOURCES.txt +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/bblean.egg-info/dependency_links.txt +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/bblean.egg-info/entry_points.txt +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/bblean.egg-info/requires.txt +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/bblean.egg-info/top_level.txt +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/docs/src/_static/api.svg +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/docs/src/_static/installing.svg +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/docs/src/_static/logo-dark-bw.svg +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/docs/src/_static/logo-light-bw.svg +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/docs/src/_static/publications.svg +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/docs/src/_static/style.css +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/docs/src/_static/user-guide.svg +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/docs/src/_templates/module.rst +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/docs/src/api-reference.rst +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/docs/src/conf.py +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/docs/src/index.rst +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/docs/src/installing.rst +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/docs/src/publications.rst +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/docs/src/user-guide/linux_memory_setup.rst +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/docs/src/user-guide/notebooks/bitbirch_best_practices.ipynb +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/docs/src/user-guide/notebooks/bitbirch_quickstart.ipynb +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/docs/src/user-guide/parameters.rst +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/docs/src/user-guide.rst +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/environment.yaml +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/examples/best_practices/best_practices_functions.py +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/examples/best_practices/best_practices_plots.py +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/examples/best_practices/bitbirch_best_practices.ipynb +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/examples/best_practices/bitbirch_best_practices_RDKit.ipynb +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/examples/best_practices/bitbirch_parameter.ipynb +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/examples/biogen_logS.csv +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/examples/bitbirch_best_practices.ipynb +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/examples/bitbirch_quickstart.ipynb +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/examples/chembl-33-natural-products-subset.smi +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/examples/dataset_splitting.ipynb +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/pyproject.toml +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/setup.cfg +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/setup.py +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/tests/chembl-sample-3k.smi +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/tests/chembl-sample-bad.smi +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/tests/legacy_merges.py +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/tests/legacy_metrics.py +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/tests/test_bb_consistency.py +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/tests/test_cli.py +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/tests/test_fake_fps.py +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/tests/test_fingerprints.py +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/tests/test_global_clustering.py +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/tests/test_import_bblean.py +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/tests/test_merges.py +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/tests/test_metrics.py +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/tests/test_multiround.py +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/tests/test_refine.py +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/tests/test_regression.py +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/tests/test_sampling.py +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/tests/test_similarity.py +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/tests/test_sklearn.py +0 -0
- {bblean-0.7.8 → bblean-0.8.0}/tests/test_utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: bblean
|
|
3
|
-
Version: 0.7.8
|
|
3
|
+
Version: 0.8.0
|
|
4
4
|
Summary: BitBirch-Lean Python package
|
|
5
5
|
Author: The Miranda-Quintana Lab and other BitBirch developers
|
|
6
6
|
Author-email: Ramon Alain Miranda Quintana <quintana@chem.ufl.edu>, Krisztina Zsigmond <kzsigmond@ufl.edu>, Ignacio Pickering <ipickering@ufl.edu>, Kenneth Lopez Perez <klopezperez@chem.ufl.edu>, Miroslav Lzicar <miroslav.lzicar@deepmedchem.com>
|
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '0.7.8'
|
|
32
|
-
__version_tuple__ = version_tuple = (0, 7, 8)
|
|
31
|
+
__version__ = version = '0.8.0'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 8, 0)
|
|
33
33
|
|
|
34
34
|
__commit_id__ = commit_id = None
|
|
@@ -47,6 +47,7 @@
|
|
|
47
47
|
# ./LICENSES/GPL-3.0-only.txt. If not, see <http://www.gnu.org/licenses/gpl-3.0.html>.
|
|
48
48
|
r"""BitBirch 'Lean' class for fast, memory-efficient O(N) clustering"""
|
|
49
49
|
from __future__ import annotations # Stringize type annotations for no runtime overhead
|
|
50
|
+
import itertools
|
|
50
51
|
import pickle
|
|
51
52
|
import sys
|
|
52
53
|
import typing_extensions as tpx
|
|
@@ -171,8 +172,8 @@ def _split_node(node: "_BFNode") -> tuple["_BFSubcluster", "_BFSubcluster"]:
|
|
|
171
172
|
"""
|
|
172
173
|
n_features = node.n_features
|
|
173
174
|
branching_factor = node.branching_factor
|
|
174
|
-
new_subcluster1 = _BFSubcluster(n_features=n_features)
|
|
175
|
-
new_subcluster2 = _BFSubcluster(n_features=n_features)
|
|
175
|
+
new_subcluster1 = _BFSubcluster.empty(n_features)
|
|
176
|
+
new_subcluster2 = _BFSubcluster.empty(n_features)
|
|
176
177
|
|
|
177
178
|
node1 = _BFNode(branching_factor, n_features)
|
|
178
179
|
node2 = node # Rename for clarity
|
|
@@ -394,13 +395,15 @@ class _BFSubcluster:
|
|
|
394
395
|
|
|
395
396
|
def __init__(
|
|
396
397
|
self,
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
n_features: int = 2048,
|
|
401
|
-
buffer: NDArray[np.integer] | None = None,
|
|
398
|
+
buffer: NDArray[np.integer],
|
|
399
|
+
mol_indices: tp.Sequence[int],
|
|
400
|
+
packed_centroid: NDArray[np.uint8] | None = None,
|
|
402
401
|
check_indices: bool = True,
|
|
403
|
-
):
|
|
402
|
+
) -> None:
|
|
403
|
+
# If packed centroid is passed, it must be equal to the packed centroid
|
|
404
|
+
# of the linear sum (this is not checked)
|
|
405
|
+
if mol_indices and check_indices and buffer[-1] != len(mol_indices):
|
|
406
|
+
raise ValueError("len mol_indices must be equal to buffer[-1] if specified")
|
|
404
407
|
# NOTE: Internally, _buffer holds both "linear_sum" and "n_samples" It is
|
|
405
408
|
# guaranteed to always have the minimum required uint dtype It should not be
|
|
406
409
|
# accessed by external classes, only used internally. The individual parts can
|
|
@@ -409,44 +412,40 @@ class _BFSubcluster:
|
|
|
409
412
|
#
|
|
410
413
|
# IMPORTANT: To mutate instances of this class, *always* use the public API
|
|
411
414
|
# given by replace|add_to_n_samples_and_linear_sum(...)
|
|
412
|
-
|
|
413
|
-
if linear_sum is not None:
|
|
414
|
-
raise ValueError("'linear_sum' and 'buffer' are mutually exclusive")
|
|
415
|
-
if check_indices and len(mol_indices) != buffer[-1]:
|
|
416
|
-
raise ValueError(
|
|
417
|
-
"Expected len(mol_indices) == buffer[-1],"
|
|
418
|
-
f" but found {len(mol_indices)} != {buffer[-1]}"
|
|
419
|
-
)
|
|
420
|
-
self._buffer = buffer
|
|
421
|
-
self.packed_centroid = centroid_from_sum(buffer[:-1], buffer[-1], pack=True)
|
|
422
|
-
else:
|
|
423
|
-
if linear_sum is not None:
|
|
424
|
-
if check_indices and len(mol_indices) != 1:
|
|
425
|
-
raise ValueError(
|
|
426
|
-
"Expected len(mol_indices) == 1,"
|
|
427
|
-
f" but found {len(mol_indices)} != 1"
|
|
428
|
-
)
|
|
429
|
-
buffer = np.empty((len(linear_sum) + 1,), dtype=np.uint8)
|
|
430
|
-
buffer[:-1] = linear_sum
|
|
431
|
-
buffer[-1] = 1
|
|
432
|
-
self._buffer = buffer
|
|
433
|
-
self.packed_centroid = pack_fingerprints(
|
|
434
|
-
linear_sum.astype(np.uint8, copy=False)
|
|
435
|
-
)
|
|
436
|
-
else:
|
|
437
|
-
# Empty subcluster
|
|
438
|
-
if check_indices and len(mol_indices) != 0:
|
|
439
|
-
raise ValueError(
|
|
440
|
-
"Expected len(mol_indices) == 0 for empty subcluster,"
|
|
441
|
-
f" but found {len(mol_indices)} != 0"
|
|
442
|
-
)
|
|
443
|
-
self._buffer = np.zeros((n_features + 1,), dtype=np.uint8)
|
|
444
|
-
self.packed_centroid = np.empty(
|
|
445
|
-
0, dtype=np.uint8
|
|
446
|
-
) # Will be overwritten
|
|
415
|
+
self._buffer = buffer
|
|
447
416
|
self.mol_indices = list(mol_indices)
|
|
417
|
+
if packed_centroid is not None:
|
|
418
|
+
self.packed_centroid = packed_centroid
|
|
419
|
+
else:
|
|
420
|
+
self.packed_centroid = centroid_from_sum(buffer[:-1], buffer[-1], pack=True)
|
|
448
421
|
self.child: tp.Optional["_BFNode"] = None
|
|
449
422
|
|
|
423
|
+
@classmethod
|
|
424
|
+
def empty(cls, n_features: int) -> tpx.Self:
|
|
425
|
+
packed_centroid = np.empty(0, dtype=np.uint8) # Will be overwritten
|
|
426
|
+
return cls(
|
|
427
|
+
np.zeros((n_features + 1,), dtype=np.uint8),
|
|
428
|
+
[],
|
|
429
|
+
packed_centroid,
|
|
430
|
+
check_indices=False,
|
|
431
|
+
)
|
|
432
|
+
|
|
433
|
+
@classmethod
|
|
434
|
+
def from_fingerprint(
|
|
435
|
+
cls, fp: NDArray[np.uint8], index: int, weight: int | None = None
|
|
436
|
+
) -> tpx.Self:
|
|
437
|
+
if weight is not None:
|
|
438
|
+
buffer = np.empty((len(fp) + 1,), dtype=min_safe_uint(weight))
|
|
439
|
+
buffer[:-1] = fp
|
|
440
|
+
buffer[-1] = 1
|
|
441
|
+
buffer *= weight
|
|
442
|
+
else:
|
|
443
|
+
buffer = np.empty((len(fp) + 1,), dtype=np.uint8)
|
|
444
|
+
buffer[:-1] = fp
|
|
445
|
+
buffer[-1] = 1
|
|
446
|
+
packed_centroid = pack_fingerprints(fp)
|
|
447
|
+
return cls(buffer, [index], packed_centroid, check_indices=False)
|
|
448
|
+
|
|
450
449
|
@property
|
|
451
450
|
def unpacked_centroid(self) -> NDArray[np.uint8]:
|
|
452
451
|
return _unpack_fingerprints(self.packed_centroid, self.n_features)
|
|
@@ -711,6 +710,7 @@ class BitBirch:
|
|
|
711
710
|
input_is_packed: bool = True,
|
|
712
711
|
n_features: int | None = None,
|
|
713
712
|
max_fps: int | None = None,
|
|
713
|
+
weights: tp.Iterable[int] | None = None,
|
|
714
714
|
) -> tpx.Self:
|
|
715
715
|
r"""Build a BF Tree for the input data.
|
|
716
716
|
|
|
@@ -763,15 +763,19 @@ class BitBirch:
|
|
|
763
763
|
else:
|
|
764
764
|
iterable = zip(reinsert_indices, arr_iterable)
|
|
765
765
|
|
|
766
|
+
it_weights: tp.Iterator[int | None]
|
|
767
|
+
if weights is None:
|
|
768
|
+
it_weights = itertools.repeat(None)
|
|
769
|
+
else:
|
|
770
|
+
it_weights = iter(weights)
|
|
771
|
+
|
|
766
772
|
threshold = self.threshold
|
|
767
773
|
branching_factor = self.branching_factor
|
|
768
774
|
merge_accept_fn = self._merge_accept_fn
|
|
769
775
|
|
|
770
776
|
arr_idx = 0
|
|
771
777
|
for idx, fp in iterable:
|
|
772
|
-
subcluster = _BFSubcluster(
|
|
773
|
-
linear_sum=fp, mol_indices=[idx], n_features=n_features
|
|
774
|
-
)
|
|
778
|
+
subcluster = _BFSubcluster.from_fingerprint(fp, idx, next(it_weights))
|
|
775
779
|
split = self._root.insert_bf_subcluster(
|
|
776
780
|
subcluster, merge_accept_fn, threshold
|
|
777
781
|
)
|
|
@@ -791,22 +795,22 @@ class BitBirch:
|
|
|
791
795
|
def _fit_buffers(
|
|
792
796
|
self,
|
|
793
797
|
X: _Input | Path | str,
|
|
794
|
-
reinsert_index_seqs:
|
|
795
|
-
|
|
796
|
-
) = "omit",
|
|
798
|
+
reinsert_index_seqs: tp.Iterable[tp.Sequence[int]] | None,
|
|
799
|
+
check_indices: bool = True,
|
|
797
800
|
) -> tpx.Self:
|
|
798
801
|
r"""Build a BF Tree starting from buffers
|
|
799
802
|
|
|
800
803
|
Buffers are arrays of the form:
|
|
801
804
|
- buffer[0:-1] = linear_sum
|
|
802
805
|
- buffer[-1] = n_samples
|
|
803
|
-
|
|
806
|
+
X is either an array or a list of such buffers
|
|
804
807
|
|
|
805
808
|
If `reinsert_index_seqs` is passed, X corresponds only to the buffers to be
|
|
806
809
|
reinserted into the tree, and `reinsert_index_seqs` are the sequences
|
|
807
810
|
of indices associated with such buffers.
|
|
808
811
|
|
|
809
|
-
If `reinsert_index_seqs` is
|
|
812
|
+
If `reinsert_index_seqs` is None, then no indices are collected in the tree.
|
|
813
|
+
Num samples is mutually exclusive with reinsert_index_seqs.
|
|
810
814
|
|
|
811
815
|
Parameters
|
|
812
816
|
----------
|
|
@@ -840,16 +844,13 @@ class BitBirch:
|
|
|
840
844
|
branching_factor = self.branching_factor
|
|
841
845
|
idx_provider: tp.Iterable[tp.Sequence[int]]
|
|
842
846
|
arr_idx = 0
|
|
843
|
-
if reinsert_index_seqs == "omit":
|
|
844
|
-
idx_provider = (()
|
|
845
|
-
check = False
|
|
847
|
+
if reinsert_index_seqs is None:
|
|
848
|
+
idx_provider = itertools.repeat(())
|
|
846
849
|
else:
|
|
847
850
|
idx_provider = reinsert_index_seqs
|
|
848
|
-
|
|
851
|
+
|
|
849
852
|
for idxs, buf in zip(idx_provider, arr_iterable):
|
|
850
|
-
subcluster = _BFSubcluster(
|
|
851
|
-
buffer=buf, mol_indices=idxs, n_features=n_features, check_indices=check
|
|
852
|
-
)
|
|
853
|
+
subcluster = _BFSubcluster(buf, idxs, check_indices=check_indices)
|
|
853
854
|
split = self._root.insert_bf_subcluster(
|
|
854
855
|
subcluster, merge_accept_fn, threshold
|
|
855
856
|
)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: bblean
|
|
3
|
-
Version: 0.7.8
|
|
3
|
+
Version: 0.8.0
|
|
4
4
|
Summary: BitBirch-Lean Python package
|
|
5
5
|
Author: The Miranda-Quintana Lab and other BitBirch developers
|
|
6
6
|
Author-email: Ramon Alain Miranda Quintana <quintana@chem.ufl.edu>, Krisztina Zsigmond <kzsigmond@ufl.edu>, Ignacio Pickering <ipickering@ufl.edu>, Kenneth Lopez Perez <klopezperez@chem.ufl.edu>, Miroslav Lzicar <miroslav.lzicar@deepmedchem.com>
|
|
@@ -1,8 +1,11 @@
|
|
|
1
|
+
import itertools
|
|
1
2
|
import pytest
|
|
2
3
|
import numpy as np
|
|
3
4
|
|
|
4
|
-
from bblean.bitbirch import BitBirch
|
|
5
|
-
from bblean.fingerprints import pack_fingerprints
|
|
5
|
+
from bblean.bitbirch import BitBirch
|
|
6
|
+
from bblean.fingerprints import pack_fingerprints, make_fake_fingerprints
|
|
7
|
+
|
|
8
|
+
from inline_snapshot import snapshot
|
|
6
9
|
|
|
7
10
|
# NOTE: Results on this file don't depend on branching factor / threshold
|
|
8
11
|
|
|
@@ -37,3 +40,16 @@ def test_bb_cluster_simple_repeated_fps() -> None:
|
|
|
37
40
|
)
|
|
38
41
|
ids = BitBirch().fit(mixed_fp, n_features=2048).get_cluster_mol_ids()
|
|
39
42
|
assert ids == [list(range(repeats))]
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def test_bb_cluster_3_fps() -> None:
|
|
46
|
+
fps = make_fake_fingerprints(3, n_features=8, seed=12620509540149709235, pack=True)
|
|
47
|
+
|
|
48
|
+
data = BitBirch().fit(fps).get_cluster_mol_ids()
|
|
49
|
+
assert data == snapshot([[0], [1], [2]])
|
|
50
|
+
data = BitBirch().fit(fps, weights=itertools.repeat(5)).get_cluster_mol_ids()
|
|
51
|
+
assert data == snapshot([[1, 2], [0]])
|
|
52
|
+
data = BitBirch().fit(fps, weights=itertools.repeat(10000)).get_cluster_mol_ids()
|
|
53
|
+
assert data == snapshot([[1, 2], [0]])
|
|
54
|
+
data = BitBirch().fit(fps, weights=itertools.repeat(1000000)).get_cluster_mol_ids()
|
|
55
|
+
assert data == snapshot([[1, 2], [0]])
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|