bblean-0.6.0b2-cp312-cp312-win_amd64.whl → bblean-0.7.2b0-cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
bblean/_cpp_similarity.cp312-win_amd64.pyd CHANGED
Binary file
bblean/_legacy/bb_int64.py CHANGED
@@ -633,6 +633,7 @@ class BitBirch:
         X = X[:max_fps]
         threshold = self.threshold
         branching_factor = self.branching_factor
+
         n_features = _validate_n_features(X, input_is_packed, n_features)
         d_type = X.dtype
 
@@ -718,6 +719,7 @@ class BitBirch:
         """
         threshold = self.threshold
         branching_factor = self.branching_factor
+
         n_features = _validate_n_features(X, input_is_packed, n_features)
         d_type = X.dtype
 
bblean/_py_similarity.py CHANGED
@@ -76,18 +76,10 @@ def jt_compl_isim(
         warnings.warn(msg, RuntimeWarning, stacklevel=2)
         return np.full(len(fps), fill_value=np.nan, dtype=np.float64)
     linear_sum = np.sum(fps, axis=0)
-    n_objects = len(fps) - 1
     comp_sims = [jt_isim_from_sum(linear_sum - fp, n_objects) for fp in fps]
-
     return np.array(comp_sims, dtype=np.float64)
 
 
-def _jt_isim_medoid_index(
-    fps: NDArray[np.uint8], input_is_packed: bool = True, n_features: int | None = None
-) -> int:
-    return np.argmin(jt_compl_isim(fps, input_is_packed, n_features)).item()
-
-
 def jt_isim_medoid(
     fps: NDArray[np.uint8],
     input_is_packed: bool = True,
@@ -110,7 +102,7 @@ def jt_isim_medoid(
     if len(fps) < 3:
         idx = 0  # Medoid undefined for sets of 3 or more fingerprints
     else:
-        idx = _jt_isim_medoid_index(fps, input_is_packed=False)
+        idx = np.argmin(jt_compl_isim(fps, input_is_packed, n_features)).item()
     m = fps[idx]
     if pack:
         return idx, pack_fingerprints(m)
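For orientation, a minimal sketch (not part of the diff) of the relationship the rewrite above relies on: `jt_compl_isim` is a leave-one-out iSIM, where entry i is the iSIM of the set with fingerprint i removed, and the medoid is its argmin. The tiny 0/1 array is hypothetical; imports go through the public `bblean.similarity` module:

```python
import numpy as np
from bblean.similarity import jt_compl_isim, jt_isim_medoid

# Three unpacked (one 0/1 byte per feature) fingerprints; a medoid
# is only meaningful for sets of at least 3
fps = np.array(
    [[1, 1, 0, 0],
     [1, 1, 1, 0],
     [0, 1, 1, 1]],
    dtype=np.uint8,
)
comp = jt_compl_isim(fps, input_is_packed=False)
idx, medoid = jt_isim_medoid(fps, input_is_packed=False, pack=False)
assert idx == int(np.argmin(comp))  # medoid = argmin of complementary iSIM
```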
bblean/_version.py CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID
 
-__version__ = version = '0.6.0b2'
-__version_tuple__ = version_tuple = (0, 6, 0, 'b2')
+__version__ = version = '0.7.2b0'
+__version_tuple__ = version_tuple = (0, 7, 2, 'b0')
 
-__commit_id__ = commit_id = None
+__commit_id__ = commit_id = 'g36216813a'
bblean/bitbirch.py CHANGED
@@ -47,6 +47,8 @@
 # ./LICENSES/GPL-3.0-only.txt. If not, see <http://www.gnu.org/licenses/gpl-3.0.html>.
 r"""BitBirch 'Lean' class for fast, memory-efficient O(N) clustering"""
 from __future__ import annotations  # Stringize type annotations for no runtime overhead
+import pickle
+import sys
 import typing_extensions as tpx
 import os
 import random
@@ -646,7 +648,7 @@ class BitBirch:
 
     @merge_criterion.setter
     def merge_criterion(self, value: str) -> None:
-        self.set_merge(criterion=value)
+        self.set_merge(merge_criterion=value)
 
     @property
     def tolerance(self) -> float | None:
@@ -671,7 +673,7 @@
 
     def set_merge(
         self,
-        criterion: str | MergeAcceptFunction | None = None,
+        merge_criterion: str | MergeAcceptFunction | None = None,
         *,
         tolerance: float | None = None,
         threshold: float | None = None,
@@ -687,10 +689,10 @@
                 "the global set_merge() function has *not* been used"
             )
         _tolerance = 0.05 if tolerance is None else tolerance
-        if isinstance(criterion, MergeAcceptFunction):
-            self._merge_accept_fn = criterion
-        elif isinstance(criterion, str):
-            self._merge_accept_fn = get_merge_accept_fn(criterion, _tolerance)
+        if isinstance(merge_criterion, MergeAcceptFunction):
+            self._merge_accept_fn = merge_criterion
+        elif isinstance(merge_criterion, str):
+            self._merge_accept_fn = get_merge_accept_fn(merge_criterion, _tolerance)
         if hasattr(self._merge_accept_fn, "tolerance"):
             self._merge_accept_fn.tolerance = _tolerance
         elif tolerance is not None:
@@ -1316,6 +1318,40 @@ class BitBirch:
         parts.append(f"tolerance={self.tolerance}")
         return f"{self.__class__.__name__}({', '.join(parts)})"
 
+    def save(self, path: Path | str) -> None:
+        r""":meta private:"""
+        # TODO: BitBIRCH is highly recursive. pickling may crash python,
+        # an alternative solution would be better
+        msg = (
+            "Saving large BitBIRCH trees may result in large memory peaks."
+            " An alternative serialization method may be implemented in the future"
+        )
+        warnings.warn(msg)
+        _old_limit = sys.getrecursionlimit()
+        sys.setrecursionlimit(1_000_000_000)
+        with open(path, mode="wb") as f:
+            pickle.dump(self, f)
+        sys.setrecursionlimit(_old_limit)
+
+    @classmethod
+    def load(cls, path: Path | str) -> tpx.Self:
+        r""":meta private:"""
+        # TODO: BitBIRCH is highly recursive. pickling may crash python,
+        # an alternative solution would be better
+        msg = (
+            "Loading large BitBIRCH trees may result in large memory peaks."
+            " An alternative serialization method may be implemented in the future"
+        )
+        warnings.warn(msg)
+        _old_limit = sys.getrecursionlimit()
+        sys.setrecursionlimit(1_000_000_000)
+        with open(path, mode="rb") as f:
+            tree = pickle.load(f)
+        sys.setrecursionlimit(_old_limit)
+        if not isinstance(tree, cls):
+            raise ValueError("Path does not contain a bitbirch object")
+        return tree
+
     def global_clustering(
         self,
         n_clusters: int,
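Taken together, a hedged usage sketch of the renamed keyword and the new persistence helpers (file names and the `fps` array are hypothetical; both helpers warn about memory peaks and temporarily raise the recursion limit, as shown above):

```python
import numpy as np
from bblean.bitbirch import BitBirch

fps = np.load("fps.npy")  # hypothetical packed fingerprint file

tree = BitBirch(branching_factor=50, threshold=0.65, merge_criterion="diameter")
tree.fit(fps)

# 0.7.x spelling: the first set_merge() parameter is now `merge_criterion`
tree.set_merge(merge_criterion="tolerance-diameter", tolerance=0.0)

tree.save("bitbirch.pkl")                 # pickle the whole tree
restored = BitBirch.load("bitbirch.pkl")  # raises ValueError on foreign pickles
```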
bblean/cli.py CHANGED
@@ -1096,26 +1096,29 @@ def _run(
 
    timer.end_timing("total", console, indent=False)
    console.print_peak_mem(out_dir, indent=False)
+   if save_tree:
+       if variant != "lean":
+           console.print("Can't save tree for non-lean variants", style="red")
+       else:
+           # TODO: Find alternative solution
+           tree.save(out_dir / "bitbirch.pkl")
    if variant == "lean":
-       if save_tree:
-           # TODO: BitBIRCH is highly recursive. pickling may crash python,
-           # an alternative solution would be better
-           _old_limit = sys.getrecursionlimit()
-           sys.setrecursionlimit(100_000)
-           with open(out_dir / "bitbirch.pkl", mode="wb") as f:
-               pickle.dump(tree, f)
-           sys.setrecursionlimit(_old_limit)
        tree.delete_internal_nodes()
-       # Dump outputs (peak memory, timings, config, cluster ids)
-       if save_centroids:
+   # Dump outputs (peak memory, timings, config, cluster ids)
+   if save_centroids:
+       if variant != "lean":
+           console.print("Can't save centroids for non-lean variants", style="red")
+           with open(out_dir / "clusters.pkl", mode="wb") as f:
+               pickle.dump(tree.get_cluster_mol_ids(), f)
+       else:
            output = tree.get_centroids_mol_ids()
            with open(out_dir / "clusters.pkl", mode="wb") as f:
                pickle.dump(output["mol_ids"], f)
            with open(out_dir / "cluster-centroids-packed.pkl", mode="wb") as f:
                pickle.dump(output["centroids"], f)
-       else:
-           with open(out_dir / "clusters.pkl", mode="wb") as f:
-               pickle.dump(tree.get_cluster_mol_ids(), f)
+   else:
+       with open(out_dir / "clusters.pkl", mode="wb") as f:
+           pickle.dump(tree.get_cluster_mol_ids(), f)
 
    collect_system_specs_and_dump_config(ctx.params)
    timer.dump(out_dir / "timings.json")
@@ -1193,6 +1196,14 @@ def _multiround(
        bool,
        Option("--save-centroids/--no-save-centroids", rich_help_panel="Advanced"),
    ] = True,
+   sort_fps: Annotated[
+       bool,
+       Option(
+           "--sort-fps/--no-sort-fps",
+           help="Sort the fingerprints by popcount before launching the initial round",
+           rich_help_panel="Advanced",
+       ),
+   ] = False,
    mid_merge_criterion: Annotated[
        str,
        Option(
@@ -1386,6 +1397,7 @@ def _multiround(
        midsection_threshold_change=mid_threshold_change,
        tolerance=tolerance,
        # Advanced
+       sort_fps=sort_fps,
        save_tree=save_tree,
        save_centroids=save_centroids,
        bin_size=bin_size,
@@ -1526,6 +1538,13 @@ def _fps_from_smiles(
            ),
        ),
    ] = False,
+   tab_separated: Annotated[
+       bool,
+       Option(
+           "--tab-sep/--no-tab-sep",
+           help="Whether the smiles file has the format <smiles><tab><field><tab>...",
+       ),
+   ] = False,
 ) -> None:
    r"""Generate a `*.npy` fingerprints file from one or more `*.smi` smiles files
 
@@ -1631,7 +1650,9 @@
    with mp_context.Pool(processes=num_ps) as pool:
        pool.map(
            create_fp_file,
-           _iter_idxs_and_smiles_batches(smiles_paths, num_per_batch),
+           _iter_idxs_and_smiles_batches(
+               smiles_paths, num_per_batch, tab_separated
+           ),
        )
    timer.end_timing("total", console, indent=False)
    stem = out_name.split(".")[0]
@@ -1671,7 +1692,9 @@
    with mp_context.Pool(processes=num_ps) as pool:
        pool.starmap(
            fps_array_filler,
-           _iter_ranges_and_smiles_batches(smiles_paths, num_per_batch),
+           _iter_ranges_and_smiles_batches(
+               smiles_paths, num_per_batch, tab_separated
+           ),
        )
    fps = np.ndarray((smiles_num, out_dim), dtype=dtype, buffer=fps_shmem.buf)
    mask = np.ndarray((smiles_num,), dtype=np.bool, buffer=invalid_mask_shmem.buf)
@@ -1848,3 +1871,33 @@ def _merge_fps(
        return
    np.save(out_dir / stem, np.concatenate(arrays))
    console.print(f"Finished. Outputs written to {str(out_dir / stem)}.npy")
+
+
+@app.command("fps-sort", rich_help_panel="Fingerprints")
+def _sort_fps(
+    in_file: Annotated[
+        Path,
+        Argument(help="`*.npy` file with packed fingerprints"),
+    ],
+    out_dir: Annotated[
+        Path | None,
+        Option("-o", "--out-dir", show_default=False),
+    ] = None,
+    seed: Annotated[
+        int | None,
+        Option("--seed", hidden=True, rich_help_panel="Debug"),
+    ] = None,
+) -> None:
+    import numpy as np
+    from bblean._py_similarity import _popcount
+
+    fps = np.load(in_file)
+    stem = in_file.stem
+    counts = _popcount(fps)
+    sort_idxs = np.argsort(counts)
+    fps = fps[sort_idxs]
+    if out_dir is None:
+        out_dir = Path.cwd()
+    out_dir.mkdir(exist_ok=True)
+    out_dir = out_dir.resolve()
+    np.save(out_dir / f"sorted-{stem}.npy", fps)
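The new `fps-sort` command amounts to a popcount-ordered permutation of a packed fingerprint file; an equivalent standalone sketch (file names hypothetical, `_popcount` is the private helper the command itself imports):

```python
import numpy as np
from bblean._py_similarity import _popcount

fps = np.load("fps.npy")            # packed uint8 fingerprints
order = np.argsort(_popcount(fps))  # ascending number of on-bits
np.save("sorted-fps.npy", fps[order])
```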
bblean/csrc/similarity.cpp CHANGED
@@ -300,6 +300,75 @@ double jt_isim_from_sum(const CArrayForcecast<uint64_t>& linear_sum,
   return a / ((a + (n_objects * sum_kq)) - sum_kqsq);
 }
 
+// NOTE: This is only *slightly* faster for C++ than numpy, **only if the
+// array is uint8_t** if the array is uint64 already, it is slower
+template <typename T>
+py::array_t<uint64_t> add_rows(const CArrayForcecast<T>& arr) {
+  if (arr.ndim() != 2) {
+    throw std::runtime_error("Input array must be 2-dimensional");
+  }
+  auto arr_ptr = arr.data();
+  auto out = py::array_t<uint64_t>(arr.shape(1));
+  auto out_ptr = out.mutable_data();
+  std::memset(out_ptr, 0, out.nbytes());
+  py::ssize_t n_samples = arr.shape(0);
+  py::ssize_t n_features = arr.shape(1);
+  // Check GCC / CLang vectorize this
+  for (py::ssize_t i = 0; i < n_samples; ++i) {
+    const uint8_t* arr_row_ptr = arr_ptr + i * n_features;
+    for (py::ssize_t j = 0; j < n_features; ++j) {
+      out_ptr[j] += static_cast<uint64_t>(arr_row_ptr[j]);
+    }
+  }
+  return out;
+}
+py::array_t<double> _nochecks_jt_compl_isim_unpacked_u8(
+    const py::array_t<uint8_t, py::array::c_style>& fps) {
+  py::ssize_t n_objects = fps.shape(0);
+  py::ssize_t n_features = fps.shape(1);
+  auto out = py::array_t<double>(n_objects);
+  auto out_ptr = out.mutable_data();
+
+  if (n_objects < 3) {
+    PyErr_WarnEx(PyExc_RuntimeWarning,
+                 "Invalid num fps in compl_isim. Expected n_objects >= 3",
+                 1);
+    for (py::ssize_t i{0}; i != n_objects; ++i) {
+      out_ptr[i] = std::numeric_limits<double>::quiet_NaN();
+    }
+    return out;
+  }
+
+  auto linear_sum = add_rows<uint8_t>(fps);
+  auto ls_cptr = linear_sum.data();
+
+  py::array_t<uint64_t> shifted_linear_sum(n_features);
+  auto shifted_ls_ptr = shifted_linear_sum.mutable_data();
+
+  auto in_cptr = fps.data();
+  for (py::ssize_t i{0}; i != n_objects; ++i) {
+    for (py::ssize_t j{0}; j != n_features; ++j) {
+      shifted_ls_ptr[j] = ls_cptr[j] - in_cptr[i * n_features + j];
+    }
+    // For all compl isim N is n_objects - 1
+    out_ptr[i] = jt_isim_from_sum(shifted_linear_sum, n_objects - 1);
+  }
+  return out;
+}
+
+py::array_t<double> jt_compl_isim(
+    const CArrayForcecast<uint8_t>& fps, bool input_is_packed = true,
+    std::optional<py::ssize_t> n_features_opt = std::nullopt) {
+  if (fps.ndim() != 2) {
+    throw std::runtime_error("fps arr must be 2D");
+  }
+  if (input_is_packed) {
+    return _nochecks_jt_compl_isim_unpacked_u8(
+        _nochecks_unpack_fingerprints_2d(fps, n_features_opt));
+  }
+  return _nochecks_jt_compl_isim_unpacked_u8(fps);
+}
+
 // Contraint: T must be uint64_t or uint8_t
 template <typename T>
 void _calc_arr_vec_jt(const py::array_t<uint8_t>& arr,
@@ -372,33 +441,10 @@ py::array_t<double> jt_sim_packed_precalc_cardinalities(
 }
 
 py::array_t<double> _jt_sim_arr_vec_packed(const py::array_t<uint8_t>& arr,
-                                           const py::array_t<uint8_t>& vec) {
+                                            const py::array_t<uint8_t>& vec) {
   return jt_sim_packed_precalc_cardinalities(arr, vec, _popcount_2d(arr));
 }
 
-// NOTE: This is only *slightly* faster for C++ than numpy, **only if the
-// array is uint8_t** if the array is uint64 already, it is slower
-template <typename T>
-py::array_t<uint64_t> add_rows(const CArrayForcecast<T>& arr) {
-  if (arr.ndim() != 2) {
-    throw std::runtime_error("Input array must be 2-dimensional");
-  }
-  auto arr_ptr = arr.data();
-  auto out = py::array_t<uint64_t>(arr.shape(1));
-  auto out_ptr = out.mutable_data();
-  std::memset(out_ptr, 0, out.nbytes());
-  py::ssize_t n_samples = arr.shape(0);
-  py::ssize_t n_features = arr.shape(1);
-  // Check GCC / CLang vectorize this
-  for (py::ssize_t i = 0; i < n_samples; ++i) {
-    const uint8_t* arr_row_ptr = arr_ptr + i * n_features;
-    for (py::ssize_t j = 0; j < n_features; ++j) {
-      out_ptr[j] += static_cast<uint64_t>(arr_row_ptr[j]);
-    }
-  }
-  return out;
-}
-
 double jt_isim_unpacked_u8(const CArrayForcecast<uint8_t>& arr) {
   return jt_isim_from_sum(add_rows<uint8_t>(arr), arr.shape(0));
 }
@@ -406,8 +452,9 @@ double jt_isim_unpacked_u8(const CArrayForcecast<uint8_t>& arr) {
 double jt_isim_packed_u8(
     const CArrayForcecast<uint8_t>& arr,
     std::optional<py::ssize_t> n_features_opt = std::nullopt) {
-  return jt_isim_from_sum(add_rows<uint8_t>(unpack_fingerprints(arr, n_features_opt)),
-                          arr.shape(0));
+  return jt_isim_from_sum(
+      add_rows<uint8_t>(unpack_fingerprints(arr, n_features_opt)),
+      arr.shape(0));
 }
 
 py::tuple jt_most_dissimilar_packed(
@@ -510,6 +557,10 @@ PYBIND11_MODULE(_cpp_similarity, m) {
   m.def("jt_isim_unpacked_u8", &jt_isim_unpacked_u8,
         "iSIM Tanimoto calculation", py::arg("arr"));
 
+  m.def("jt_compl_isim", &jt_compl_isim, "Complementary iSIM tanimoto",
+        py::arg("fps"), py::arg("input_is_packed") = true,
+        py::arg("n_features") = std::nullopt);
+
   m.def("_jt_sim_arr_vec_packed", &_jt_sim_arr_vec_packed,
        "Tanimoto similarity between a matrix of packed fps and a single "
        "packed fp",
bblean/fingerprints.py CHANGED
@@ -115,7 +115,11 @@ def _get_generator(kind: str, n_features: int) -> tp.Any:
        return rdFingerprintGenerator.GetMorganGenerator(radius=2, fpSize=n_features)
    elif kind == "ecfp6":
        return rdFingerprintGenerator.GetMorganGenerator(radius=3, fpSize=n_features)
-   raise ValueError(f"Unknonw kind {kind}. Should be one of 'rdkit|ecfp4|ecfp6'")
+   elif kind == "topological":
+       return rdFingerprintGenerator.GetTopologicalTorsionGenerator(fpSize=n_features)
+   elif kind == "ap":
+       return rdFingerprintGenerator.GetAtomPairGenerator(fpSize=n_features)
+   raise ValueError(f"Unknown kind {kind}. Use 'rdkit|ecfp4|ecfp6|topological|ap'")
 
 
 def _get_sanitize_flags(sanitize: str) -> tp.Any:
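The two new kinds map directly onto RDKit generators, as the branch above shows; a small sketch of what a caller gets back (molecule and size arbitrary):

```python
from rdkit import Chem
from rdkit.Chem import rdFingerprintGenerator

mol = Chem.MolFromSmiles("c1ccccc1O")  # phenol, arbitrary example
for gen in (
    rdFingerprintGenerator.GetTopologicalTorsionGenerator(fpSize=2048),  # kind="topological"
    rdFingerprintGenerator.GetAtomPairGenerator(fpSize=2048),            # kind="ap"
):
    fp = gen.GetFingerprintAsNumPy(mol)  # 0/1 array of length 2048
    print(fp.sum(), "bits set")
```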
bblean/multiround.py CHANGED
@@ -65,6 +65,7 @@ from bblean._config import DEFAULTS
 from bblean.utils import batched
 from bblean.bitbirch import BitBirch
 from bblean.fingerprints import _get_fps_file_num
+from bblean._py_similarity import _popcount
 
 __all__ = ["run_multiround_bitbirch"]
 
@@ -157,6 +158,7 @@ class _InitialRound:
        max_fps: int | None = None,
        merge_criterion: str = DEFAULTS.merge_criterion,
        input_is_packed: bool = True,
+       sort_fps: bool = False,
    ) -> None:
        self.n_features = n_features
        self.refinement_before_midsection = refinement_before_midsection
@@ -171,6 +173,7 @@ class _InitialRound:
        self.refine_merge_criterion = refine_merge_criterion
        self.input_is_packed = input_is_packed
        self.refine_threshold_change = refine_threshold_change
+       self._sort_fps = sort_fps
 
    def __call__(self, file_info: tuple[str, Path, int, int]) -> None:
        file_label, fp_file, start_idx, end_idx = file_info
@@ -182,6 +185,14 @@ class _InitialRound:
            threshold=self.threshold,
            merge_criterion=self.merge_criterion,
        )
+       if self._sort_fps:
+           fp_input = np.load(fp_file)
+           counts = _popcount(fp_input)
+           sort_idxs = np.argsort(counts)
+           fp_input = fp_input[sort_idxs]
+       else:
+           fp_input = fp_file
+
        range_ = range(start_idx, end_idx)
        tree.fit(
            fp_file,
@@ -201,7 +212,7 @@
        # Finish the first refinement step internally in this round
        tree.reset()
        tree.set_merge(
-           self.refine_merge_criterion,
+           merge_criterion=self.refine_merge_criterion,
            tolerance=self.tolerance,
            threshold=self.threshold + self.refine_threshold_change,
        )
@@ -225,7 +236,7 @@ class _TreeMergingRound:
        round_idx: int,
        out_dir: Path | str,
        split_largest_cluster: bool,
-       criterion: str,
+       merge_criterion: str,
        all_fp_paths: tp.Sequence[Path] = (),
    ) -> None:
        self.all_fp_paths = list(all_fp_paths)
@@ -235,14 +246,14 @@
        self.round_idx = round_idx
        self.out_dir = Path(out_dir)
        self.split_largest_cluster = split_largest_cluster
-       self.criterion = criterion
+       self.merge_criterion = merge_criterion
 
    def __call__(self, batch_info: tuple[str, tp.Sequence[tuple[Path, Path]]]) -> None:
        batch_label, batch_path_pairs = batch_info
        tree = BitBirch(
            branching_factor=self.branching_factor,
            threshold=self.threshold,
-           merge_criterion=self.criterion,
+           merge_criterion=self.merge_criterion,
            tolerance=self.tolerance,
        )
        # Rebuild a tree, inserting all BitFeatures from the corresponding batch
@@ -270,13 +281,20 @@ class _FinalTreeMergingRound(_TreeMergingRound):
        branching_factor: int,
        threshold: float,
        tolerance: float,
-       criterion: str,
+       merge_criterion: str,
        out_dir: Path | str,
        save_tree: bool,
        save_centroids: bool,
    ) -> None:
        super().__init__(
-           branching_factor, threshold, tolerance, -1, out_dir, False, criterion, ()
+           branching_factor,
+           threshold,
+           tolerance,
+           -1,
+           out_dir,
+           False,
+           merge_criterion,
+           (),
        )
        self.save_tree = save_tree
        self.save_centroids = save_centroids
@@ -286,7 +304,7 @@
        tree = BitBirch(
            branching_factor=self.branching_factor,
            threshold=self.threshold,
-           merge_criterion=self.criterion,
+           merge_criterion=self.merge_criterion,
            tolerance=self.tolerance,
        )
        # Rebuild a tree, inserting all BitFeatures from the corresponding batch
@@ -298,13 +316,8 @@
 
        # Save clusters and exit
        if self.save_tree:
-           # TODO: BitBIRCH is highly recursive. pickling may crash python,
-           # an alternative solution would be better
-           _old_limit = sys.getrecursionlimit()
-           sys.setrecursionlimit(100_000)
-           with open(self.out_dir / "bitbirch.pkl", mode="wb") as f:
-               pickle.dump(tree, f)
-           sys.setrecursionlimit(_old_limit)
+           # TODO: Find alternative solution
+           tree.save(self.out_dir / "bitbirch.pkl")
        tree.delete_internal_nodes()
        if self.save_centroids:
            output = tree.get_centroids_mol_ids()
@@ -358,6 +371,7 @@ def run_multiround_bitbirch(
    mp_context: tp.Any = None,
    save_tree: bool = False,
    save_centroids: bool = True,
+   sort_fps: bool = False,
    # Debug
    max_fps: int | None = None,
    verbose: bool = False,
@@ -404,6 +418,7 @@
    console.print(f"(Initial) Round {round_idx}: Cluster initial batch of fingerprints")
 
    initial_fn = _InitialRound(
+       sort_fps=sort_fps,
        n_features=n_features,
        refinement_before_midsection=refinement_before_midsection,
        max_fps=max_fps,
@@ -441,7 +456,7 @@
        round_idx=round_idx,
        all_fp_paths=input_files,
        split_largest_cluster=split_largest_after_each_midsection_round,
-       criterion=midsection_merge_criterion,
+       merge_criterion=midsection_merge_criterion,
        threshold=threshold + midsection_threshold_change,
        **common_kwargs,
    )
@@ -469,7 +484,7 @@
    final_fn = _FinalTreeMergingRound(
        save_tree=save_tree,
        save_centroids=save_centroids,
-       criterion=final_merge_criterion,
+       merge_criterion=final_merge_criterion,
        threshold=threshold + midsection_threshold_change,
        **common_kwargs,
    )
bblean/plotting.py CHANGED
@@ -399,13 +399,17 @@ def dump_mol_images(
    clusters: list[list[int]],
    cluster_idx: int = 0,
    batch_size: int = 30,
+   limit: int = -1,
 ) -> None:
    r"""Dump smiles associated with a specific cluster as ``*.png`` image files"""
    if isinstance(smiles, str):
        smiles = [smiles]
    smiles = np.asarray(smiles)
    idxs = clusters[cluster_idx]
+   num = 0
    for i, idx_seq in enumerate(batched(idxs, batch_size)):
+       if num + len(idx_seq) > limit:
+           idx_seq = idx_seq[: num + len(idx_seq) - limit]
        mols = []
        for smi in smiles[list(idx_seq)]:
            mol = Chem.MolFromSmiles(smi)
@@ -415,6 +419,9 @@
        img = Draw.MolsToGridImage(mols, molsPerRow=5)
        with open(f"cluster_{cluster_idx}_{i}.png", "wb") as f:
            f.write(img.data)
+       num += len(idx_seq)
+       if num >= limit:
+           break
 
 
 # For internal use, dispatches a visualization workflow and optionally saves
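A minimal usage sketch of the extended signature (inputs hypothetical; the function writes `cluster_<idx>_<batch>.png` grids into the working directory, and `limit` caps how many molecules are rendered):

```python
from bblean.plotting import dump_mol_images

smiles = ["CCO", "CCN", "CCC", "c1ccccc1"]  # hypothetical library
clusters = [[0, 2, 3], [1]]                 # molecule indices per cluster
# Render at most 2 molecules of cluster 0, 2 per image grid
dump_mol_images(smiles, clusters, cluster_idx=0, batch_size=2, limit=2)
```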
bblean/similarity.py CHANGED
@@ -34,12 +34,8 @@ __all__ = [
    "jt_sim_matrix_packed",
 ]
 
-from bblean._py_similarity import (
-    centroid_from_sum,
-    centroid,
-    jt_compl_isim,
-    jt_isim_medoid,
-)
+from bblean._py_similarity import centroid_from_sum, centroid
+from bblean.fingerprints import pack_fingerprints, unpack_fingerprints
 
 # jt_isim_packed and jt_isim_unpacked are not exposed, only used within functions for
 # speed
@@ -49,6 +45,7 @@ if os.getenv("BITBIRCH_NO_EXTENSIONS"):
        jt_isim_from_sum,
        jt_isim_unpacked,
        jt_isim_packed,
+       jt_compl_isim,
        _jt_sim_arr_vec_packed,
        jt_most_dissimilar_packed,
    )
@@ -56,11 +53,13 @@ else:
    try:
        from bblean._cpp_similarity import (  # type: ignore
            jt_isim_from_sum,
-           _jt_sim_arr_vec_packed,
            jt_isim_unpacked_u8,
            jt_isim_packed_u8,
+           jt_compl_isim,  # TODO: Does it need wrappers for non-uint8?
+           _jt_sim_arr_vec_packed,
            jt_most_dissimilar_packed,
-           unpack_fingerprints,
+           # Needed for wrappers
+           unpack_fingerprints as _unpack_fingerprints,
        )
 
        # Wrap these two since doing
@@ -80,7 +79,7 @@ else:
        if arr.dtype == np.uint64:
            return jt_isim_from_sum(
                np.sum(
-                   unpack_fingerprints(arr, n_features),  # type: ignore
+                   _unpack_fingerprints(arr, n_features),  # type: ignore
                    axis=0,
                    dtype=np.uint64,
                ),
@@ -93,6 +92,7 @@ else:
            jt_isim_from_sum,
            jt_isim_unpacked,
            jt_isim_packed,
+           jt_compl_isim,
            _jt_sim_arr_vec_packed,
            jt_most_dissimilar_packed,
        )
@@ -103,6 +103,35 @@ else:
        )
 
 
+def jt_isim_medoid(
+    fps: NDArray[np.uint8],
+    input_is_packed: bool = True,
+    n_features: int | None = None,
+    pack: bool = True,
+) -> tuple[int, NDArray[np.uint8]]:
+    r"""Calculate the (Tanimoto) medoid of a set of fingerprints, using iSIM
+
+    Returns both the index of the medoid in the input array and the medoid itself
+
+    .. note::
+        Returns the first (or only) fingerprint for array of size 2 and 1 respectively.
+        Raises ValueError for arrays of size 0
+
+    """
+    if not fps.size:
+        raise ValueError("Size of fingerprints set must be > 0")
+    if input_is_packed:
+        fps = unpack_fingerprints(fps, n_features)
+    if len(fps) < 3:
+        idx = 0  # Medoid undefined for sets of 3 or more fingerprints
+    else:
+        idx = np.argmin(jt_compl_isim(fps, input_is_packed, n_features)).item()
+    m = fps[idx]
+    if pack:
+        return idx, pack_fingerprints(m)
+    return idx, m
+
+
 def jt_isim(
    fps: NDArray[np.integer],
    input_is_packed: bool = True,
@@ -149,7 +178,11 @@ def jt_isim_diameter(
    r"""Calculate the Tanimoto diameter of a set of fingerprints"""
    return jt_isim_diameter_from_sum(
        np.sum(
-           unpack_fingerprints(arr, n_features) if input_is_packed else arr,
+           (
+               unpack_fingerprints(arr.astype(np.uint8, copy=False), n_features)
+               if input_is_packed
+               else arr
+           ),
            axis=0,
            dtype=np.uint64,
        ),  # type: ignore
@@ -165,7 +198,11 @@ def jt_isim_radius(
    r"""Calculate the Tanimoto radius of a set of fingerprints"""
    return jt_isim_radius_from_sum(
        np.sum(
-           unpack_fingerprints(arr, n_features) if input_is_packed else arr,
+           (
+               unpack_fingerprints(arr.astype(np.uint8, copy=False), n_features)
+               if input_is_packed
+               else arr
+           ),
            axis=0,
            dtype=np.uint64,
        ),  # type: ignore
@@ -181,7 +218,11 @@ def jt_isim_radius_compl(
    r"""Calculate the complement of the Tanimoto radius of a set of fingerprints"""
    return jt_isim_radius_compl_from_sum(
        np.sum(
-           unpack_fingerprints(arr, n_features) if input_is_packed else arr,
+           (
+               unpack_fingerprints(arr.astype(np.uint8, copy=False), n_features)
+               if input_is_packed
+               else arr
+           ),
            axis=0,
            dtype=np.uint64,
        ),  # type: ignore
@@ -252,14 +293,28 @@ def estimate_jt_std(
    n_samples: int | None = None,
    input_is_packed: bool = True,
    n_features: int | None = None,
+   min_samples: int = 1_000_000,
 ) -> float:
-   r"""Estimate std of tanimoto sim using a deterministic sample"""
+   r"""Estimate the std of all pairwise Tanimoto.
+
+   Returns
+   -------
+   std : float
+       The standard deviation of all pairwise Tanimoto among the sampled fingerprints.
+   """
    num_fps = len(fps)
+   if num_fps > min_samples:
+       np.random.seed(42)
+       random_choices = np.random.choice(num_fps, size=min_samples, replace=False)
+       fps = fps[random_choices]
+       num_fps = len(fps)
    if n_samples is None:
-       n_samples = max(num_fps // 1000, 50)
+       # Heuristic: use at least 50 samples, or 1 per 10,000 fingerprints,
+       # to balance statistical representativeness and computational efficiency
+       n_samples = max(num_fps // 10_000, 50)
    sample_idxs = jt_stratified_sampling(fps, n_samples, input_is_packed, n_features)
 
-   # Work with sample from now on
+   # Work with only the sampled fingerprints
    fps = fps[sample_idxs]
    num_fps = len(fps)
    pairs = np.empty(num_fps * (num_fps - 1) // 2, dtype=np.float64)
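Putting the reshuffled exports to use, a hedged sketch (random unpacked fingerprints stand in for real data; all three names are importable from `bblean.similarity` after this change):

```python
import numpy as np
from bblean.similarity import jt_isim, jt_isim_medoid, estimate_jt_std

rng = np.random.default_rng(0)
fps = (rng.random((100, 256)) > 0.9).astype(np.uint8)  # unpacked 0/1 matrix

avg_sim = jt_isim(fps, input_is_packed=False)      # average pairwise Tanimoto
idx, medoid = jt_isim_medoid(fps, input_is_packed=False)
std = estimate_jt_std(fps, input_is_packed=False)  # subsampled estimate
```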
bblean/sklearn.py CHANGED
@@ -131,8 +131,7 @@ class BitBirch(
            .astype(np.uint8, copy=False)
            .view(np.bool)
        )
-       # TODO: Even when both inputs are bool, this function warns for some reason
-       # I believe this may be a sklearn bug
+       # TODO: Due to a sklearn bug this performs unnecessary casts
        centers = self.subcluster_centers_.astype(np.uint8, copy=False).view(np.bool)
        argmin = pairwise_distances_argmin(X, centers, metric="jaccard")
        return self.subcluster_labels_[argmin]
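The `predict` path above bottoms out in scikit-learn's `pairwise_distances_argmin` over boolean arrays; a standalone sketch of that call (random data; `np.bool_` spelled explicitly for portability across NumPy versions):

```python
import numpy as np
from sklearn.metrics import pairwise_distances_argmin

rng = np.random.default_rng(0)
X = (rng.random((5, 64)) > 0.8).astype(np.bool_)        # query fingerprints
centers = (rng.random((3, 64)) > 0.8).astype(np.bool_)  # subcluster centers
labels = pairwise_distances_argmin(X, centers, metric="jaccard")
```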
bblean/smiles.py CHANGED
@@ -32,23 +32,34 @@ def calc_num_smiles(smiles_paths: SmilesPaths) -> int:
 
 
 def iter_smiles_from_paths(
-    smiles_paths: SmilesPaths,
+    smiles_paths: SmilesPaths, tab_separated: bool = False
 ) -> tp.Iterator[str]:
-    r"""Iterate over smiles in a sequence of smiles paths"""
+    r"""Iterate over smiles in a sequence of smiles paths
+
+    If tab_separated = True the file is assumed to have the format
+    <smiles><tab><field><tab><field>..., and only the smiles is returned
+    """
    if isinstance(smiles_paths, (Path, str)):
        smiles_paths = [smiles_paths]
    for smi_path in smiles_paths:
        with open(smi_path, mode="rt", encoding="utf-8") as f:
            for smi in f:
+               smi = smi if not tab_separated else smi.split("\t")[0]
+               # Skip headers
+               if smi.lower().strip() == "smiles":
+                   continue
                yield smi
 
 
 def _iter_ranges_and_smiles_batches(
    smiles_paths: SmilesPaths,
    num_per_batch: int,
+   tab_separated: bool = False,
 ) -> tp.Iterable[tuple[tuple[int, int], tuple[str, ...]]]:
    start_idx = 0
-   for batch in batched(iter_smiles_from_paths(smiles_paths), num_per_batch):
+   for batch in batched(
+       iter_smiles_from_paths(smiles_paths, tab_separated), num_per_batch
+   ):
        size = len(batch)
        end_idx = start_idx + size
        yield (start_idx, end_idx), batch
@@ -56,6 +67,10 @@ def _iter_ranges_and_smiles_batches(
 
 
 def _iter_idxs_and_smiles_batches(
-    smiles_paths: SmilesPaths, num_per_batch: int
+    smiles_paths: SmilesPaths,
+    num_per_batch: int,
+    tab_separated: bool = False,
 ) -> tp.Iterable[tuple[int, tuple[str, ...]]]:
-    yield from enumerate(batched(iter_smiles_from_paths(smiles_paths), num_per_batch))
+    yield from enumerate(
+        batched(iter_smiles_from_paths(smiles_paths, tab_separated), num_per_batch)
+    )
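A hedged sketch of what `tab_separated=True` (the CLI's `--tab-sep`) now tolerates: an optional 'smiles' header row and extra tab-delimited fields after the SMILES column (file contents hypothetical):

```python
from pathlib import Path
from bblean.smiles import iter_smiles_from_paths

path = Path("library.smi")  # hypothetical input
path.write_text("smiles\tname\nCCO\tethanol\nc1ccccc1\tbenzene\n")

for smi in iter_smiles_from_paths(path, tab_separated=True):
    print(smi.strip())  # CCO, then c1ccccc1 (the header row is skipped)
```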
bblean-0.7.2b0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: bblean
-Version: 0.6.0b2
+Version: 0.7.2b0
 Summary: BitBirch-Lean Python package
 Author: The Miranda-Quintana Lab and other BitBirch developers
 Author-email: Ramon Alain Miranda Quintana <quintana@chem.ufl.edu>, Krisztina Zsigmond <kzsigmond@ufl.edu>, Ignacio Pickering <ipickering@ufl.edu>, Kenneth Lopez Perez <klopezperez@chem.ufl.edu>, Miroslav Lzicar <miroslav.lzicar@deepmedchem.com>
@@ -90,6 +90,7 @@ macOS via pip, which automatically includes C++ extensions:
 
 ```bash
 pip install bblean
+# Alternatively you can use 'uv pip install'
 bb --help
 ```
 
@@ -235,7 +236,7 @@ tree = bblean.BitBirch(branching_factor=50, threshold=0.65, merge_criterion="dia
 tree.fit(fps)
 
 # Refine the tree (if needed)
-tree.set_merge(merge_criterion="tolerance-diameter", tolerance=0.0)
+tree.set_merge("tolerance-diameter", tolerance=0.0)
 tree.refine_inplace(fps)
 
 # Visualize the results
bblean-0.7.2b0.dist-info/RECORD ADDED
@@ -0,0 +1,31 @@
+bblean/__init__.py,sha256=9cudBHEt0H5p0jKEvgrhLZIHPSzwNAx0uJRp-_iM32I,686
+bblean/_config.py,sha256=WaONZilOWCLFdZulqWLKRqNM-ZLhY0YCXfwk-84FYmQ,1813
+bblean/_console.py,sha256=Mk1hi1NdPw2HDmjWj1LLbCuV3vCxL5l6u2gXaEeOFBM,8021
+bblean/_cpp_similarity.cp312-win_amd64.pyd,sha256=1tgp4zCFzFZ2F3a99wGfnaXC5dDWZtfwZoujOgm8d9I,182272
+bblean/_memory.py,sha256=eycXzXV_O_VEyIKpAv3QpbxtpB5WkBLChzm_e2Dqaw0,6892
+bblean/_merges.py,sha256=xwFMJUPJ9VMujf2nSROx0NhsPoQ_R84KIxBF81x2hks,6432
+bblean/_py_similarity.py,sha256=VYWu7gVCEDjNaRLgxiCxCGjCfmTity86UPC0dfT83Ok,9633
+bblean/_timer.py,sha256=D1-_tTQFJqIQgzl4HSE__-P3Scw72EIVlNDaChJT8Qs,1402
+bblean/_version.py,sha256=lDvwo76PevPSZqRGkRtOEgFcX8LHv1s_-G_abs3gvZk,754
+bblean/analysis.py,sha256=apD5OgSoNGbIuBLSJFFzlUkVjZHBtb3fVEeEUJGbyqc,8118
+bblean/bitbirch.py,sha256=OjK0IhdXT83dMdtsEcpQQLbAq6yEBb7z-7QojAkgelA,60279
+bblean/cli.py,sha256=3thYaVWDfiMP8Crs7ShJnNa5E2MCbFoPeK5tVwQVY1w,64043
+bblean/fingerprints.py,sha256=IvIzs2ETnQlUW8nNe_sk3GIgrhGBhrhBBAfubtRkS6A,15542
+bblean/metrics.py,sha256=4KB-PIQJtFMsNg7lG2uM1HEId_eR5vhqcdLpCVLuI5Y,7280
+bblean/multiround.py,sha256=rJMdwUJ6p5hBeNDWuoJMBMzo2doCTcxOjOhC1ZfcS7U,20278
+bblean/plotting.py,sha256=OfVVdmvxaVVeyT7iAIL5QinYZwx5Ivzf8OcsAuY-qp4,15886
+bblean/similarity.py,sha256=O2OTW5Dw64go177jwzF5skvDSJEzDS7UImyIQ2nShig,12192
+bblean/sklearn.py,sha256=KK7rbF3gENjlv5-9uOvH-Q0LEW1RUY__xClcnLznuE0,7450
+bblean/smiles.py,sha256=zyLWXzTLebeFmltDMuJcneJqaLLgGOYw0118889nn7A,2356
+bblean/utils.py,sha256=K0ttSPf54nxrKD1TwbLFuwDIRlAD0jdr6KnuTqXs-HQ,3836
+bblean/_legacy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+bblean/_legacy/bb_int64.py,sha256=Otqxu8NBLrfOMpJoMrLgWtDP_9Hn4joQXZVkU1hjges,45774
+bblean/_legacy/bb_uint8.py,sha256=8kbeVAq7MxiR8hS_6lKhSDhVWc6acjLmLzNFCR466iA,41573
+bblean/csrc/README.md,sha256=qOPPK6sTqkYgnlPWtcNu9P3PwuLH8cCNJ1FwJeewsrk,59
+bblean/csrc/similarity.cpp,sha256=q6oMg9Vd0REPmqze8xToTmeXZiEuHTmOfL6QsTRFkDE,23122
+bblean-0.7.2b0.dist-info/licenses/LICENSE,sha256=Dq9t2XHr5wSrykVuVo8etKsAS35ENnDobU1h7t3H_-k,2598
+bblean-0.7.2b0.dist-info/METADATA,sha256=-aZ6OJ4RYBlH3mb6w4c-wOaoFn-4T5u4PmqdeJyjjM8,13053
+bblean-0.7.2b0.dist-info/WHEEL,sha256=8UP9x9puWI0P1V_d7K2oMTBqfeLNm21CTzZ_Ptr0NXU,101
+bblean-0.7.2b0.dist-info/entry_points.txt,sha256=a0jb2L5JFKioMD6CqbvJiI2unaArGzi-AMZsyY-uyGg,38
+bblean-0.7.2b0.dist-info/top_level.txt,sha256=ybxTonvTC9zR25yR5B27aEDLl6CiwID093ZyS_--Cq4,7
+bblean-0.7.2b0.dist-info/RECORD,,
bblean-0.6.0b2.dist-info/RECORD DELETED
@@ -1,31 +0,0 @@
-bblean/__init__.py,sha256=9cudBHEt0H5p0jKEvgrhLZIHPSzwNAx0uJRp-_iM32I,686
-bblean/_config.py,sha256=WaONZilOWCLFdZulqWLKRqNM-ZLhY0YCXfwk-84FYmQ,1813
-bblean/_console.py,sha256=Mk1hi1NdPw2HDmjWj1LLbCuV3vCxL5l6u2gXaEeOFBM,8021
-bblean/_cpp_similarity.cp312-win_amd64.pyd,sha256=GncQ3lReLTUxYnx66NaGgDA3pjJja2FUiju2NG6hr2g,178688
-bblean/_memory.py,sha256=eycXzXV_O_VEyIKpAv3QpbxtpB5WkBLChzm_e2Dqaw0,6892
-bblean/_merges.py,sha256=xwFMJUPJ9VMujf2nSROx0NhsPoQ_R84KIxBF81x2hks,6432
-bblean/_py_similarity.py,sha256=d1kbEc8lc0MgYsmW6nkFI-tV1Plo12e3bml32_8dkoU,9859
-bblean/_timer.py,sha256=D1-_tTQFJqIQgzl4HSE__-P3Scw72EIVlNDaChJT8Qs,1402
-bblean/_version.py,sha256=Z6NaqO7AvzfKUsoqEpOi7eBkzR_-GLsbF8CpiRFwVJo,746
-bblean/analysis.py,sha256=apD5OgSoNGbIuBLSJFFzlUkVjZHBtb3fVEeEUJGbyqc,8118
-bblean/bitbirch.py,sha256=fRS9dIHu3wx7rJztPYUyEINuv5KsridRpqLYh_DlmT0,58792
-bblean/cli.py,sha256=FwO-jWO9Wt-1CGP8mL_PmbEyJyHPnQxo9BaGT2zLVjE,62506
-bblean/fingerprints.py,sha256=cArsOt-946xjvoKM8qTXc0wfKA39ZFhzIht6MW9x-kQ,15315
-bblean/metrics.py,sha256=4KB-PIQJtFMsNg7lG2uM1HEId_eR5vhqcdLpCVLuI5Y,7280
-bblean/multiround.py,sha256=_-pr5LG_GLSBNZ60uLcy8XZ-qo7lr0Y048Kp041_ug8,19980
-bblean/plotting.py,sha256=1ryJbWJBVY7gkoX_JDyhY4k62spjumz1_V8IhpObzbY,15676
-bblean/similarity.py,sha256=nCrUH0t6k5GMNNWf6gD4r7ZszQEPR3b2qyk5Im7Naa8,10203
-bblean/sklearn.py,sha256=USE5qfGrWLZokz4Ati_RsRIGn1mOwHSCAw82VXD7qhA,7512
-bblean/smiles.py,sha256=fBoU41eLGmxq_uPkX-yWM9SBoPqb7_sWXmy0eo0MtNs,1855
-bblean/utils.py,sha256=K0ttSPf54nxrKD1TwbLFuwDIRlAD0jdr6KnuTqXs-HQ,3836
-bblean/_legacy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-bblean/_legacy/bb_int64.py,sha256=TJ5vd71iVLHZW1gEit_tAd4nwpJ8PMoWys84e9E8RIk,45770
-bblean/_legacy/bb_uint8.py,sha256=8kbeVAq7MxiR8hS_6lKhSDhVWc6acjLmLzNFCR466iA,41573
-bblean/csrc/README.md,sha256=qOPPK6sTqkYgnlPWtcNu9P3PwuLH8cCNJ1FwJeewsrk,59
-bblean/csrc/similarity.cpp,sha256=7zS76zHywEOnxPqK0kFPxrgsRjTKAD_YrSCYMgb1DJ4,21231
-bblean-0.6.0b2.dist-info/licenses/LICENSE,sha256=Dq9t2XHr5wSrykVuVo8etKsAS35ENnDobU1h7t3H_-k,2598
-bblean-0.6.0b2.dist-info/METADATA,sha256=9TcsxKr-RZCJGp6IFRXERdSsPbkO9GuYDYfx31kKg5w,13023
-bblean-0.6.0b2.dist-info/WHEEL,sha256=8UP9x9puWI0P1V_d7K2oMTBqfeLNm21CTzZ_Ptr0NXU,101
-bblean-0.6.0b2.dist-info/entry_points.txt,sha256=a0jb2L5JFKioMD6CqbvJiI2unaArGzi-AMZsyY-uyGg,38
-bblean-0.6.0b2.dist-info/top_level.txt,sha256=ybxTonvTC9zR25yR5B27aEDLl6CiwID093ZyS_--Cq4,7
-bblean-0.6.0b2.dist-info/RECORD,,