chemap 0.3.3__tar.gz → 0.3.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. {chemap-0.3.3 → chemap-0.3.4}/PKG-INFO +1 -1
  2. {chemap-0.3.3 → chemap-0.3.4}/chemap/plotting/chem_space_umap.py +52 -16
  3. {chemap-0.3.3 → chemap-0.3.4}/chemap/plotting/cleveland.py +6 -4
  4. {chemap-0.3.3 → chemap-0.3.4}/chemap/plotting/scatter_plots.py +44 -14
  5. {chemap-0.3.3 → chemap-0.3.4}/pyproject.toml +1 -1
  6. {chemap-0.3.3 → chemap-0.3.4}/LICENSE +0 -0
  7. {chemap-0.3.3 → chemap-0.3.4}/README.md +0 -0
  8. {chemap-0.3.3 → chemap-0.3.4}/chemap/__init__.py +0 -0
  9. {chemap-0.3.3 → chemap-0.3.4}/chemap/approx_nn.py +0 -0
  10. {chemap-0.3.3 → chemap-0.3.4}/chemap/benchmarking/__init__.py +0 -0
  11. {chemap-0.3.3 → chemap-0.3.4}/chemap/benchmarking/fingerprint_duplicates.py +0 -0
  12. {chemap-0.3.3 → chemap-0.3.4}/chemap/benchmarking/utils.py +0 -0
  13. {chemap-0.3.3 → chemap-0.3.4}/chemap/data_loader.py +0 -0
  14. {chemap-0.3.3 → chemap-0.3.4}/chemap/fingerprint_computation.py +0 -0
  15. {chemap-0.3.3 → chemap-0.3.4}/chemap/fingerprint_conversions.py +0 -0
  16. {chemap-0.3.3 → chemap-0.3.4}/chemap/fingerprint_statistics.py +0 -0
  17. {chemap-0.3.3 → chemap-0.3.4}/chemap/fingerprints/__init__.py +0 -0
  18. {chemap-0.3.3 → chemap-0.3.4}/chemap/fingerprints/chemap_base_fingerprint.py +0 -0
  19. {chemap-0.3.3 → chemap-0.3.4}/chemap/fingerprints/element_count_fp.py +0 -0
  20. {chemap-0.3.3 → chemap-0.3.4}/chemap/fingerprints/lingo.py +0 -0
  21. {chemap-0.3.3 → chemap-0.3.4}/chemap/fingerprints/map4.py +0 -0
  22. {chemap-0.3.3 → chemap-0.3.4}/chemap/fingerprints/mhfp.py +0 -0
  23. {chemap-0.3.3 → chemap-0.3.4}/chemap/mbp.py +0 -0
  24. {chemap-0.3.3 → chemap-0.3.4}/chemap/metrics.py +0 -0
  25. {chemap-0.3.3 → chemap-0.3.4}/chemap/plotting/__init__.py +0 -0
  26. {chemap-0.3.3 → chemap-0.3.4}/chemap/plotting/benchmark_duplicates.py +0 -0
  27. {chemap-0.3.3 → chemap-0.3.4}/chemap/plotting/colormap_handling.py +0 -0
  28. {chemap-0.3.3 → chemap-0.3.4}/chemap/plotting/colormaps.py +0 -0
  29. {chemap-0.3.3 → chemap-0.3.4}/chemap/types.py +0 -0
  30. {chemap-0.3.3 → chemap-0.3.4}/chemap/utils.py +0 -0
  31. {chemap-0.3.3 → chemap-0.3.4}/chemap/visualizations.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: chemap
3
- Version: 0.3.3
3
+ Version: 0.3.4
4
4
  Summary: Library for computing molecular fingerprint based similarities as well as dimensionality reduction based chemical space visualizations.
5
5
  License-Expression: MIT
6
6
  License-File: LICENSE
@@ -3,7 +3,11 @@ from typing import Any, Optional
3
3
  import numpy as np
4
4
  import pandas as pd
5
5
  from chemap import FingerprintConfig, compute_fingerprints
6
- from chemap.fingerprint_conversions import fingerprints_to_csr
6
+ from chemap.fingerprint_conversions import (
7
+ fingerprints_to_csr,
8
+ fingerprints_to_tfidf,
9
+ idf_normalized,
10
+ )
7
11
  from chemap.metrics import (
8
12
  tanimoto_distance_dense,
9
13
  tanimoto_distance_sparse,
@@ -53,7 +57,7 @@ def create_chem_space_umap(
53
57
  fpgen: Optional[Any] = None,
54
58
  fingerprint_config: Optional[FingerprintConfig] = None,
55
59
  show_progress: bool = True,
56
- log_count: bool = False,
60
+ scaling: str = None,
57
61
  # UMAP (CPU / umap-learn)
58
62
  n_neighbors: int = 100,
59
63
  min_dist: float = 0.25,
@@ -80,9 +84,9 @@ def create_chem_space_umap(
80
84
  FingerprintConfig(count=True, folded=False, invalid_policy="raise")
81
85
  show_progress:
82
86
  Forwarded to compute_fingerprints.
83
- log_count:
84
- If True, apply np.log1p to counts (works for sparse CSR and dense arrays).
85
- (For binary fingerprints this is harmless)
87
+ scaling:
88
+ Define scaling for count fingerprints. Default is None, which means no scaling.
89
+ Can be set to "log" for log1p scaling, or to "tfidf" for TF-IDF scaling of bits.
86
90
  n_neighbors, min_dist, umap_random_state:
87
91
  Standard UMAP parameters.
88
92
  n_jobs:
@@ -137,14 +141,20 @@ def create_chem_space_umap(
137
141
 
138
142
  if not fingerprint_config.folded:
139
143
  # Convert to CSR matrix
140
- fps_csr = fingerprints_to_csr(fingerprints).X
144
+ if scaling == "tfidf":
145
+ fps_csr = fingerprints_to_tfidf(fingerprints).X
146
+ else:
147
+ fps_csr = fingerprints_to_csr(fingerprints).X
141
148
 
142
- if log_count:
143
- # Works well for count fingerprints ( for binary it's essentially unchanged).
144
- fps_csr = _log1p_csr_inplace(fps_csr)
149
+ if scaling == "log":
150
+ fps_csr = _log1p_csr_inplace(fps_csr)
145
151
 
146
152
  coords = reducer.fit_transform(fps_csr)
147
153
  else:
154
+ if scaling == "log":
155
+ fingerprints = np.log1p(fingerprints)
156
+ elif scaling == "tfidf":
157
+ fingerprints *= idf_normalized((fingerprints > 0).sum(axis=0), fingerprints.shape[0])
148
158
  coords = reducer.fit_transform(fingerprints)
149
159
 
150
160
  df[x_col] = coords[:, 0]
@@ -163,13 +173,39 @@ def create_chem_space_umap_gpu(
163
173
  fpgen: Optional[Any] = None,
164
174
  fingerprint_config: Optional[FingerprintConfig] = None,
165
175
  show_progress: bool = True,
166
- log_count: bool = False,
176
+ scaling: str = None,
167
177
  # UMAP (GPU / cuML)
168
178
  n_neighbors: int = 100,
169
179
  min_dist: float = 0.25,
170
180
  ) -> pd.DataFrame:
171
181
  """Compute fingerprints and create 2D UMAP coordinates using cuML (GPU).
172
182
 
183
+ Parameters
184
+ ----------
185
+ data:
186
+ Input dataframe containing a SMILES column.
187
+ col_smiles:
188
+ Name of the SMILES column.
189
+ inplace:
190
+ If True, write x/y columns into `data` and return it. Else returns a copy.
191
+ x_col, y_col:
192
+ Output coordinate column names.
193
+ fpgen:
194
+ RDKit fingerprint generator. Defaults to Morgan radius=9, fpSize=4096.
195
+ fingerprint_config:
196
+ FingerprintConfig for chemap.compute_fingerprints. Defaults to:
197
+ FingerprintConfig(count=True, folded=False, invalid_policy="raise")
198
+ show_progress:
199
+ Forwarded to compute_fingerprints.
200
+ scaling:
201
+ Define scaling for count fingerprints. Default is None, which means no scaling.
202
+ Can be set to "log" for log1p scaling, or to "tfidf" for TF-IDF scaling of bits.
203
+ n_neighbors, min_dist, umap_random_state:
204
+ Standard UMAP parameters.
205
+ n_jobs:
206
+ Passed to umap-learn UMAP for parallelism. Ignores random_state when n_jobs != 1.
207
+ Default -1 uses all CPUs.
208
+
173
209
  Notes
174
210
  -----
175
211
  - cuML UMAP here is fixed to metric="cosine"
@@ -222,12 +258,12 @@ def create_chem_space_umap_gpu(
222
258
  )
223
259
 
224
260
  # Reduce memory footprint (works well for count fingerprints)
225
- if not log_count:
226
- # stays integer-like
227
- fps = fingerprints.astype(np.int8, copy=False)
261
+ if scaling == "log":
262
+ fingerprints = np.log1p(fingerprints).astype(np.float32, copy=False)
263
+ elif scaling == "tfidf":
264
+ fingerprints *= idf_normalized((fingerprints > 0).sum(axis=0), fingerprints.shape[0])
228
265
  else:
229
- # log1p returns float
230
- fps = np.log1p(fingerprints).astype(np.float32, copy=False)
266
+ fingerprints = fingerprints.astype(np.int8, copy=False)
231
267
 
232
268
  umap_model = cuUMAP(
233
269
  n_neighbors=int(n_neighbors),
@@ -238,7 +274,7 @@ def create_chem_space_umap_gpu(
238
274
  n_components=2,
239
275
  )
240
276
 
241
- coords = umap_model.fit_transform(fps)
277
+ coords = umap_model.fit_transform(fingerprints)
242
278
 
243
279
  # cuML may return cupy/cudf-backed arrays; np.asarray makes it safe for pandas columns.
244
280
  coords_np = np.asarray(coords)
@@ -60,6 +60,8 @@ def cleveland_dotplot(
60
60
  show_legends: bool = True,
61
61
  color_legend_title: str = "Setting",
62
62
  marker_legend_title: str = "Variant",
63
+ color_legend_position: str = "lower left",
64
+ marker_legend_position: str = "lower right",
63
65
 
64
66
  style: ClevelandStyle = ClevelandStyle(),
65
67
  ) -> Tuple[Figure, Axes]:
@@ -260,16 +262,16 @@ def cleveland_dotplot(
260
262
 
261
263
  # Place legends similarly to your original if both exist
262
264
  if marker_handles and color_handles:
263
- leg1 = ax.legend(handles=marker_handles, loc="lower right",
265
+ leg1 = ax.legend(handles=marker_handles, loc=marker_legend_position,
264
266
  title=marker_legend_title, frameon=True)
265
267
  ax.add_artist(leg1)
266
- ax.legend(handles=color_handles, loc="lower left",
268
+ ax.legend(handles=color_handles, loc=color_legend_position,
267
269
  title=color_legend_title, frameon=True)
268
270
  elif marker_handles:
269
- ax.legend(handles=marker_handles, loc="lower right",
271
+ ax.legend(handles=marker_handles, loc=marker_legend_position,
270
272
  title=marker_legend_title, frameon=True)
271
273
  elif color_handles:
272
- ax.legend(handles=color_handles, loc="lower left",
274
+ ax.legend(handles=color_handles, loc=color_legend_position,
273
275
  title=color_legend_title, frameon=True)
274
276
 
275
277
  return fig, ax
@@ -36,6 +36,9 @@ class ScatterStyle:
36
36
  alpha: float = 0.25
37
37
  linewidths: float = 0.0
38
38
 
39
+ display_legend: bool = True
40
+ legend_outside: bool = False
41
+
39
42
  legend_title: Optional[str] = None
40
43
  legend_loc: str = "lower left"
41
44
  legend_frameon: bool = False
@@ -132,21 +135,40 @@ def scatter_plot_base(
132
135
  ax.set_xlabel("")
133
136
  ax.set_ylabel("")
134
137
 
135
- legend_title = style.legend_title if style.legend_title is not None else label_col
136
- handles = _build_legend_handles(
137
- legend_labels,
138
- palette,
139
- markersize=style.legend_markersize,
140
- alpha=style.legend_alpha,
141
- )
138
+ # ---- legend (optional + outside option) ----
139
+ if style.display_legend:
140
+ legend_title = style.legend_title if style.legend_title is not None else label_col
141
+ handles = _build_legend_handles(
142
+ legend_labels,
143
+ palette,
144
+ markersize=style.legend_markersize,
145
+ alpha=style.legend_alpha,
146
+ )
142
147
 
143
- ax.legend(
144
- handles=handles,
145
- title=legend_title,
146
- loc=style.legend_loc,
147
- frameon=style.legend_frameon,
148
- ncol=style.legend_ncol,
149
- )
148
+ if style.legend_outside:
149
+ # Put legend outside right; loc controls anchor point of legend box itself.
150
+ ax.legend(
151
+ handles=handles,
152
+ title=legend_title,
153
+ loc="center left",
154
+ bbox_to_anchor=(1.02, 0.5),
155
+ frameon=style.legend_frameon,
156
+ ncol=style.legend_ncol,
157
+ borderaxespad=0.0,
158
+ )
159
+ # Leave room on the right so legend isn't clipped
160
+ fig.tight_layout(rect=(0, 0, 0.85, 1))
161
+ else:
162
+ ax.legend(
163
+ handles=handles,
164
+ title=legend_title,
165
+ loc=style.legend_loc,
166
+ frameon=style.legend_frameon,
167
+ ncol=style.legend_ncol,
168
+ )
169
+ fig.tight_layout()
170
+ else:
171
+ fig.tight_layout()
150
172
 
151
173
  fig.tight_layout()
152
174
  return fig, ax
@@ -174,6 +196,8 @@ def scatter_plot_all_classes(
174
196
  s: float = 5.0,
175
197
  alpha: float = 0.25,
176
198
  linewidths: float = 0.0,
199
+ display_legend: bool = True,
200
+ legend_outside: bool = False,
177
201
  legend_title: Optional[str] = None,
178
202
  legend_loc: str = "lower left",
179
203
  legend_frameon: bool = False,
@@ -243,6 +267,8 @@ def scatter_plot_all_classes(
243
267
  s=s,
244
268
  alpha=alpha,
245
269
  linewidths=linewidths,
270
+ display_legend=display_legend,
271
+ legend_outside=legend_outside,
246
272
  legend_title=legend_title if legend_title is not None else subclass_col,
247
273
  legend_loc=legend_loc,
248
274
  legend_frameon=legend_frameon,
@@ -300,6 +326,8 @@ def scatter_plot_hierarchical_labels(
300
326
  s: float = 2.0,
301
327
  alpha: float = 0.2,
302
328
  linewidths: float = 0.0,
329
+ display_legend: bool = True,
330
+ legend_outside: bool = False,
303
331
  legend_title: str = "Class / Superclass",
304
332
  legend_loc: str = "lower left",
305
333
  legend_frameon: bool = False,
@@ -398,6 +426,8 @@ def scatter_plot_hierarchical_labels(
398
426
  s=s,
399
427
  alpha=alpha,
400
428
  linewidths=linewidths,
429
+ display_legend=display_legend,
430
+ legend_outside=legend_outside,
401
431
  legend_title=legend_title,
402
432
  legend_loc=legend_loc,
403
433
  legend_frameon=legend_frameon,
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "chemap"
3
- version = "0.3.3"
3
+ version = "0.3.4"
4
4
  description = "Library for computing molecular fingerprint based similarities as well as dimensionality reduction based chemical space visualizations. "
5
5
  authors = [
6
6
  { name="Florian Huber", email="florian.huber@hs-duesseldorf.de" },
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes