chemap 0.3.3__tar.gz → 0.3.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {chemap-0.3.3 → chemap-0.3.4}/PKG-INFO +1 -1
- {chemap-0.3.3 → chemap-0.3.4}/chemap/plotting/chem_space_umap.py +52 -16
- {chemap-0.3.3 → chemap-0.3.4}/chemap/plotting/cleveland.py +6 -4
- {chemap-0.3.3 → chemap-0.3.4}/chemap/plotting/scatter_plots.py +44 -14
- {chemap-0.3.3 → chemap-0.3.4}/pyproject.toml +1 -1
- {chemap-0.3.3 → chemap-0.3.4}/LICENSE +0 -0
- {chemap-0.3.3 → chemap-0.3.4}/README.md +0 -0
- {chemap-0.3.3 → chemap-0.3.4}/chemap/__init__.py +0 -0
- {chemap-0.3.3 → chemap-0.3.4}/chemap/approx_nn.py +0 -0
- {chemap-0.3.3 → chemap-0.3.4}/chemap/benchmarking/__init__.py +0 -0
- {chemap-0.3.3 → chemap-0.3.4}/chemap/benchmarking/fingerprint_duplicates.py +0 -0
- {chemap-0.3.3 → chemap-0.3.4}/chemap/benchmarking/utils.py +0 -0
- {chemap-0.3.3 → chemap-0.3.4}/chemap/data_loader.py +0 -0
- {chemap-0.3.3 → chemap-0.3.4}/chemap/fingerprint_computation.py +0 -0
- {chemap-0.3.3 → chemap-0.3.4}/chemap/fingerprint_conversions.py +0 -0
- {chemap-0.3.3 → chemap-0.3.4}/chemap/fingerprint_statistics.py +0 -0
- {chemap-0.3.3 → chemap-0.3.4}/chemap/fingerprints/__init__.py +0 -0
- {chemap-0.3.3 → chemap-0.3.4}/chemap/fingerprints/chemap_base_fingerprint.py +0 -0
- {chemap-0.3.3 → chemap-0.3.4}/chemap/fingerprints/element_count_fp.py +0 -0
- {chemap-0.3.3 → chemap-0.3.4}/chemap/fingerprints/lingo.py +0 -0
- {chemap-0.3.3 → chemap-0.3.4}/chemap/fingerprints/map4.py +0 -0
- {chemap-0.3.3 → chemap-0.3.4}/chemap/fingerprints/mhfp.py +0 -0
- {chemap-0.3.3 → chemap-0.3.4}/chemap/mbp.py +0 -0
- {chemap-0.3.3 → chemap-0.3.4}/chemap/metrics.py +0 -0
- {chemap-0.3.3 → chemap-0.3.4}/chemap/plotting/__init__.py +0 -0
- {chemap-0.3.3 → chemap-0.3.4}/chemap/plotting/benchmark_duplicates.py +0 -0
- {chemap-0.3.3 → chemap-0.3.4}/chemap/plotting/colormap_handling.py +0 -0
- {chemap-0.3.3 → chemap-0.3.4}/chemap/plotting/colormaps.py +0 -0
- {chemap-0.3.3 → chemap-0.3.4}/chemap/types.py +0 -0
- {chemap-0.3.3 → chemap-0.3.4}/chemap/utils.py +0 -0
- {chemap-0.3.3 → chemap-0.3.4}/chemap/visualizations.py +0 -0
|
@@ -3,7 +3,11 @@ from typing import Any, Optional
|
|
|
3
3
|
import numpy as np
|
|
4
4
|
import pandas as pd
|
|
5
5
|
from chemap import FingerprintConfig, compute_fingerprints
|
|
6
|
-
from chemap.fingerprint_conversions import
|
|
6
|
+
from chemap.fingerprint_conversions import (
|
|
7
|
+
fingerprints_to_csr,
|
|
8
|
+
fingerprints_to_tfidf,
|
|
9
|
+
idf_normalized,
|
|
10
|
+
)
|
|
7
11
|
from chemap.metrics import (
|
|
8
12
|
tanimoto_distance_dense,
|
|
9
13
|
tanimoto_distance_sparse,
|
|
@@ -53,7 +57,7 @@ def create_chem_space_umap(
|
|
|
53
57
|
fpgen: Optional[Any] = None,
|
|
54
58
|
fingerprint_config: Optional[FingerprintConfig] = None,
|
|
55
59
|
show_progress: bool = True,
|
|
56
|
-
|
|
60
|
+
scaling: str = None,
|
|
57
61
|
# UMAP (CPU / umap-learn)
|
|
58
62
|
n_neighbors: int = 100,
|
|
59
63
|
min_dist: float = 0.25,
|
|
@@ -80,9 +84,9 @@ def create_chem_space_umap(
|
|
|
80
84
|
FingerprintConfig(count=True, folded=False, invalid_policy="raise")
|
|
81
85
|
show_progress:
|
|
82
86
|
Forwarded to compute_fingerprints.
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
87
|
+
scaling:
|
|
88
|
+
Define scaling for count fingerprints. Default is None, which means no scaling.
|
|
89
|
+
Can be set to "log" for log1p scaling, or to "tfidf" for TF-IDF scaling of bits.
|
|
86
90
|
n_neighbors, min_dist, umap_random_state:
|
|
87
91
|
Standard UMAP parameters.
|
|
88
92
|
n_jobs:
|
|
@@ -137,14 +141,20 @@ def create_chem_space_umap(
|
|
|
137
141
|
|
|
138
142
|
if not fingerprint_config.folded:
|
|
139
143
|
# Convert to CSR matrix
|
|
140
|
-
|
|
144
|
+
if scaling == "tfidf":
|
|
145
|
+
fps_csr = fingerprints_to_tfidf(fingerprints).X
|
|
146
|
+
else:
|
|
147
|
+
fps_csr = fingerprints_to_csr(fingerprints).X
|
|
141
148
|
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
fps_csr = _log1p_csr_inplace(fps_csr)
|
|
149
|
+
if scaling == "log":
|
|
150
|
+
fps_csr = _log1p_csr_inplace(fps_csr)
|
|
145
151
|
|
|
146
152
|
coords = reducer.fit_transform(fps_csr)
|
|
147
153
|
else:
|
|
154
|
+
if scaling == "log":
|
|
155
|
+
fingerprints = np.log1p(fingerprints)
|
|
156
|
+
elif scaling == "tfidf":
|
|
157
|
+
fingerprints *= idf_normalized((fingerprints > 0).sum(axis=0), fingerprints.shape[0])
|
|
148
158
|
coords = reducer.fit_transform(fingerprints)
|
|
149
159
|
|
|
150
160
|
df[x_col] = coords[:, 0]
|
|
@@ -163,13 +173,39 @@ def create_chem_space_umap_gpu(
|
|
|
163
173
|
fpgen: Optional[Any] = None,
|
|
164
174
|
fingerprint_config: Optional[FingerprintConfig] = None,
|
|
165
175
|
show_progress: bool = True,
|
|
166
|
-
|
|
176
|
+
scaling: str = None,
|
|
167
177
|
# UMAP (GPU / cuML)
|
|
168
178
|
n_neighbors: int = 100,
|
|
169
179
|
min_dist: float = 0.25,
|
|
170
180
|
) -> pd.DataFrame:
|
|
171
181
|
"""Compute fingerprints and create 2D UMAP coordinates using cuML (GPU).
|
|
172
182
|
|
|
183
|
+
Parameters
|
|
184
|
+
----------
|
|
185
|
+
data:
|
|
186
|
+
Input dataframe containing a SMILES column.
|
|
187
|
+
col_smiles:
|
|
188
|
+
Name of the SMILES column.
|
|
189
|
+
inplace:
|
|
190
|
+
If True, write x/y columns into `data` and return it. Else returns a copy.
|
|
191
|
+
x_col, y_col:
|
|
192
|
+
Output coordinate column names.
|
|
193
|
+
fpgen:
|
|
194
|
+
RDKit fingerprint generator. Defaults to Morgan radius=9, fpSize=4096.
|
|
195
|
+
fingerprint_config:
|
|
196
|
+
FingerprintConfig for chemap.compute_fingerprints. Defaults to:
|
|
197
|
+
FingerprintConfig(count=True, folded=False, invalid_policy="raise")
|
|
198
|
+
show_progress:
|
|
199
|
+
Forwarded to compute_fingerprints.
|
|
200
|
+
scaling:
|
|
201
|
+
Define scaling for count fingerprints. Default is None, which means no scaling.
|
|
202
|
+
Can be set to "log" for log1p scaling, or to "tfidf" for TF-IDF scaling of bits.
|
|
203
|
+
n_neighbors, min_dist:
|
|
204
|
+
Standard UMAP parameters.
|
|
205
|
+
n_jobs:
|
|
206
|
+
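Not accepted by this GPU function (its signature ends at min_dist). In the CPU variant it is passed to umap-learn UMAP for parallelism, which ignores random_state when n_jobs != 1.
|
|
207
|
+
In the CPU variant the default -1 uses all CPUs.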
|
|
208
|
+
|
|
173
209
|
Notes
|
|
174
210
|
-----
|
|
175
211
|
- cuML UMAP here is fixed to metric="cosine"
|
|
@@ -222,12 +258,12 @@ def create_chem_space_umap_gpu(
|
|
|
222
258
|
)
|
|
223
259
|
|
|
224
260
|
# Reduce memory footprint (works well for count fingerprints)
|
|
225
|
-
if
|
|
226
|
-
|
|
227
|
-
|
|
261
|
+
if scaling == "log":
|
|
262
|
+
fingerprints = np.log1p(fingerprints).astype(np.float32, copy=False)
|
|
263
|
+
elif scaling == "tfidf":
|
|
264
|
+
fingerprints *= idf_normalized((fingerprints > 0).sum(axis=0), fingerprints.shape[0])
|
|
228
265
|
else:
|
|
229
|
-
|
|
230
|
-
fps = np.log1p(fingerprints).astype(np.float32, copy=False)
|
|
266
|
+
fingerprints = fingerprints.astype(np.int8, copy=False)
|
|
231
267
|
|
|
232
268
|
umap_model = cuUMAP(
|
|
233
269
|
n_neighbors=int(n_neighbors),
|
|
@@ -238,7 +274,7 @@ def create_chem_space_umap_gpu(
|
|
|
238
274
|
n_components=2,
|
|
239
275
|
)
|
|
240
276
|
|
|
241
|
-
coords = umap_model.fit_transform(
|
|
277
|
+
coords = umap_model.fit_transform(fingerprints)
|
|
242
278
|
|
|
243
279
|
# cuML may return cupy/cudf-backed arrays; np.asarray makes it safe for pandas columns.
|
|
244
280
|
coords_np = np.asarray(coords)
|
|
@@ -60,6 +60,8 @@ def cleveland_dotplot(
|
|
|
60
60
|
show_legends: bool = True,
|
|
61
61
|
color_legend_title: str = "Setting",
|
|
62
62
|
marker_legend_title: str = "Variant",
|
|
63
|
+
color_legend_position: str = "lower left",
|
|
64
|
+
marker_legend_position: str = "lower right",
|
|
63
65
|
|
|
64
66
|
style: ClevelandStyle = ClevelandStyle(),
|
|
65
67
|
) -> Tuple[Figure, Axes]:
|
|
@@ -260,16 +262,16 @@ def cleveland_dotplot(
|
|
|
260
262
|
|
|
261
263
|
# If both legends exist, add the marker legend as a separate artist so the color legend does not replace it
|
|
262
264
|
if marker_handles and color_handles:
|
|
263
|
-
leg1 = ax.legend(handles=marker_handles, loc=
|
|
265
|
+
leg1 = ax.legend(handles=marker_handles, loc=marker_legend_position,
|
|
264
266
|
title=marker_legend_title, frameon=True)
|
|
265
267
|
ax.add_artist(leg1)
|
|
266
|
-
ax.legend(handles=color_handles, loc=
|
|
268
|
+
ax.legend(handles=color_handles, loc=color_legend_position,
|
|
267
269
|
title=color_legend_title, frameon=True)
|
|
268
270
|
elif marker_handles:
|
|
269
|
-
ax.legend(handles=marker_handles, loc=
|
|
271
|
+
ax.legend(handles=marker_handles, loc=marker_legend_position,
|
|
270
272
|
title=marker_legend_title, frameon=True)
|
|
271
273
|
elif color_handles:
|
|
272
|
-
ax.legend(handles=color_handles, loc=
|
|
274
|
+
ax.legend(handles=color_handles, loc=color_legend_position,
|
|
273
275
|
title=color_legend_title, frameon=True)
|
|
274
276
|
|
|
275
277
|
return fig, ax
|
|
@@ -36,6 +36,9 @@ class ScatterStyle:
|
|
|
36
36
|
alpha: float = 0.25
|
|
37
37
|
linewidths: float = 0.0
|
|
38
38
|
|
|
39
|
+
display_legend: bool = True
|
|
40
|
+
legend_outside: bool = False
|
|
41
|
+
|
|
39
42
|
legend_title: Optional[str] = None
|
|
40
43
|
legend_loc: str = "lower left"
|
|
41
44
|
legend_frameon: bool = False
|
|
@@ -132,21 +135,40 @@ def scatter_plot_base(
|
|
|
132
135
|
ax.set_xlabel("")
|
|
133
136
|
ax.set_ylabel("")
|
|
134
137
|
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
138
|
+
# ---- legend (optional + outside option) ----
|
|
139
|
+
if style.display_legend:
|
|
140
|
+
legend_title = style.legend_title if style.legend_title is not None else label_col
|
|
141
|
+
handles = _build_legend_handles(
|
|
142
|
+
legend_labels,
|
|
143
|
+
palette,
|
|
144
|
+
markersize=style.legend_markersize,
|
|
145
|
+
alpha=style.legend_alpha,
|
|
146
|
+
)
|
|
142
147
|
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
148
|
+
if style.legend_outside:
|
|
149
|
+
# Put legend outside right; loc controls anchor point of legend box itself.
|
|
150
|
+
ax.legend(
|
|
151
|
+
handles=handles,
|
|
152
|
+
title=legend_title,
|
|
153
|
+
loc="center left",
|
|
154
|
+
bbox_to_anchor=(1.02, 0.5),
|
|
155
|
+
frameon=style.legend_frameon,
|
|
156
|
+
ncol=style.legend_ncol,
|
|
157
|
+
borderaxespad=0.0,
|
|
158
|
+
)
|
|
159
|
+
# Leave room on the right so legend isn't clipped
|
|
160
|
+
fig.tight_layout(rect=(0, 0, 0.85, 1))
|
|
161
|
+
else:
|
|
162
|
+
ax.legend(
|
|
163
|
+
handles=handles,
|
|
164
|
+
title=legend_title,
|
|
165
|
+
loc=style.legend_loc,
|
|
166
|
+
frameon=style.legend_frameon,
|
|
167
|
+
ncol=style.legend_ncol,
|
|
168
|
+
)
|
|
169
|
+
fig.tight_layout()
|
|
170
|
+
else:
|
|
171
|
+
fig.tight_layout()
|
|
150
172
|
|
|
151
173
|
fig.tight_layout()
|
|
152
174
|
return fig, ax
|
|
@@ -174,6 +196,8 @@ def scatter_plot_all_classes(
|
|
|
174
196
|
s: float = 5.0,
|
|
175
197
|
alpha: float = 0.25,
|
|
176
198
|
linewidths: float = 0.0,
|
|
199
|
+
display_legend: bool = True,
|
|
200
|
+
legend_outside: bool = False,
|
|
177
201
|
legend_title: Optional[str] = None,
|
|
178
202
|
legend_loc: str = "lower left",
|
|
179
203
|
legend_frameon: bool = False,
|
|
@@ -243,6 +267,8 @@ def scatter_plot_all_classes(
|
|
|
243
267
|
s=s,
|
|
244
268
|
alpha=alpha,
|
|
245
269
|
linewidths=linewidths,
|
|
270
|
+
display_legend=display_legend,
|
|
271
|
+
legend_outside=legend_outside,
|
|
246
272
|
legend_title=legend_title if legend_title is not None else subclass_col,
|
|
247
273
|
legend_loc=legend_loc,
|
|
248
274
|
legend_frameon=legend_frameon,
|
|
@@ -300,6 +326,8 @@ def scatter_plot_hierarchical_labels(
|
|
|
300
326
|
s: float = 2.0,
|
|
301
327
|
alpha: float = 0.2,
|
|
302
328
|
linewidths: float = 0.0,
|
|
329
|
+
display_legend: bool = True,
|
|
330
|
+
legend_outside: bool = False,
|
|
303
331
|
legend_title: str = "Class / Superclass",
|
|
304
332
|
legend_loc: str = "lower left",
|
|
305
333
|
legend_frameon: bool = False,
|
|
@@ -398,6 +426,8 @@ def scatter_plot_hierarchical_labels(
|
|
|
398
426
|
s=s,
|
|
399
427
|
alpha=alpha,
|
|
400
428
|
linewidths=linewidths,
|
|
429
|
+
display_legend=display_legend,
|
|
430
|
+
legend_outside=legend_outside,
|
|
401
431
|
legend_title=legend_title,
|
|
402
432
|
legend_loc=legend_loc,
|
|
403
433
|
legend_frameon=legend_frameon,
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "chemap"
|
|
3
|
-
version = "0.3.
|
|
3
|
+
version = "0.3.4"
|
|
4
4
|
description = "Library for computing molecular fingerprint based similarities as well as dimensionality reduction based chemical space visualizations. "
|
|
5
5
|
authors = [
|
|
6
6
|
{ name="Florian Huber", email="florian.huber@hs-duesseldorf.de" },
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|