como-ocsr 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
como/__init__.py ADDED
@@ -0,0 +1,298 @@
1
+ """
2
+ COMO-OCSR — Optical Chemical Structure Recognition with COMO.
3
+
4
+ COMO (Closed-loop Optical Molecule recOgnition) is a deep learning framework
5
+ that recognizes chemical structure diagrams from images and predicts SMILES
6
+ strings with atom-level coordinates and bond matrices. It uses Minimum Risk
7
+ Training (MRT) to directly optimize molecular-level, non-differentiable
8
+ objectives, closing the gap between token-level training and molecular-level
9
+ evaluation.
10
+
11
+ Quick start::
12
+
13
+ import como
14
+
15
+ model = como.load_model("path/to/checkpoint.pth", device="cuda")
16
+ smiles = como.predict(model, "molecule.png")
17
+ print(smiles) # "CC(=O)O"
18
+
19
+ # Batch prediction
20
+ results = como.predict_batch(model, ["mol1.png", "mol2.png"])
21
+
22
+ # Benchmark evaluation
23
+ metrics = como.evaluate(model, "benchmark/USPTO/", "benchmark/USPTO.csv")
24
+
25
+ # Multi-GPU evaluation (use GPUs 0, 1, 2)
26
+ metrics = como.evaluate(model, ..., gpus="0,1,2")
27
+ """
28
+
29
+ from ._core import ComoModel, ComoVocab, evaluate_benchmarks as _raw_evaluate_benchmarks, _result_to_smiles
30
+ from ._chemistry import canonicalize_smiles, canonicalize_tautomer
31
+
32
# Package version string.
__version__ = "1.0.0"

# Names exported by ``from como import *``, grouped by role.
__all__ = [
    # Model / vocabulary classes re-exported from ``._core``
    "ComoVocab",
    "ComoModel",
    # High-level helpers defined in this module
    "load_model",
    "predict",
    "predict_batch",
    "evaluate",
    "evaluate_benchmarks",
    # SMILES helpers (``_result_to_smiles`` is exported despite its
    # leading underscore)
    "canonicalize_smiles",
    "canonicalize_tautomer",
    "_result_to_smiles",
    # Version
    "__version__",
]
51
+
52
+
53
def load_model(
    checkpoint_path: str,
    device: str = "cuda",
    pretrained: bool = True,
    **model_kwargs,
) -> ComoModel:
    """Build a COMO model and load its weights from a checkpoint file.

    Parameters
    ----------
    checkpoint_path:
        Path to a ``.pth`` checkpoint (e.g. ``"COMO_joint/tanimoto/final.pth"``).
    device:
        Device string (``"cuda"`` or ``"cpu"``); it is converted with
        ``torch.device`` before being handed to ``ComoModel.load_model``.
    pretrained:
        Whether to use ImageNet-pretrained backbone weights (default: ``True``).
    model_kwargs:
        Additional arguments passed to :class:`ComoModel` (e.g. ``d_model``,
        ``nhead``, ``num_decoder_layers``).  ``vocab`` and ``backbone`` may
        also be given here to replace the defaults
        (``ComoVocab(n_bins=64)`` and ``"swin_b"``); they used to be passed
        unconditionally, so supplying either one raised ``TypeError``.

    Returns
    -------
    ComoModel
        The model after ``ComoModel.load_model`` has been called on it.
        NOTE(review): switching to evaluation mode is not done in this
        function; presumably ``ComoModel.load_model`` handles it -- confirm.
    """
    # Only fill in defaults the caller did not provide, so an explicit
    # ``vocab=...`` / ``backbone=...`` no longer collides with ours.
    if "vocab" not in model_kwargs:
        model_kwargs["vocab"] = ComoVocab(n_bins=64)
    model_kwargs.setdefault("backbone", "swin_b")

    model = ComoModel(pretrained=pretrained, **model_kwargs)
    # ``torch`` is imported at module level (at the bottom of this file).
    model.load_model(checkpoint_path, device=torch.device(device))
    return model
87
+
88
+
89
def predict(
    model: ComoModel,
    image,
    *,
    beam_size: int = 1,
    max_len: int = 500,
    smiles_mode: str | None = "postprocess",
    device: str | None = None,
) -> str | dict:
    """Predict the SMILES string for a single molecular image.

    Parameters
    ----------
    model:
        A loaded :class:`ComoModel`.
    image:
        One of: a file path (``str``), a NumPy array (H×W×3 or H×W), a PIL
        ``Image``, or a preprocessed ``torch.Tensor``.  It is forwarded
        unchanged to ``model.predict``.
    beam_size:
        Beam width (1 = greedy, 3 = beam search).  Greedy is faster and
        produces identical results to beam search on most images.
    max_len:
        Maximum number of tokens to generate.
    smiles_mode:
        SMILES reconstruction mode.  One of ``"postprocess"`` (best quality,
        recommended), ``"graph"``, ``"decoder"``, or ``None`` (return the raw
        result dict instead of a SMILES string).
    device:
        Optional device override.  When given, ``model.to(device)`` is called
        before predicting, so the model stays on that device afterwards.

    Returns
    -------
    str or dict
        If *smiles_mode* is not ``None``, the ``"pred_smiles"`` entry of the
        result.  If ``None``, the full result dict containing tokens, atom
        symbols, coordinates, bond matrix, etc.
    """
    if device is not None:
        model.to(device)

    result = model.predict(
        image,
        beam_size=beam_size,
        max_len=max_len,
        smiles_mode=smiles_mode,
    )
    # ``None`` means the caller wants the raw dict rather than the string.
    return result if smiles_mode is None else result["pred_smiles"]
137
+
138
+
139
def predict_batch(
    model: ComoModel,
    images: list,
    *,
    beam_size: int = 1,
    max_len: int = 500,
    smiles_mode: str | None = "postprocess",
    device: str | None = None,
) -> list[str] | list[dict]:
    """Predict SMILES for a batch of images (single GPU).

    Parameters
    ----------
    model:
        A loaded :class:`ComoModel`.
    images:
        List of file paths, NumPy arrays, PIL Images, or torch Tensors.  It
        is forwarded unchanged to ``model.predict_batch``.
    beam_size:
        Beam width (1 = greedy, recommended for batch).
    max_len:
        Maximum number of tokens per image.
    smiles_mode:
        SMILES reconstruction mode (``"postprocess"`` recommended), or
        ``None`` to get the raw result dicts back.
    device:
        Optional device override.  When given, ``model.to(device)`` is called
        before predicting, so the model stays on that device afterwards.

    Returns
    -------
    list[str] or list[dict]
        Predicted SMILES strings (the ``"pred_smiles"`` entry of each
        result), or the raw result dicts if *smiles_mode* is ``None``.
    """
    if device is not None:
        model.to(device)

    results = model.predict_batch(
        images,
        beam_size=beam_size,
        max_len=max_len,
        smiles_mode=smiles_mode,
    )
    # ``None`` means the caller wants the raw dicts rather than strings.
    if smiles_mode is None:
        return results
    return [entry["pred_smiles"] for entry in results]
182
+
183
+
184
def evaluate(
    model: ComoModel,
    benchmark_dir: str,
    csv_path: str,
    *,
    beam_size: int = 1,
    postproc_workers: int = 32,
    tautomer_standardize: bool = True,
    gpus: str | None = "0",
) -> dict:
    """Evaluate model on a single benchmark.

    This is a thin wrapper around :func:`evaluate_benchmarks`: the benchmark
    is registered under the fixed name ``"benchmark"`` and only its metrics
    dict is returned.  Handling of ``CUDA_VISIBLE_DEVICES`` is delegated to
    that function instead of being duplicated here.

    Parameters
    ----------
    model:
        A loaded :class:`ComoModel`.
    benchmark_dir:
        Directory containing benchmark images (``.png`` files).
    csv_path:
        Path to CSV with columns ``image_id`` and ``SMILES``.
    beam_size:
        Beam width for decoding (1 = greedy).
    postproc_workers:
        Number of parallel workers for SMILES post-processing.
    tautomer_standardize:
        If ``True``, additionally compute tautomer-normalized exact match.
    gpus:
        Comma-separated GPU IDs (e.g. ``"0"`` or ``"0,1,2,3"``) written to
        ``CUDA_VISIBLE_DEVICES`` for the duration of the call.  With
        ``None`` the variable is left as it is, so every GPU currently
        visible to the process may be used.  Default: ``"0"`` (single GPU).

    Returns
    -------
    dict
        Metrics including ``exact_match_acc``, ``avg_tanimoto``, and
        (optionally) ``tautomer_match_acc`` for each SMILES mode.
    """
    single_benchmark = {
        "name": "benchmark",
        "benchmark_dir": benchmark_dir,
        "csv_path": csv_path,
    }
    per_benchmark = evaluate_benchmarks(
        model,
        [single_benchmark],
        beam_size=beam_size,
        postproc_workers=postproc_workers,
        tautomer_standardize=tautomer_standardize,
        gpus=gpus,
    )
    return per_benchmark["benchmark"]
239
+
240
+
241
def evaluate_benchmarks(
    model: ComoModel,
    benchmarks: list[dict],
    *,
    beam_size: int = 1,
    postproc_workers: int = 32,
    tautomer_standardize: bool = True,
    gpus: str | None = "0",
) -> dict[str, dict]:
    """Evaluate model on multiple benchmarks.

    The call temporarily overrides the ``CUDA_VISIBLE_DEVICES`` environment
    variable and restores its previous state (including "unset") afterwards,
    even if the evaluation raises.

    Parameters
    ----------
    model:
        A loaded :class:`ComoModel`.
    benchmarks:
        List of dicts, each with keys ``"name"``, ``"benchmark_dir"``, and
        ``"csv_path"``.  Example::

            [{"name": "USPTO", "benchmark_dir": "...", "csv_path": "..."}]
    beam_size:
        Beam width for decoding (1 = greedy).
    postproc_workers:
        Number of parallel workers for SMILES post-processing.
    tautomer_standardize:
        If ``True``, additionally compute tautomer-normalized exact match.
    gpus:
        Comma-separated GPU IDs (e.g. ``"0"`` or ``"0,1,2,3"``) written to
        ``CUDA_VISIBLE_DEVICES`` for the duration of the call.  With
        ``None`` the variable is not modified.  Default: ``"0"``.
        NOTE(review): CUDA usually reads this variable when it is first
        initialised in a process -- confirm the override reaches the
        workers used by the underlying evaluation routine.

    Returns
    -------
    dict[str, dict]
        Mapping from benchmark name to metrics dict.
    """
    import os as _os

    env_key = "CUDA_VISIBLE_DEVICES"
    saved_value = _os.environ.get(env_key, None)
    try:
        if gpus is not None:
            _os.environ[env_key] = gpus
        return _raw_evaluate_benchmarks(
            model,
            benchmarks,
            beam_size=beam_size,
            postproc_workers=postproc_workers,
            tautomer_standardize=tautomer_standardize,
        )
    finally:
        # Put the environment back exactly as we found it.
        if saved_value is not None:
            _os.environ[env_key] = saved_value
        else:
            _os.environ.pop(env_key, None)
293
+
294
+
295
+ # ---------------------------------------------------------------------------
296
+ # Internal: torch import for load_model
297
+ # ---------------------------------------------------------------------------
298
+ import torch # noqa: E402