rdkit-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. rdkit_cli/__init__.py +4 -0
  2. rdkit_cli/__main__.py +6 -0
  3. rdkit_cli/cli.py +162 -0
  4. rdkit_cli/commands/__init__.py +1 -0
  5. rdkit_cli/commands/conformers.py +220 -0
  6. rdkit_cli/commands/convert.py +162 -0
  7. rdkit_cli/commands/depict.py +311 -0
  8. rdkit_cli/commands/descriptors.py +251 -0
  9. rdkit_cli/commands/diversity.py +232 -0
  10. rdkit_cli/commands/enumerate.py +229 -0
  11. rdkit_cli/commands/filter.py +384 -0
  12. rdkit_cli/commands/fingerprints.py +179 -0
  13. rdkit_cli/commands/fragment.py +284 -0
  14. rdkit_cli/commands/mcs.py +162 -0
  15. rdkit_cli/commands/reactions.py +191 -0
  16. rdkit_cli/commands/scaffold.py +243 -0
  17. rdkit_cli/commands/similarity.py +359 -0
  18. rdkit_cli/commands/standardize.py +138 -0
  19. rdkit_cli/core/__init__.py +1 -0
  20. rdkit_cli/core/conformers.py +197 -0
  21. rdkit_cli/core/depict.py +241 -0
  22. rdkit_cli/core/descriptors.py +248 -0
  23. rdkit_cli/core/diversity.py +174 -0
  24. rdkit_cli/core/enumerate.py +190 -0
  25. rdkit_cli/core/filters.py +443 -0
  26. rdkit_cli/core/fingerprints.py +265 -0
  27. rdkit_cli/core/fragment.py +237 -0
  28. rdkit_cli/core/mcs.py +128 -0
  29. rdkit_cli/core/reactions.py +159 -0
  30. rdkit_cli/core/scaffold.py +174 -0
  31. rdkit_cli/core/similarity.py +206 -0
  32. rdkit_cli/core/standardizer.py +141 -0
  33. rdkit_cli/io/__init__.py +7 -0
  34. rdkit_cli/io/formats.py +109 -0
  35. rdkit_cli/io/readers.py +352 -0
  36. rdkit_cli/io/writers.py +275 -0
  37. rdkit_cli/parallel/__init__.py +5 -0
  38. rdkit_cli/parallel/batch.py +181 -0
  39. rdkit_cli/parallel/executor.py +180 -0
  40. rdkit_cli/progress/__init__.py +5 -0
  41. rdkit_cli/progress/ninja.py +195 -0
  42. rdkit_cli/utils/__init__.py +1 -0
  43. rdkit_cli-0.1.0.dist-info/METADATA +380 -0
  44. rdkit_cli-0.1.0.dist-info/RECORD +47 -0
  45. rdkit_cli-0.1.0.dist-info/WHEEL +4 -0
  46. rdkit_cli-0.1.0.dist-info/entry_points.txt +2 -0
  47. rdkit_cli-0.1.0.dist-info/licenses/LICENSE +190 -0
@@ -0,0 +1,443 @@
1
+ """Molecular filtering engine."""
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Optional, Any, Callable
5
+
6
+ from rdkit import Chem
7
+ from rdkit.Chem import Descriptors, FilterCatalog, rdfiltercatalog
8
+
9
+ from rdkit_cli.io.readers import MoleculeRecord
10
+
11
+
12
@dataclass
class FilterResult:
    """Outcome of a single filter check on a molecule."""

    # True when the molecule satisfied the check.
    passed: bool
    # Optional human-readable explanation; left as None unless a caller sets it.
    reason: Optional[str] = None
18
+
19
+
20
# Drug-likeness rule sets.
#
# Each rule set maps a property name to a (min, max) pair. A bound of None
# means "no limit on that side". check_property_range() treats both bounds as
# inclusive (a value fails only when it is strictly below min or strictly
# above max). Insertion order is kept because it determines the order in which
# violated properties are reported.
DRUGLIKE_RULES = {
    "lipinski": dict(
        MolWt=(None, 500),
        MolLogP=(None, 5),
        NumHDonors=(None, 5),
        NumHAcceptors=(None, 10),
    ),
    "veber": dict(
        NumRotatableBonds=(None, 10),
        TPSA=(None, 140),
    ),
    "ghose": dict(
        MolWt=(160, 480),
        MolLogP=(-0.4, 5.6),
        NumAtoms=(20, 70),
        MolMR=(40, 130),
    ),
    "egan": dict(
        MolLogP=(None, 5.88),
        TPSA=(None, 131.6),
    ),
    "muegge": dict(
        MolWt=(200, 600),
        MolLogP=(-2, 5),
        TPSA=(None, 150),
        RingCount=(None, 7),
        NumHDonors=(None, 5),
        NumHAcceptors=(None, 10),
        NumRotatableBonds=(None, 15),
    ),
}
52
+
53
+
54
def check_property_range(
    mol: Chem.Mol,
    property_name: str,
    min_val: Optional[float],
    max_val: Optional[float],
) -> bool:
    """
    Tell whether a molecular property lies inside an inclusive range.

    Args:
        mol: RDKit molecule
        property_name: "NumAtoms", or the name of an attribute of
            rdkit.Chem.Descriptors that is called with the molecule
        min_val: Lower bound, or None for no lower bound
        max_val: Upper bound, or None for no upper bound

    Returns:
        False when the value is below min_val or above max_val, True
        otherwise. A property name that is neither "NumAtoms" nor an
        attribute of Descriptors is not checked and yields True.
    """
    if property_name == "NumAtoms":
        # NOTE(review): GetNumAtoms() is called with its default arguments;
        # confirm whether the Ghose atom-count bound expects hydrogens too.
        observed = mol.GetNumAtoms()
    elif hasattr(Descriptors, property_name):
        observed = getattr(Descriptors, property_name)(mol)
    else:
        # Unknown property: deliberately treated as passing.
        return True

    below_minimum = min_val is not None and observed < min_val
    if below_minimum:
        return False

    above_maximum = max_val is not None and observed > max_val
    return not above_maximum
76
+
77
+
78
def check_druglike_rules(mol: Chem.Mol, rule_name: str) -> FilterResult:
    """
    Evaluate one named drug-likeness rule set against a molecule.

    Args:
        mol: RDKit molecule
        rule_name: Key of DRUGLIKE_RULES (lipinski, veber, etc.)

    Returns:
        FilterResult with passed=True when no property is out of range;
        otherwise passed=False and a reason listing the violated properties
        in rule-set order.

    Raises:
        ValueError: If rule_name is not a key of DRUGLIKE_RULES.
    """
    if rule_name in DRUGLIKE_RULES:
        bounds_by_property = DRUGLIKE_RULES[rule_name]
    else:
        raise ValueError(f"Unknown rule: {rule_name}")

    failed = [
        name
        for name, (lower, upper) in bounds_by_property.items()
        if not check_property_range(mol, name, lower, upper)
    ]

    if not failed:
        return FilterResult(passed=True)

    return FilterResult(passed=False, reason=f"Failed: {', '.join(failed)}")
103
+
104
+
105
class SubstructureFilter:
    """Keep or drop molecules based on a SMARTS substructure match."""

    def __init__(
        self,
        smarts: str,
        exclude: bool = False,
        include_smiles: bool = True,
        include_name: bool = True,
    ):
        """
        Compile the SMARTS pattern and store the output options.

        Args:
            smarts: SMARTS pattern to match
            exclude: If True, keep molecules that do NOT match the pattern;
                if False, keep molecules that do match
            include_smiles: Include SMILES in output
            include_name: Include molecule name in output

        Raises:
            ValueError: If RDKit cannot parse the SMARTS pattern.
        """
        compiled = Chem.MolFromSmarts(smarts)
        if compiled is None:
            raise ValueError(f"Invalid SMARTS pattern: {smarts}")

        self.pattern = compiled
        self.exclude = exclude
        self.include_smiles = include_smiles
        self.include_name = include_name

    def filter(self, record: MoleculeRecord) -> Optional[dict[str, Any]]:
        """
        Apply the substructure test to one record.

        Args:
            record: MoleculeRecord to check

        Returns:
            Output dictionary when the record is kept, None when it is
            rejected or has no molecule
        """
        mol = record.mol
        if mol is None:
            return None

        matched = mol.HasSubstructMatch(self.pattern)

        # exclude mode keeps non-matching molecules; include mode keeps matches.
        if self.exclude:
            keep = not matched
        else:
            keep = matched
        if not keep:
            return None

        row: dict[str, Any] = {}
        if self.include_smiles:
            row["smiles"] = record.smiles
        if self.include_name and record.name:
            row["name"] = record.name

        # Metadata never overwrites the smiles/name entries set above.
        for key, value in record.metadata.items():
            row.setdefault(key, value)

        return row
166
+
167
+
168
class PropertyFilter:
    """Keep molecules whose properties all fall inside given ranges."""

    def __init__(
        self,
        rules: dict[str, tuple[Optional[float], Optional[float]]],
        include_smiles: bool = True,
        include_name: bool = True,
    ):
        """
        Store the property ranges and output options.

        Args:
            rules: Dictionary of property_name -> (min_val, max_val)
            include_smiles: Include SMILES in output
            include_name: Include molecule name in output
        """
        self.rules = rules
        self.include_smiles = include_smiles
        self.include_name = include_name

    def filter(self, record: MoleculeRecord) -> Optional[dict[str, Any]]:
        """
        Apply every property range to one record.

        Args:
            record: MoleculeRecord to check

        Returns:
            Output dictionary when all ranges are satisfied, None when any
            range is violated or the record has no molecule
        """
        mol = record.mol
        if mol is None:
            return None

        # all() stops at the first violated range, like an early-exit loop.
        in_range = all(
            check_property_range(mol, name, lower, upper)
            for name, (lower, upper) in self.rules.items()
        )
        if not in_range:
            return None

        row: dict[str, Any] = {}
        if self.include_smiles:
            row["smiles"] = record.smiles
        if self.include_name and record.name:
            row["name"] = record.name

        # Metadata never overwrites the smiles/name entries set above.
        for key, value in record.metadata.items():
            row.setdefault(key, value)

        return row
209
+
210
+
211
class DruglikeFilter:
    """Keep molecules that satisfy a named drug-likeness rule set."""

    def __init__(
        self,
        rule_name: str = "lipinski",
        max_violations: int = 0,
        include_smiles: bool = True,
        include_name: bool = True,
    ):
        """
        Validate the rule name and store the filter options.

        Args:
            rule_name: Key of DRUGLIKE_RULES to apply
            max_violations: Maximum number of violated properties tolerated
            include_smiles: Include SMILES in output
            include_name: Include molecule name in output

        Raises:
            ValueError: If rule_name is not a key of DRUGLIKE_RULES.
        """
        if rule_name not in DRUGLIKE_RULES:
            available = ", ".join(DRUGLIKE_RULES.keys())
            raise ValueError(f"Unknown rule: {rule_name}. Available: {available}")

        self.rule_name = rule_name
        self.max_violations = max_violations
        self.include_smiles = include_smiles
        self.include_name = include_name

    def filter(self, record: MoleculeRecord) -> Optional[dict[str, Any]]:
        """
        Count rule violations for one record and keep it if few enough.

        Args:
            record: MoleculeRecord to check

        Returns:
            Output dictionary when the violation count does not exceed
            max_violations, None otherwise or when the record has no molecule
        """
        mol = record.mol
        if mol is None:
            return None

        # Every property is evaluated; there is no early exit.
        violation_count = sum(
            1
            for name, (lower, upper) in DRUGLIKE_RULES[self.rule_name].items()
            if not check_property_range(mol, name, lower, upper)
        )
        if violation_count > self.max_violations:
            return None

        row: dict[str, Any] = {}
        if self.include_smiles:
            row["smiles"] = record.smiles
        if self.include_name and record.name:
            row["name"] = record.name

        # Metadata never overwrites the smiles/name entries set above.
        for key, value in record.metadata.items():
            row.setdefault(key, value)

        return row
264
+
265
+
266
class PAINSFilter:
    """Filter molecules for PAINS (Pan-Assay Interference Compounds)."""

    def __init__(
        self,
        exclude: bool = True,
        include_smiles: bool = True,
        include_name: bool = True,
    ):
        """
        Build the PAINS catalog and store the filter options.

        Args:
            exclude: If True, drop molecules with a PAINS match; if False,
                keep only molecules with a PAINS match
            include_smiles: Include SMILES in output
            include_name: Include molecule name in output
        """
        # RDKit filter catalog restricted to the PAINS entries.
        catalog_params = FilterCatalog.FilterCatalogParams()
        catalog_params.AddCatalog(
            FilterCatalog.FilterCatalogParams.FilterCatalogs.PAINS
        )
        self.catalog = FilterCatalog.FilterCatalog(catalog_params)

        self.exclude = exclude
        self.include_smiles = include_smiles
        self.include_name = include_name

    def filter(self, record: MoleculeRecord) -> Optional[dict[str, Any]]:
        """
        Check one record against the PAINS catalog.

        Args:
            record: MoleculeRecord to check

        Returns:
            Output dictionary when the record is kept, None when it is
            rejected or has no molecule
        """
        mol = record.mol
        if mol is None:
            return None

        flagged = self.catalog.GetFirstMatch(mol) is not None

        # Rejected when (exclude and flagged) or (not exclude and not flagged),
        # i.e. exactly when the two flags agree.
        if bool(self.exclude) == flagged:
            return None

        row: dict[str, Any] = {}
        if self.include_smiles:
            row["smiles"] = record.smiles
        if self.include_name and record.name:
            row["name"] = record.name

        # Metadata never overwrites the smiles/name entries set above.
        for key, value in record.metadata.items():
            row.setdefault(key, value)

        return row
312
+
313
+
314
class ElementFilter:
    """Filter molecules by allowed/required/forbidden elements."""

    def __init__(
        self,
        allowed_elements: Optional[list[str]] = None,
        required_elements: Optional[list[str]] = None,
        forbidden_elements: Optional[list[str]] = None,
        include_smiles: bool = True,
        include_name: bool = True,
    ):
        """
        Store the element constraints and output options.

        An empty or None list disables the corresponding constraint.

        Args:
            allowed_elements: Only these element symbols may appear
            required_elements: Every one of these symbols must appear
            forbidden_elements: None of these symbols may appear
            include_smiles: Include SMILES in output
            include_name: Include molecule name in output
        """
        self.allowed = set(allowed_elements) if allowed_elements else None
        self.required = set(required_elements) if required_elements else None
        self.forbidden = set(forbidden_elements) if forbidden_elements else None
        self.include_smiles = include_smiles
        self.include_name = include_name

    def filter(self, record: MoleculeRecord) -> Optional[dict[str, Any]]:
        """
        Check the element composition of one record.

        Args:
            record: MoleculeRecord to check

        Returns:
            Output dictionary when all active constraints hold, None when
            one fails or the record has no molecule
        """
        mol = record.mol
        if mol is None:
            return None

        # Symbols of the atoms returned by GetAtoms().
        present = {atom.GetSymbol() for atom in mol.GetAtoms()}

        if self.allowed is not None and not present <= self.allowed:
            return None
        if self.required is not None and not self.required <= present:
            return None
        if self.forbidden is not None and not present.isdisjoint(self.forbidden):
            return None

        row: dict[str, Any] = {}
        if self.include_smiles:
            row["smiles"] = record.smiles
        if self.include_name and record.name:
            row["name"] = record.name

        # Metadata never overwrites the smiles/name entries set above.
        for key, value in record.metadata.items():
            row.setdefault(key, value)

        return row
375
+
376
+
377
class ComplexityFilter:
    """Filter molecules by size, ring count and rotatable-bond count."""

    def __init__(
        self,
        min_atoms: int = 1,
        max_atoms: int = 100,
        min_rings: int = 0,
        max_rings: int = 10,
        min_rotatable: int = 0,
        max_rotatable: int = 20,
        include_smiles: bool = True,
        include_name: bool = True,
    ):
        """
        Store the complexity bounds and output options.

        Args:
            min_atoms: Minimum heavy atom count
            max_atoms: Maximum heavy atom count
            min_rings: Minimum ring count
            max_rings: Maximum ring count
            min_rotatable: Minimum rotatable bonds
            max_rotatable: Maximum rotatable bonds
            include_smiles: Include SMILES in output
            include_name: Include molecule name in output
        """
        self.min_atoms, self.max_atoms = min_atoms, max_atoms
        self.min_rings, self.max_rings = min_rings, max_rings
        self.min_rotatable, self.max_rotatable = min_rotatable, max_rotatable
        self.include_smiles = include_smiles
        self.include_name = include_name

    def filter(self, record: MoleculeRecord) -> Optional[dict[str, Any]]:
        """
        Check one record against the three complexity ranges.

        Args:
            record: MoleculeRecord to check

        Returns:
            Output dictionary when every measure is inside its inclusive
            range, None otherwise or when the record has no molecule
        """
        mol = record.mol
        if mol is None:
            return None

        # Each measure is computed lazily, in this order, so a later
        # descriptor is never evaluated once an earlier check has failed.
        checks = (
            (mol.GetNumHeavyAtoms, self.min_atoms, self.max_atoms),
            (lambda: Descriptors.RingCount(mol), self.min_rings, self.max_rings),
            (
                lambda: Descriptors.NumRotatableBonds(mol),
                self.min_rotatable,
                self.max_rotatable,
            ),
        )
        for measure, lower, upper in checks:
            observed = measure()
            if observed < lower or observed > upper:
                return None

        row: dict[str, Any] = {}
        if self.include_smiles:
            row["smiles"] = record.smiles
        if self.include_name and record.name:
            row["name"] = record.name

        # Metadata never overwrites the smiles/name entries set above.
        for key, value in record.metadata.items():
            row.setdefault(key, value)

        return row
@@ -0,0 +1,265 @@
1
+ """Molecular fingerprint computation engine."""
2
+
3
+ from dataclasses import dataclass
4
+ from enum import Enum
5
+ from typing import Optional, Any
6
+
7
+ from rdkit import Chem, DataStructs
8
+ from rdkit.Chem import AllChem, MACCSkeys, rdMolDescriptors
9
+
10
+ from rdkit_cli.io.readers import MoleculeRecord
11
+
12
+
13
class FingerprintType(Enum):
    """Fingerprint kinds understood by compute_fingerprint()."""

    # The string values double as the names stored in FINGERPRINT_INFO.
    MORGAN = "morgan"
    MACCS = "maccs"
    RDKIT = "rdkit"
    ATOMPAIR = "atompair"
    TORSION = "torsion"
    PATTERN = "pattern"
22
+
23
+
24
@dataclass
class FingerprintInfo:
    """Descriptive metadata for one fingerprint type."""

    # Short identifier of the fingerprint type.
    name: str
    # One-line human-readable description.
    description: str
    # Bit length used when the caller does not choose one.
    default_bits: int
    # Whether the fingerprint takes a radius parameter.
    has_radius: bool
32
+
33
+
34
# Metadata for every FingerprintType member, in declaration order.
# Row layout: (type, name, description, default_bits, has_radius).
FINGERPRINT_INFO: dict[FingerprintType, FingerprintInfo] = {
    kind: FingerprintInfo(
        name=name,
        description=description,
        default_bits=default_bits,
        has_radius=has_radius,
    )
    for kind, name, description, default_bits, has_radius in (
        (
            FingerprintType.MORGAN,
            "morgan",
            "Morgan/ECFP circular fingerprints",
            2048,
            True,
        ),
        (
            FingerprintType.MACCS,
            "maccs",
            "MACCS structural keys (166 bits)",
            167,
            False,
        ),
        (
            FingerprintType.RDKIT,
            "rdkit",
            "RDKit/Daylight-like path-based fingerprints",
            2048,
            False,
        ),
        (
            FingerprintType.ATOMPAIR,
            "atompair",
            "Atom pair fingerprints",
            2048,
            False,
        ),
        (
            FingerprintType.TORSION,
            "torsion",
            "Topological torsion fingerprints",
            2048,
            False,
        ),
        (
            FingerprintType.PATTERN,
            "pattern",
            "SMARTS pattern fingerprints (for screening)",
            2048,
            False,
        ),
    )
}
72
+
73
+
74
def list_fingerprints() -> list[FingerprintInfo]:
    """Return the FingerprintInfo entries of FINGERPRINT_INFO as a new list."""
    return [*FINGERPRINT_INFO.values()]
77
+
78
+
79
def compute_fingerprint(
    mol: Chem.Mol,
    fp_type: FingerprintType,
    n_bits: int = 2048,
    radius: int = 2,
    use_counts: bool = False,
) -> Optional[DataStructs.ExplicitBitVect]:
    """
    Compute fingerprint for a molecule.

    Args:
        mol: RDKit molecule
        fp_type: Type of fingerprint
        n_bits: Number of bits (not passed to the MACCS generator)
        radius: Radius for Morgan fingerprints
        use_counts: Use count fingerprints (Morgan only). In that case the
            object returned is whatever GetHashedMorganFingerprint yields
            rather than a bit vector.

    Returns:
        Fingerprint object, or None when the RDKit call raises

    Raises:
        ValueError: If fp_type is not one of the supported fingerprint types.
    """
    # Only the RDKit calls are guarded: a failure on a single molecule is
    # reported as None so that batch processing can skip it.
    try:
        if fp_type == FingerprintType.MORGAN:
            if use_counts:
                return rdMolDescriptors.GetHashedMorganFingerprint(
                    mol, radius, nBits=n_bits
                )
            return rdMolDescriptors.GetMorganFingerprintAsBitVect(
                mol, radius, nBits=n_bits
            )

        if fp_type == FingerprintType.MACCS:
            return MACCSkeys.GenMACCSKeys(mol)

        if fp_type == FingerprintType.RDKIT:
            return Chem.RDKFingerprint(mol, fpSize=n_bits)

        if fp_type == FingerprintType.ATOMPAIR:
            return rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(
                mol, nBits=n_bits
            )

        if fp_type == FingerprintType.TORSION:
            return rdMolDescriptors.GetHashedTopologicalTorsionFingerprintAsBitVect(
                mol, nBits=n_bits
            )

        if fp_type == FingerprintType.PATTERN:
            return Chem.PatternFingerprint(mol, fpSize=n_bits)

    except Exception:
        return None

    # Reached only when no branch matched. Raising here, outside the try,
    # keeps the error from being swallowed by the handler above.
    raise ValueError(f"Unknown fingerprint type: {fp_type}")
134
+
135
+
136
def fingerprint_to_hex(fp) -> str:
    """
    Convert fingerprint to a zero-padded hexadecimal string.

    The bit at index 0 is the most significant bit of the encoded number.
    The result has ceil(n_bits / 4) hex digits, so leading zero bits are
    kept; when n_bits is not a multiple of 4 the value is right-aligned.

    Args:
        fp: Object exposing ToBitString(), or a count vector exposing
            GetLength() and GetNonzeroElements(); None is accepted. For a
            count vector only the presence of a nonzero count is encoded.

    Returns:
        Lowercase hex string, or "" when fp is None or has zero length
    """
    if fp is None:
        return ""

    if hasattr(fp, "GetNonzeroElements"):
        # Count fingerprint: set one bit per index holding a nonzero count.
        n_bits = fp.GetLength()
        value = 0
        for index in fp.GetNonzeroElements():
            value |= 1 << (n_bits - 1 - index)
    else:
        bit_string = fp.ToBitString()
        n_bits = len(bit_string)
        value = int(bit_string, 2) if bit_string else 0

    if n_bits == 0:
        return ""

    hex_digits = (n_bits + 3) // 4
    return format(value, f"0{hex_digits}x")
148
+
149
+
150
def fingerprint_to_bitstring(fp) -> str:
    """
    Convert fingerprint to a string of "0"/"1" characters.

    Args:
        fp: Object exposing ToBitString(), or a count vector exposing
            GetLength() and GetNonzeroElements(); None is accepted. For a
            count vector a position is "1" when its count is nonzero.

    Returns:
        Bit string with index 0 first, or "" when fp is None
    """
    if fp is None:
        return ""

    if hasattr(fp, "ToBitString"):
        return fp.ToBitString()

    # Count fingerprints have no ToBitString(); derive presence bits instead.
    nonzero = fp.GetNonzeroElements()
    return "".join("1" if index in nonzero else "0" for index in range(fp.GetLength()))
155
+
156
+
157
def fingerprint_to_numpy(fp):
    """
    Convert fingerprint to a numpy int8 array.

    Args:
        fp: Fingerprint accepted by DataStructs.ConvertToNumpyArray, or None

    Returns:
        Array filled by DataStructs.ConvertToNumpyArray, or None when fp is
        None
    """
    # Imported lazily so numpy is only needed when this function is called.
    import numpy as np

    if fp is None:
        return None

    values = np.zeros(len(fp), dtype=np.int8)
    DataStructs.ConvertToNumpyArray(fp, values)
    return values
167
+
168
+
169
class FingerprintCalculator:
    """Compute fingerprints for molecule records and format them as rows."""

    def __init__(
        self,
        fp_type: FingerprintType = FingerprintType.MORGAN,
        n_bits: int = 2048,
        radius: int = 2,
        use_counts: bool = False,
        output_format: str = "hex",
        include_smiles: bool = True,
        include_name: bool = True,
    ):
        """
        Store the fingerprint settings and output options.

        Args:
            fp_type: Type of fingerprint
            n_bits: Number of bits (forced to 167 for MACCS)
            radius: Radius for Morgan fingerprints
            use_counts: Use count fingerprints
            output_format: Output format (hex, bitstring, bits); any other
                value is handled like hex
            include_smiles: Include SMILES in output
            include_name: Include molecule name in output
        """
        self.fp_type = fp_type
        # MACCS keys have a fixed length, so the requested size is ignored.
        self.n_bits = 167 if fp_type == FingerprintType.MACCS else n_bits
        self.radius = radius
        self.use_counts = use_counts
        self.output_format = output_format
        self.include_smiles = include_smiles
        self.include_name = include_name

    def compute(self, record: MoleculeRecord) -> Optional[dict[str, Any]]:
        """
        Compute and format the fingerprint of one record.

        Args:
            record: MoleculeRecord to process

        Returns:
            Output dictionary, or None when the record has no molecule or
            the fingerprint could not be computed
        """
        mol = record.mol
        if mol is None:
            return None

        fp = compute_fingerprint(
            mol,
            self.fp_type,
            n_bits=self.n_bits,
            radius=self.radius,
            use_counts=self.use_counts,
        )
        if fp is None:
            return None

        row: dict[str, Any] = {}
        if self.include_smiles:
            row["smiles"] = record.smiles
        if self.include_name and record.name:
            row["name"] = record.name

        chosen_format = self.output_format
        if chosen_format == "bitstring":
            row["fingerprint"] = fingerprint_to_bitstring(fp)
        elif chosen_format == "bits":
            # One integer column per bit position.
            for position, char in enumerate(fingerprint_to_bitstring(fp)):
                row[f"bit_{position}"] = int(char)
        else:
            # "hex" and every unrecognized format share this encoding.
            row["fingerprint"] = fingerprint_to_hex(fp)

        return row

    def get_column_names(self) -> list[str]:
        """
        List the output column names in order.

        The "name" column is listed whenever include_name is set, even
        though compute() omits it for records without a name.
        """
        columns = [
            label
            for label, wanted in (
                ("smiles", self.include_smiles),
                ("name", self.include_name),
            )
            if wanted
        ]

        if self.output_format == "bits":
            columns += [f"bit_{position}" for position in range(self.n_bits)]
        else:
            columns.append("fingerprint")

        return columns