rdkit-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. rdkit_cli/__init__.py +4 -0
  2. rdkit_cli/__main__.py +6 -0
  3. rdkit_cli/cli.py +162 -0
  4. rdkit_cli/commands/__init__.py +1 -0
  5. rdkit_cli/commands/conformers.py +220 -0
  6. rdkit_cli/commands/convert.py +162 -0
  7. rdkit_cli/commands/depict.py +311 -0
  8. rdkit_cli/commands/descriptors.py +251 -0
  9. rdkit_cli/commands/diversity.py +232 -0
  10. rdkit_cli/commands/enumerate.py +229 -0
  11. rdkit_cli/commands/filter.py +384 -0
  12. rdkit_cli/commands/fingerprints.py +179 -0
  13. rdkit_cli/commands/fragment.py +284 -0
  14. rdkit_cli/commands/mcs.py +162 -0
  15. rdkit_cli/commands/reactions.py +191 -0
  16. rdkit_cli/commands/scaffold.py +243 -0
  17. rdkit_cli/commands/similarity.py +359 -0
  18. rdkit_cli/commands/standardize.py +138 -0
  19. rdkit_cli/core/__init__.py +1 -0
  20. rdkit_cli/core/conformers.py +197 -0
  21. rdkit_cli/core/depict.py +241 -0
  22. rdkit_cli/core/descriptors.py +248 -0
  23. rdkit_cli/core/diversity.py +174 -0
  24. rdkit_cli/core/enumerate.py +190 -0
  25. rdkit_cli/core/filters.py +443 -0
  26. rdkit_cli/core/fingerprints.py +265 -0
  27. rdkit_cli/core/fragment.py +237 -0
  28. rdkit_cli/core/mcs.py +128 -0
  29. rdkit_cli/core/reactions.py +159 -0
  30. rdkit_cli/core/scaffold.py +174 -0
  31. rdkit_cli/core/similarity.py +206 -0
  32. rdkit_cli/core/standardizer.py +141 -0
  33. rdkit_cli/io/__init__.py +7 -0
  34. rdkit_cli/io/formats.py +109 -0
  35. rdkit_cli/io/readers.py +352 -0
  36. rdkit_cli/io/writers.py +275 -0
  37. rdkit_cli/parallel/__init__.py +5 -0
  38. rdkit_cli/parallel/batch.py +181 -0
  39. rdkit_cli/parallel/executor.py +180 -0
  40. rdkit_cli/progress/__init__.py +5 -0
  41. rdkit_cli/progress/ninja.py +195 -0
  42. rdkit_cli/utils/__init__.py +1 -0
  43. rdkit_cli-0.1.0.dist-info/METADATA +380 -0
  44. rdkit_cli-0.1.0.dist-info/RECORD +47 -0
  45. rdkit_cli-0.1.0.dist-info/WHEEL +4 -0
  46. rdkit_cli-0.1.0.dist-info/entry_points.txt +2 -0
  47. rdkit_cli-0.1.0.dist-info/licenses/LICENSE +190 -0
@@ -0,0 +1,237 @@
1
+ """Molecular fragmentation engine."""
2
+
3
+ from typing import Optional, Any
4
+ from collections import Counter
5
+
6
+ from rdkit import Chem
7
+ from rdkit.Chem import BRICS, Recap, AllChem, rdMolDescriptors
8
+
9
+ from rdkit_cli.io.readers import MoleculeRecord
10
+
11
+
12
class BRICSFragmenter:
    """Fragment molecules using the BRICS algorithm."""

    def __init__(
        self,
        min_fragment_size: int = 1,
        include_smiles: bool = True,
        include_name: bool = True,
    ):
        """
        Initialize BRICS fragmenter.

        Args:
            min_fragment_size: Minimum fragment heavy atom count
            include_smiles: Include original SMILES in output
            include_name: Include molecule name in output
        """
        self.min_fragment_size = min_fragment_size
        self.include_smiles = include_smiles
        self.include_name = include_name

    def fragment(self, record: MoleculeRecord) -> list[dict[str, Any]]:
        """
        Fragment a molecule using BRICS.

        Fragments are processed in sorted SMILES order so that the row order
        and the ``fragment_idx`` values are reproducible between runs.

        Args:
            record: MoleculeRecord to process

        Returns:
            List of dictionaries, one per retained fragment, holding
            ``fragment_smiles``, optionally ``smiles`` and ``name`` of the
            parent, ``fragment_idx`` (position in the sorted fragment list,
            counted before size filtering) and ``heavy_atom_count``.
            Empty list if the record has no molecule or fragmentation fails.
        """
        if record.mol is None:
            return []

        try:
            # BRICSDecompose hands back its SMILES as an unordered set by
            # default; sorting removes the dependence on string-hash order.
            fragments = sorted(BRICS.BRICSDecompose(record.mol))

            results = []
            for i, frag_smi in enumerate(fragments):
                # Parse the fragment so its size can be checked
                frag_mol = Chem.MolFromSmiles(frag_smi)
                if frag_mol is None:
                    continue

                heavy_atoms = frag_mol.GetNumHeavyAtoms()
                if heavy_atoms < self.min_fragment_size:
                    continue

                result: dict[str, Any] = {"fragment_smiles": frag_smi}

                if self.include_smiles:
                    result["smiles"] = record.smiles
                if self.include_name and record.name:
                    result["name"] = record.name

                result["fragment_idx"] = i
                result["heavy_atom_count"] = heavy_atoms

                results.append(result)

            return results

        except Exception:
            # Best effort: a molecule that cannot be fragmented yields no rows
            return []
76
+
77
+
78
class RECAPFragmenter:
    """Break molecules into RECAP leaf fragments."""

    def __init__(
        self,
        min_fragment_size: int = 1,
        include_smiles: bool = True,
        include_name: bool = True,
    ):
        """
        Set up the RECAP fragmenter.

        Args:
            min_fragment_size: Smallest heavy atom count a fragment may have
            include_smiles: Whether to report the parent SMILES per fragment
            include_name: Whether to report the parent name per fragment
        """
        self.min_fragment_size = min_fragment_size
        self.include_smiles = include_smiles
        self.include_name = include_name

    def fragment(self, record: MoleculeRecord) -> list[dict[str, Any]]:
        """
        Run RECAP on one record and report the leaves of the resulting tree.

        Args:
            record: MoleculeRecord to process

        Returns:
            One dictionary per retained leaf (fragment SMILES, optional parent
            SMILES/name, leaf index, heavy atom count); an empty list when the
            record has no molecule or any step raises.
        """
        if record.mol is None:
            return []

        try:
            leaf_nodes = Recap.RecapDecompose(record.mol).GetLeaves()

            rows = []
            for idx, (leaf_smiles, leaf) in enumerate(leaf_nodes.items()):
                leaf_mol = leaf.mol
                if leaf_mol is None:
                    continue

                # Drop leaves that are smaller than the configured minimum
                size = leaf_mol.GetNumHeavyAtoms()
                if size < self.min_fragment_size:
                    continue

                row: dict[str, Any] = {"fragment_smiles": leaf_smiles}
                if self.include_smiles:
                    row["smiles"] = record.smiles
                if self.include_name and record.name:
                    row["name"] = record.name
                row.update(fragment_idx=idx, heavy_atom_count=size)

                rows.append(row)
        except Exception:
            return []

        return rows
143
+
144
+
145
class FunctionalGroupExtractor:
    """Count common functional groups in molecules via SMARTS matching."""

    # Functional group name -> SMARTS pattern used to count it
    _FG_SMARTS: dict[str, str] = {
        "alcohol": "[OX2H]",
        "aldehyde": "[CX3H1](=O)[#6]",
        "ketone": "[#6][CX3](=O)[#6]",
        "carboxylic_acid": "[CX3](=O)[OX2H1]",
        "ester": "[#6][CX3](=O)[OX2][#6]",
        "ether": "[OD2]([#6])[#6]",
        "amine_primary": "[NX3H2][#6]",
        "amine_secondary": "[NX3H1]([#6])[#6]",
        "amine_tertiary": "[NX3]([#6])([#6])[#6]",
        "amide": "[NX3][CX3](=[OX1])[#6]",
        "nitro": "[$([NX3](=O)=O),$([NX3+](=O)[O-])]",
        "nitrile": "[NX1]#[CX2]",
        "halogen": "[F,Cl,Br,I]",
        "thiol": "[SX2H]",
        "sulfide": "[#16X2]([#6])[#6]",
        "aromatic_ring": "a1aaaaa1",
    }

    # Compiled query molecules, built on first use and shared by all instances.
    # Kept on the class (not the instance) so instances stay cheap to copy.
    _pattern_cache: Optional[dict[str, Any]] = None

    def __init__(
        self,
        include_smiles: bool = True,
        include_name: bool = True,
    ):
        """
        Initialize functional group extractor.

        Args:
            include_smiles: Include original SMILES in output
            include_name: Include molecule name in output
        """
        self.include_smiles = include_smiles
        self.include_name = include_name
        # Legacy attribute: nothing in this class uses it. It is looked up
        # defensively so construction does not depend on that function
        # being present in the installed RDKit.
        self._fgs = getattr(rdMolDescriptors, "GetMorganFingerprint", None)

    @classmethod
    def _compiled_patterns(cls) -> dict[str, Any]:
        """Return the name -> compiled SMARTS mapping, compiling it once."""
        cache = cls._pattern_cache
        if cache is None:
            # Build fully before publishing so readers never see a partial map
            cache = {
                name: Chem.MolFromSmarts(smarts)
                for name, smarts in cls._FG_SMARTS.items()
            }
            cls._pattern_cache = cache
        return cache

    def extract(self, record: MoleculeRecord) -> Optional[dict[str, Any]]:
        """
        Extract functional groups from a molecule.

        Args:
            record: MoleculeRecord to process

        Returns:
            Dictionary with optional ``smiles``/``name`` entries followed by one
            ``n_<group>`` match count per functional group, or None if the
            record has no molecule or matching raises.
        """
        if record.mol is None:
            return None

        try:
            result: dict[str, Any] = {}

            if self.include_smiles:
                result["smiles"] = record.smiles
            if self.include_name and record.name:
                result["name"] = record.name

            for name, pattern in self._compiled_patterns().items():
                # A SMARTS that failed to compile is skipped rather than reported
                if pattern is not None:
                    matches = record.mol.GetSubstructMatches(pattern)
                    result[f"n_{name}"] = len(matches)

            return result

        except Exception:
            # Best effort: molecules that cannot be matched are dropped
            return None
216
+
217
+
218
def analyze_fragments(fragments: list[str], top_n: int = 20) -> list[tuple[str, int, float]]:
    """
    Rank fragments by how often they occur.

    Args:
        fragments: List of fragment SMILES (duplicates are what gets counted)
        top_n: How many of the most frequent fragments to report

    Returns:
        List of (fragment, count, percentage) tuples, most frequent first;
        percentage is relative to the total number of input entries and
        rounded to two decimals.
    """
    n_total = len(fragments)
    ranked = Counter(fragments).most_common(top_n)

    return [
        (smi, hits, round((hits / n_total) * 100 if n_total > 0 else 0, 2))
        for smi, hits in ranked
    ]
rdkit_cli/core/mcs.py ADDED
@@ -0,0 +1,128 @@
1
+ """Maximum Common Substructure engine."""
2
+
3
+ from typing import Optional, Any
4
+
5
+ from rdkit import Chem
6
+ from rdkit.Chem import rdFMCS
7
+
8
+
9
def find_mcs(
    mols: list[Chem.Mol],
    timeout: int = 60,
    threshold: float = 1.0,
    maximize: str = "atoms",
    ring_matches_ring_only: bool = True,
    complete_rings_only: bool = True,
    match_valences: bool = False,
    match_chiral_tag: bool = False,
    atom_compare: str = "elements",
    bond_compare: str = "order",
) -> Optional[dict[str, Any]]:
    """
    Compute the Maximum Common Substructure of a set of molecules.

    Args:
        mols: Molecules to compare; None entries are ignored
        timeout: Search time limit in seconds
        threshold: Fraction of molecules that must contain the MCS
        maximize: 'bonds' maximizes bonds; any other value maximizes atoms
        ring_matches_ring_only: Ring atoms only match ring atoms
        complete_rings_only: Only return complete rings
        match_valences: Match atom valences
        match_chiral_tag: Match chirality
        atom_compare: 'any', 'elements' or 'isotopes'; unknown values fall
            back to element comparison
        bond_compare: 'any', 'order' or 'orderexact'; unknown values fall
            back to bond-order comparison

    Returns:
        None when fewer than two usable molecules are given. Otherwise a
        dictionary: the SMARTS and atom/bond counts on success, a
        ``canceled``/``timeout`` pair if the search was canceled, or an
        ``error`` message if the search raised.
    """
    usable = list(filter(lambda candidate: candidate is not None, mols))
    if len(usable) < 2:
        return None

    # Keyword -> RDKit comparison mode lookups
    atom_modes = {
        "any": rdFMCS.AtomCompare.CompareAny,
        "elements": rdFMCS.AtomCompare.CompareElements,
        "isotopes": rdFMCS.AtomCompare.CompareIsotopes,
    }
    bond_modes = {
        "any": rdFMCS.BondCompare.CompareAny,
        "order": rdFMCS.BondCompare.CompareOrder,
        "orderexact": rdFMCS.BondCompare.CompareOrderExact,
    }

    try:
        options = {
            "timeout": timeout,
            "threshold": threshold,
            "maximizeBonds": (maximize == "bonds"),
            "ringMatchesRingOnly": ring_matches_ring_only,
            "completeRingsOnly": complete_rings_only,
            "matchValences": match_valences,
            "matchChiralTag": match_chiral_tag,
            "atomCompare": atom_modes.get(atom_compare, rdFMCS.AtomCompare.CompareElements),
            "bondCompare": bond_modes.get(bond_compare, rdFMCS.BondCompare.CompareOrder),
        }
        mcs = rdFMCS.FindMCS(usable, **options)

        if mcs.canceled:
            return {"canceled": True, "timeout": timeout}

        if mcs.numAtoms == 0:
            return {"smarts": "", "num_atoms": 0, "num_bonds": 0}

        summary = {
            "smarts": mcs.smartsString,
            "num_atoms": mcs.numAtoms,
            "num_bonds": mcs.numBonds,
            "canceled": mcs.canceled,
        }
        return summary

    except Exception as exc:
        return {"error": str(exc)}
88
+
89
+
90
class MCSAligner:
    """Compare query molecules against a fixed reference via their MCS."""

    def __init__(
        self,
        reference_smiles: str,
        timeout: int = 30,
    ):
        """
        Parse the reference molecule and remember the search timeout.

        Args:
            reference_smiles: SMILES of the reference molecule
            timeout: Time limit in seconds for each MCS search

        Raises:
            ValueError: If the reference SMILES cannot be parsed
        """
        self.reference_mol = Chem.MolFromSmiles(reference_smiles)
        parse_failed = self.reference_mol is None
        if parse_failed:
            raise ValueError(f"Invalid reference SMILES: {reference_smiles}")
        self.timeout = timeout

    def find_common(self, mol: Chem.Mol) -> Optional[dict[str, Any]]:
        """
        Compute the MCS shared by the reference and a query molecule.

        Args:
            mol: Query molecule

        Returns:
            Whatever ``find_mcs`` returns for the (reference, query) pair,
            or None when no query molecule is given
        """
        if mol is None:
            return None

        pair = [self.reference_mol, mol]
        return find_mcs(pair, timeout=self.timeout)
@@ -0,0 +1,159 @@
1
+ """Reaction transformation engine."""
2
+
3
+ from typing import Optional, Any
4
+
5
+ from rdkit import Chem
6
+ from rdkit.Chem import AllChem, rdChemReactions
7
+
8
+ from rdkit_cli.io.readers import MoleculeRecord
9
+
10
+
11
+ class ReactionTransformer:
12
+ """Apply SMIRKS transformations to molecules."""
13
+
14
+ def __init__(
15
+ self,
16
+ smirks: str,
17
+ max_products: int = 100,
18
+ ):
19
+ """
20
+ Initialize reaction transformer.
21
+
22
+ Args:
23
+ smirks: SMIRKS reaction pattern
24
+ max_products: Maximum number of products to generate
25
+ """
26
+ self.reaction = AllChem.ReactionFromSmarts(smirks)
27
+ if self.reaction is None:
28
+ raise ValueError(f"Invalid SMIRKS pattern: {smirks}")
29
+
30
+ self.max_products = max_products
31
+
32
+ def transform(self, record: MoleculeRecord) -> Optional[dict[str, Any]]:
33
+ """
34
+ Apply transformation to a molecule.
35
+
36
+ Args:
37
+ record: MoleculeRecord to transform
38
+
39
+ Returns:
40
+ Dictionary with products or None if no reaction
41
+ """
42
+ if record.mol is None:
43
+ return None
44
+
45
+ try:
46
+ products = self.reaction.RunReactants((record.mol,))
47
+
48
+ if not products:
49
+ return None
50
+
51
+ # Collect unique products
52
+ unique_smiles = set()
53
+ for product_set in products[:self.max_products]:
54
+ for prod in product_set:
55
+ try:
56
+ Chem.SanitizeMol(prod)
57
+ smi = Chem.MolToSmiles(prod)
58
+ unique_smiles.add(smi)
59
+ except Exception:
60
+ continue
61
+
62
+ if not unique_smiles:
63
+ return None
64
+
65
+ # Return first product (or could return all)
66
+ product_smiles = list(unique_smiles)[0]
67
+
68
+ result: dict[str, Any] = {
69
+ "smiles": product_smiles,
70
+ "reactant": record.smiles,
71
+ "num_products": len(unique_smiles),
72
+ }
73
+
74
+ if record.name:
75
+ result["name"] = record.name
76
+
77
+ return result
78
+
79
+ except Exception:
80
+ return None
81
+
82
+
83
+ class ReactionEnumerator:
84
+ """Enumerate products from reaction templates."""
85
+
86
+ def __init__(
87
+ self,
88
+ reaction_smarts: str,
89
+ max_products: int = 1000,
90
+ ):
91
+ """
92
+ Initialize reaction enumerator.
93
+
94
+ Args:
95
+ reaction_smarts: Reaction SMARTS
96
+ max_products: Maximum products to generate
97
+ """
98
+ self.reaction = AllChem.ReactionFromSmarts(reaction_smarts)
99
+ if self.reaction is None:
100
+ raise ValueError(f"Invalid reaction SMARTS: {reaction_smarts}")
101
+
102
+ self.max_products = max_products
103
+ self.num_reactants = self.reaction.GetNumReactantTemplates()
104
+
105
+ def enumerate(
106
+ self,
107
+ reactant_lists: list[list[Chem.Mol]],
108
+ ) -> list[dict[str, Any]]:
109
+ """
110
+ Enumerate reaction products from lists of reactants.
111
+
112
+ Args:
113
+ reactant_lists: List of reactant lists (one per reactant template)
114
+
115
+ Returns:
116
+ List of product dictionaries
117
+ """
118
+ if len(reactant_lists) != self.num_reactants:
119
+ raise ValueError(
120
+ f"Expected {self.num_reactants} reactant lists, got {len(reactant_lists)}"
121
+ )
122
+
123
+ results = []
124
+ unique_products = set()
125
+
126
+ # Generate all combinations
127
+ from itertools import product as iterproduct
128
+
129
+ for reactants in iterproduct(*reactant_lists):
130
+ if len(results) >= self.max_products:
131
+ break
132
+
133
+ try:
134
+ products = self.reaction.RunReactants(reactants)
135
+
136
+ for product_set in products:
137
+ for prod in product_set:
138
+ try:
139
+ Chem.SanitizeMol(prod)
140
+ smi = Chem.MolToSmiles(prod)
141
+
142
+ if smi not in unique_products:
143
+ unique_products.add(smi)
144
+ results.append({
145
+ "smiles": smi,
146
+ "reactants": ".".join(
147
+ Chem.MolToSmiles(r) for r in reactants
148
+ ),
149
+ })
150
+
151
+ if len(results) >= self.max_products:
152
+ break
153
+ except Exception:
154
+ continue
155
+
156
+ except Exception:
157
+ continue
158
+
159
+ return results
@@ -0,0 +1,174 @@
1
+ """Scaffold analysis engine."""
2
+
3
+ from typing import Optional, Any
4
+ from collections import Counter
5
+
6
+ from rdkit import Chem
7
+ from rdkit.Chem.Scaffolds import MurckoScaffold
8
+
9
+ from rdkit_cli.io.readers import MoleculeRecord
10
+
11
+
12
def get_murcko_scaffold(mol: Chem.Mol, generic: bool = False) -> Optional[str]:
    """
    Compute the Murcko scaffold of a molecule as SMILES.

    Args:
        mol: RDKit molecule
        generic: When True, convert the scaffold to its generic form first

    Returns:
        SMILES of the (optionally generic) scaffold, or None if any RDKit
        call raises
    """
    try:
        framework = MurckoScaffold.GetScaffoldForMol(mol)
        if generic:
            framework = MurckoScaffold.MakeScaffoldGeneric(framework)
        scaffold_smiles = Chem.MolToSmiles(framework)
    except Exception:
        return None

    return scaffold_smiles
32
+
33
+
34
def get_side_chains(mol: Chem.Mol) -> list[str]:
    """
    Get side chains (R-groups) for a molecule.

    The Murcko scaffold is removed from the molecule and each remaining
    connected piece is reported as one side chain. Attachment points to the
    scaffold appear as dummy atoms in the returned SMILES.

    Args:
        mol: RDKit molecule

    Returns:
        List of side chain SMILES. Empty if the molecule has no scaffold
        (no ring system), has no side chains, or processing fails.
    """
    try:
        core = MurckoScaffold.GetScaffoldForMol(mol)
        if core.GetNumAtoms() == 0:
            # Without a scaffold there is nothing for side chains to attach to
            return []

        # Strip the scaffold; what is left are the substituents, each capped
        # with a dummy atom where it was bonded to the scaffold.
        remainder = Chem.ReplaceCore(mol, core)
        if remainder is None or remainder.GetNumAtoms() == 0:
            return []

        return [
            Chem.MolToSmiles(side_chain)
            for side_chain in Chem.GetMolFrags(remainder, asMols=True)
        ]
    except Exception:
        # Best effort: molecules RDKit cannot process have no reported chains
        return []
49
+
50
+
51
class ScaffoldExtractor:
    """Produce the Murcko scaffold for each molecule record."""

    def __init__(
        self,
        generic: bool = False,
        include_smiles: bool = True,
        include_name: bool = True,
    ):
        """
        Configure the extractor.

        Args:
            generic: Report generic scaffolds instead of plain ones
            include_smiles: Whether to report the original SMILES
            include_name: Whether to report the molecule name
        """
        self.generic = generic
        self.include_smiles = include_smiles
        self.include_name = include_name

    def extract(self, record: MoleculeRecord) -> Optional[dict[str, Any]]:
        """
        Compute the scaffold of one record.

        Args:
            record: MoleculeRecord to process

        Returns:
            Dictionary with optional ``smiles``/``name`` entries followed by
            ``scaffold``, or None when the record has no molecule or the
            scaffold could not be computed
        """
        if record.mol is None:
            return None

        scaffold_smiles = get_murcko_scaffold(record.mol, generic=self.generic)
        if scaffold_smiles is None:
            return None

        # Key order matters to writers: parent info first, scaffold last
        row: dict[str, Any] = {}
        if self.include_smiles:
            row["smiles"] = record.smiles
        if self.include_name and record.name:
            row["name"] = record.name
        row["scaffold"] = scaffold_smiles

        return row
100
+
101
+
102
class ScaffoldDecomposer:
    """Report the Murcko scaffold and generic scaffold of molecules."""

    def __init__(
        self,
        include_smiles: bool = True,
        include_name: bool = True,
    ):
        """
        Configure the decomposer.

        Args:
            include_smiles: Whether to report the original SMILES
            include_name: Whether to report the molecule name
        """
        self.include_smiles = include_smiles
        self.include_name = include_name

    def decompose(self, record: MoleculeRecord) -> Optional[dict[str, Any]]:
        """
        Compute both scaffold variants for one record.

        Args:
            record: MoleculeRecord to process

        Returns:
            Dictionary with optional ``smiles``/``name`` entries followed by
            ``scaffold`` and ``generic_scaffold``, or None when the record
            has no molecule or the plain scaffold could not be computed
        """
        if record.mol is None:
            return None

        plain = get_murcko_scaffold(record.mol)
        if plain is None:
            return None

        # The generic variant may itself be None; it is reported as-is
        generic = get_murcko_scaffold(record.mol, generic=True)

        row: dict[str, Any] = {}
        if self.include_smiles:
            row["smiles"] = record.smiles
        if self.include_name and record.name:
            row["name"] = record.name
        row.update(scaffold=plain, generic_scaffold=generic)

        return row
150
+
151
+
152
def analyze_scaffolds(
    scaffolds: list[str],
    top_n: int = 20,
) -> list[tuple[str, int, float]]:
    """
    Rank scaffolds by how often they occur.

    Args:
        scaffolds: List of scaffold SMILES (duplicates are what gets counted)
        top_n: How many of the most frequent scaffolds to report

    Returns:
        List of (scaffold, count, percentage) tuples, most frequent first;
        percentage is relative to the total number of input entries and
        rounded to two decimals.
    """
    n_total = len(scaffolds)
    frequencies = Counter(scaffolds)

    summary = []
    for smi, hits in frequencies.most_common(top_n):
        if n_total > 0:
            share = (hits / n_total) * 100
        else:
            share = 0
        summary.append((smi, hits, round(share, 2)))

    return summary