smiles-js 2.1.0 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/docs/smiles.peggy +215 -0
- package/package.json +1 -1
- package/src/decompiler.js +209 -49
- package/src/decompiler.test.js +232 -60
- package/src/fragment.test.js +7 -2
- package/src/method-attachers.js +8 -8
- package/test-integration/__snapshots__/acetaminophen.test.js.snap +20 -0
- package/test-integration/__snapshots__/adjuvant-analgesics.test.js.snap +62 -0
- package/test-integration/__snapshots__/cholesterol-drugs.test.js.snap +261 -0
- package/test-integration/__snapshots__/dexamethasone.test.js.snap +31 -0
- package/test-integration/__snapshots__/endocannabinoids.test.js.snap +77 -0
- package/test-integration/__snapshots__/endogenous-opioids.test.js.snap +1116 -0
- package/test-integration/__snapshots__/hypertension-medication.test.js.snap +69 -0
- package/test-integration/__snapshots__/local-anesthetics.test.js.snap +97 -0
- package/test-integration/__snapshots__/nsaids-otc.test.js.snap +60 -0
- package/test-integration/__snapshots__/nsaids-prescription.test.js.snap +113 -0
- package/test-integration/__snapshots__/opioids.test.js.snap +109 -0
- package/test-integration/__snapshots__/steroids.test.js.snap +379 -0
- package/test-integration/acetaminophen.test.js +15 -3
- package/test-integration/adjuvant-analgesics.test.js +43 -7
- package/test-integration/cholesterol-drugs.test.js +88 -22
- package/test-integration/dexamethasone.test.js +8 -2
- package/test-integration/endocannabinoids.test.js +48 -12
- package/test-integration/endogenous-opioids.smiles.js +32 -0
- package/test-integration/endogenous-opioids.test.js +192 -0
- package/test-integration/hypertension-medication.test.js +32 -8
- package/test-integration/local-anesthetics.smiles.js +33 -0
- package/test-integration/local-anesthetics.test.js +64 -16
- package/test-integration/nsaids-otc.test.js +40 -10
- package/test-integration/nsaids-prescription.test.js +72 -18
- package/test-integration/opioids.test.js +56 -14
- package/test-integration/steroids.test.js +112 -28
- package/test-integration/utils.js +4 -2
- package/todo +2 -1
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
// SMILES Grammar for Peggy (PEG parser generator for JavaScript)
|
|
2
|
+
//
|
|
3
|
+
// This grammar mirrors the tokenizer + buildAtomList two-pass architecture
|
|
4
|
+
// in src/tokenizer.js and src/parser/smiles-parser-core.js.
|
|
5
|
+
//
|
|
6
|
+
// What this grammar produces:
|
|
7
|
+
// A concrete parse tree (token stream with structure). The codebase then
|
|
8
|
+
// does a second semantic pass (buildAST) to detect rings, fused rings,
|
|
9
|
+
// and attachments — that pass is stateful and cannot be expressed in PEG.
|
|
10
|
+
//
|
|
11
|
+
// Usage:
|
|
12
|
+
// npx peggy smiles.peggy # generates smiles.js
|
|
13
|
+
// npx peggy --format commonjs -o smiles.cjs smiles.peggy # CommonJS
|
|
14
|
+
//
|
|
15
|
+
// Differences from OpenSMILES spec (matches codebase behavior):
|
|
16
|
+
// - Simple atoms accept ANY [A-Za-z] letter, not just the organic subset.
|
|
17
|
+
// The tokenizer (isAtomStart + parseSimpleAtom) only special-cases Br/Cl
|
|
18
|
+
// as two-letter atoms; everything else is a single letter.
|
|
19
|
+
// - Bracketed atoms capture raw content only (the codebase has a TODO for
|
|
20
|
+
// full isotope/chirality/hcount/charge/class parsing). The grammar DOES
|
|
21
|
+
// parse the sub-fields since it's trivial in PEG and useful for consumers.
|
|
22
|
+
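// - Whitespace is silently skipped (tokenizer.js line 104). In this grammar it
//   is accepted around ".", between chain elements and inside branch
//   parentheses, but not between an atom and its ring markers or branches.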
|
|
23
|
+
// - Ring markers can be preceded by a bond (e.g. C=1CC=1 is legal SMILES).
|
|
24
|
+
|
|
25
|
+
// ============================================================
// Entry rule: one chain, or several chains separated by "."
// Mirrors: tokenizer DOT token → buildAtomList skips DOT
//
// A lone chain is returned unchanged; two or more chains are
// wrapped in a { type: "molecule", components } node.
// ============================================================
smiles
  = _ first:chain others:(_ "." _ @chain)* _ {
      // `others` holds only the plucked chain of each ".chain" repetition.
      if (others.length === 0) return first;
      return { type: "molecule", components: [first, ...others] };
    }
|
|
35
|
+
|
|
36
|
+
// ============================================================
// Chain: atom_units in sequence, each optionally preceded by a bond
// Mirrors: buildAtomList's linear scan — an ATOM token may be
//          preceded by a BOND token and followed by RING_MARKER
//          and BRANCH_OPEN/CLOSE tokens.
//
// Result: atoms[i] is the i-th atom_unit and bonds[i] is the bond
// written in front of it (null when absent; always null for i = 0).
// ============================================================
chain
  = first:atom_unit rest:(_ bond? _ atom_unit)* {
      // Each `rest` entry is [ws, bond-or-null, ws, atom_unit].
      const atoms = [first, ...rest.map(([, , , unit]) => unit)];
      const bonds = [null, ...rest.map(([, link]) => link)];
      return { type: "chain", atoms, bonds };
    }
|
|
52
|
+
|
|
53
|
+
// ============================================================
// Atom unit: one atom, then its ring markers, then its branches
//
// Same token order that buildAtomList consumes:
//   ATOM → RING_MARKER* → (BRANCH_OPEN chain BRANCH_CLOSE)*
//
// A ring marker may be prefixed by its own bond (e.g. C=1...=1);
// that bond describes the link between the two ring-closure atoms.
// No whitespace is accepted inside an atom unit.
// ============================================================
atom_unit
  = atom:atom closures:(bond? ring_marker)* branches:branch* {
      // Each closure is a [bond-or-null, ring number] pair.
      const rings = closures.map(([bond, number]) => ({ bond, number }));
      return { atom, rings, branches };
    }
|
|
71
|
+
|
|
72
|
+
// ============================================================
// Atoms
//
// Either of the two forms recognised by tokenizer.js:
//   1. Bracketed: '[' ... ']'   (parseBracketedAtom, line 67)
//   2. Simple:    [A-Za-z*]     (parseSimpleAtom, line 39)
// The bracketed form is tried first.
// ============================================================
atom
  = bracketed_atom
  / simple_atom
|
|
82
|
+
|
|
83
|
+
// ----------------------------------------------------------
// Bracketed atom: [isotope? element chirality? hcount? charge? class?]
//
// The codebase currently keeps only { raw } (parseBracketedAtom, line 79).
// This rule additionally exposes the parsed sub-fields for consumers,
// and still stores the full matched text — brackets included — in `raw`
// for round-trip fidelity (matching token.value in the codebase).
// ----------------------------------------------------------
bracketed_atom
  = "[" fields:bracketed_content "]" {
      const raw = text();
      return { type: "bracket_atom", ...fields, raw };
    }
|
|
95
|
+
|
|
96
|
+
// Fields between "[" and "]", in fixed order. Only the element symbol is
// mandatory; every absent optional field is reported as null.
bracketed_content
  = isotope:isotope? symbol:bracket_element chirality:chirality?
    hcount:hcount? charge:charge? atomClass:atom_class? {
      return { isotope, symbol, chirality, hcount, charge, atomClass };
    }
|
|
105
|
+
|
|
106
|
+
// Isotope: a run of digits directly in front of the element symbol,
// e.g. [13C], [2H]. The lookahead requires a letter or "*" to follow
// without consuming it. Returned as a number.
isotope
  = value:$[0-9]+ &[A-Za-z*] { return Number.parseInt(value, 10); }
|
|
110
|
+
|
|
111
|
+
// Element inside brackets, tried in this order: aromatic symbol
// (two-letter se/as or a single lowercase letter), standard element
// symbol, then the "*" wildcard. The matched text is the result.
bracket_element
  = aromatic_element
  / element_symbol
  / "*"
|
|
117
|
+
|
|
118
|
+
// Standard element symbol: one uppercase letter, optionally followed by
// one lowercase letter (e.g. C, Na, Fe, Zr). The pair is not checked
// against the periodic table here.
element_symbol
  = $([A-Z] [a-z]?)
|
|
122
|
+
|
|
123
|
+
// Aromatic elements inside brackets (OpenSMILES: b, c, n, o, p, s, se, as).
// The two-letter forms are listed first so "se"/"as" are not cut short
// by the single-letter class.
aromatic_element
  = "se"
  / "as"
  / [bcnops]
|
|
129
|
+
|
|
130
|
+
// Chirality: "@@" or "@", longest first. Extended forms such as @TH1
// are not parsed (neither does the codebase).
chirality
  = "@@"
  / "@"
|
|
134
|
+
|
|
135
|
+
// Hydrogen count: "H" with at most one digit after it.
// A bare "H" counts as 1: [C@H] → 1, [NH3+] → 3.
hcount
  = "H" count:$[0-9]? {
      if (!count) return 1;
      return Number.parseInt(count, 10);
    }
|
|
139
|
+
|
|
140
|
+
// Charge: +, -, +2, -1, ++, --
// The doubled signs are tried first; otherwise a single sign is read,
// followed by an optional magnitude that defaults to 1.
charge
  = "++" { return 2; }
  / "--" { return -2; }
  / sign:[+-] digits:$([0-9]*) {
      const magnitude = digits ? Number.parseInt(digits, 10) : 1;
      return sign === "+" ? magnitude : -magnitude;
    }
|
|
149
|
+
|
|
150
|
+
// Atom class: ":" followed by one or more digits, e.g. [C:1].
// Returned as a number.
atom_class
  = ":" digits:$[0-9]+ { return Number.parseInt(digits, 10); }
|
|
154
|
+
|
|
155
|
+
// ----------------------------------------------------------
// Simple (non-bracketed) atom
//
// Mirrors tokenizer.js parseSimpleAtom (line 39) + isAtomStart (line 31):
//   isAtomStart accepts /[A-Za-z*]/
//   parseSimpleAtom looks for two-letter Cl/Br first, then one character
//
// Deliberately permissive: any letter is accepted rather than only the
// OpenSMILES organic subset; validation is left to the semantic layer.
// ----------------------------------------------------------
simple_atom
  = sym:simple_atom_symbol {
      return { type: "simple_atom", symbol: sym };
    }
|
|
168
|
+
|
|
169
|
+
// Symbol text of a simple atom: "Br" and "Cl" are tried before the
// single-character fallback (any letter, or the "*" wildcard).
simple_atom_symbol
  = "Br"
  / "Cl"
  / [A-Za-z*]
|
|
174
|
+
|
|
175
|
+
// ============================================================
// Bonds
//
// One character out of BOND_SYMBOLS in tokenizer.js (line 19):
//   new Set(['-', '=', '#', ':', '/', '\\'])
// The matched character is the result.
// ============================================================
bond
  = [-=#:/\\]
|
|
183
|
+
|
|
184
|
+
// ============================================================
// Ring markers
//
// Mirrors: tokenizer.js lines 138-160
//   '%' + exactly two digits → that two-digit number (normally 10-99;
//                              %00-%09 are accepted and yield 0-9)
//   single digit             → ring number 0-9
// ============================================================
ring_marker
  = "%" pair:$([0-9] [0-9]) { return Number.parseInt(pair, 10); }
  / digit:[0-9] { return Number.parseInt(digit, 10); }
|
|
194
|
+
|
|
195
|
+
// ============================================================
// Branches (recursive)
//
// Mirrors: BRANCH_OPEN → (bond? chain) → BRANCH_CLOSE
//   in tokenizer.js lines 106-120 and buildAtomList lines 139-153
//
// An optional leading bond applies to the first atom of the branch
// chain, e.g. C(=O) is a double bond to O. `bond` is null when omitted.
// ============================================================
branch
  = "(" _ lead:bond? _ body:chain _ ")" {
      return { type: "branch", bond: lead, chain: body };
    }
|
|
208
|
+
|
|
209
|
+
// ============================================================
// Whitespace (optional, skipped)
//
// Zero or more spaces, tabs, line feeds or carriage returns.
// Mirrors: tokenizer.js line 104 — /\s/ is skipped
// ============================================================
_ "whitespace"
  = [ \t\n\r]*
|
package/package.json
CHANGED
package/src/decompiler.js
CHANGED
|
@@ -9,11 +9,13 @@ import {
|
|
|
9
9
|
isRingNode,
|
|
10
10
|
isLinearNode,
|
|
11
11
|
} from './ast.js';
|
|
12
|
+
import { buildSMILES } from './codegen/index.js';
|
|
13
|
+
import { createRingNode } from './node-creators.js';
|
|
12
14
|
|
|
13
15
|
// Helper to call decompileNode (satisfies no-loop-func rule)
|
|
14
|
-
function decompileChildNode(node, indent, nextVar) {
|
|
16
|
+
function decompileChildNode(node, indent, nextVar, verbose) {
|
|
15
17
|
// eslint-disable-next-line no-use-before-define
|
|
16
|
-
return decompileNode(node, indent, nextVar);
|
|
18
|
+
return decompileNode(node, indent, nextVar, verbose);
|
|
17
19
|
}
|
|
18
20
|
|
|
19
21
|
/**
|
|
@@ -110,7 +112,7 @@ function generateSubstitutionCode(ring, indent, nextVar, initialVar) {
|
|
|
110
112
|
* Generate code for ring attachments
|
|
111
113
|
* @returns {{ lines: string[], currentVar: string }}
|
|
112
114
|
*/
|
|
113
|
-
function generateAttachmentCode(ring, indent, nextVar, initialVar) {
|
|
115
|
+
function generateAttachmentCode(ring, indent, nextVar, initialVar, verbose = true) {
|
|
114
116
|
const lines = [];
|
|
115
117
|
let currentVar = initialVar;
|
|
116
118
|
|
|
@@ -128,7 +130,7 @@ function generateAttachmentCode(ring, indent, nextVar, initialVar) {
|
|
|
128
130
|
const inlineBranchId = hasInlineBranchAfter ? (metaBranchIds[nextIdx] || Infinity) : Infinity;
|
|
129
131
|
|
|
130
132
|
attachmentList.forEach((attachment) => {
|
|
131
|
-
const attachResult = decompileChildNode(attachment, indent, nextVar);
|
|
133
|
+
const attachResult = decompileChildNode(attachment, indent, nextVar, verbose);
|
|
132
134
|
lines.push(attachResult.code);
|
|
133
135
|
|
|
134
136
|
const newVar = nextVar();
|
|
@@ -158,13 +160,72 @@ function generateAttachmentCode(ring, indent, nextVar, initialVar) {
|
|
|
158
160
|
return { lines, currentVar };
|
|
159
161
|
}
|
|
160
162
|
|
|
163
|
+
/**
 * Build the SMILES for a ring that keeps its substitutions but has all of
 * its attachments removed.
 * Non-verbose decompilation uses the result as the argument of the emitted
 * Fragment('SMILES') call for a substituted ring.
 *
 * @param {Object} ring - Ring node to serialise (not modified)
 * @returns {string} Whatever buildSMILES produces for the stripped copy
 */
function getRingWithSubsSmiles(ring) {
  const {
    atoms,
    size,
    ringNumber,
    offset,
    substitutions,
    bonds,
    metaBranchDepths,
    metaLeadingBond,
  } = ring;

  // The empty object takes the place of the ring's attachments.
  const stripped = createRingNode(
    atoms,
    size,
    ringNumber,
    offset,
    substitutions,
    {},
    bonds,
    metaBranchDepths,
  );

  // Carry the leading bond over only when the source ring has one.
  if (metaLeadingBond) {
    stripped.metaLeadingBond = metaLeadingBond;
  }

  return buildSMILES(stripped);
}
|
|
183
|
+
|
|
161
184
|
/**
|
|
162
185
|
* Decompile a Ring node
|
|
163
186
|
*/
|
|
164
|
-
function decompileRing(ring, indent, nextVar) {
|
|
187
|
+
function decompileRing(ring, indent, nextVar, verbose = true) {
|
|
165
188
|
const lines = [];
|
|
166
189
|
const varName = nextVar();
|
|
167
190
|
|
|
191
|
+
if (!verbose && !ring.metaLeadingBond) {
|
|
192
|
+
// Non-verbose: emit Fragment('SMILES') for base ring
|
|
193
|
+
const baseSmiles = buildSMILES(createRingNode(
|
|
194
|
+
ring.atoms,
|
|
195
|
+
ring.size,
|
|
196
|
+
ring.ringNumber,
|
|
197
|
+
ring.offset,
|
|
198
|
+
{},
|
|
199
|
+
{},
|
|
200
|
+
ring.bonds,
|
|
201
|
+
ring.metaBranchDepths,
|
|
202
|
+
));
|
|
203
|
+
lines.push(`${indent}const ${varName} = Fragment('${baseSmiles}');`);
|
|
204
|
+
|
|
205
|
+
// Substitutions: emit one more Fragment('SMILES') for the whole ring with all substitutions applied
|
|
206
|
+
let currentVar = varName;
|
|
207
|
+
if (Object.keys(ring.substitutions).length > 0) {
|
|
208
|
+
const subsSmiles = getRingWithSubsSmiles(ring);
|
|
209
|
+
const newVar = nextVar();
|
|
210
|
+
lines.push(`${indent}const ${newVar} = Fragment('${subsSmiles}');`);
|
|
211
|
+
currentVar = newVar;
|
|
212
|
+
}
|
|
213
|
+
|
|
214
|
+
// Attachments stay as .attach() calls
|
|
215
|
+
const { lines: attLines, currentVar: attVar } = generateAttachmentCode(
|
|
216
|
+
ring,
|
|
217
|
+
indent,
|
|
218
|
+
nextVar,
|
|
219
|
+
currentVar,
|
|
220
|
+
verbose,
|
|
221
|
+
);
|
|
222
|
+
lines.push(...attLines);
|
|
223
|
+
currentVar = attVar;
|
|
224
|
+
|
|
225
|
+
return { code: lines.join('\n'), finalVar: currentVar };
|
|
226
|
+
}
|
|
227
|
+
|
|
228
|
+
// Verbose mode (original behavior)
|
|
168
229
|
// Build options object (include branchDepths for full decompilation)
|
|
169
230
|
const { optionsStr } = buildRingOptions(ring, { includeBranchDepths: true });
|
|
170
231
|
lines.push(`${indent}const ${varName} = Ring({ ${optionsStr} });`);
|
|
@@ -185,6 +246,7 @@ function decompileRing(ring, indent, nextVar) {
|
|
|
185
246
|
indent,
|
|
186
247
|
nextVar,
|
|
187
248
|
currentVar,
|
|
249
|
+
verbose,
|
|
188
250
|
);
|
|
189
251
|
lines.push(...attLines);
|
|
190
252
|
currentVar = attVar;
|
|
@@ -195,7 +257,7 @@ function decompileRing(ring, indent, nextVar) {
|
|
|
195
257
|
/**
|
|
196
258
|
* Decompile a Linear node
|
|
197
259
|
*/
|
|
198
|
-
function decompileLinear(linear, indent, nextVar) {
|
|
260
|
+
function decompileLinear(linear, indent, nextVar, verbose = true) {
|
|
199
261
|
const lines = [];
|
|
200
262
|
const varName = nextVar();
|
|
201
263
|
|
|
@@ -205,7 +267,13 @@ function decompileLinear(linear, indent, nextVar) {
|
|
|
205
267
|
const hasNonNullBonds = linear.bonds.some((b) => b !== null);
|
|
206
268
|
const hasLeadingBond = linear.metaLeadingBond !== undefined;
|
|
207
269
|
|
|
208
|
-
|
|
270
|
+
// Non-verbose: use Fragment('SMILES') when possible (no bonds, no leadingBond)
|
|
271
|
+
if (!verbose && !hasNonNullBonds && !hasLeadingBond) {
|
|
272
|
+
// Compute SMILES without attachments for the base linear node
|
|
273
|
+
const baseLinear = { ...linear, attachments: {} };
|
|
274
|
+
const smiles = buildSMILES(baseLinear);
|
|
275
|
+
lines.push(`${indent}const ${varName} = Fragment('${smiles}');`);
|
|
276
|
+
} else if (hasNonNullBonds && hasLeadingBond) {
|
|
209
277
|
lines.push(`${indent}const ${varName} = Linear([${atomsStr}], [${formatBondsArray(linear.bonds)}], {}, '${linear.metaLeadingBond}');`);
|
|
210
278
|
} else if (hasNonNullBonds) {
|
|
211
279
|
lines.push(`${indent}const ${varName} = Linear([${atomsStr}], [${formatBondsArray(linear.bonds)}]);`);
|
|
@@ -222,7 +290,7 @@ function decompileLinear(linear, indent, nextVar) {
|
|
|
222
290
|
Object.entries(linear.attachments).forEach(([pos, attachmentList]) => {
|
|
223
291
|
attachmentList.forEach((attachment) => {
|
|
224
292
|
// eslint-disable-next-line no-use-before-define
|
|
225
|
-
const attachRes = decompileNode(attachment, indent, nextVar);
|
|
293
|
+
const attachRes = decompileNode(attachment, indent, nextVar, verbose);
|
|
226
294
|
const { code: aCode, finalVar: aFinalVar } = attachRes;
|
|
227
295
|
lines.push(aCode);
|
|
228
296
|
|
|
@@ -423,7 +491,7 @@ function computeSharedPositions(fusedRing) {
|
|
|
423
491
|
* Emits .fuse() for the first pair and .addRing() for subsequent rings.
|
|
424
492
|
* The resulting code goes through the simple codegen path (offset-based).
|
|
425
493
|
*/
|
|
426
|
-
function decompileSimpleFusedRing(fusedRing, indent, nextVar) {
|
|
494
|
+
function decompileSimpleFusedRing(fusedRing, indent, nextVar, verbose = true) {
|
|
427
495
|
const lines = [];
|
|
428
496
|
const ringFinalVars = [];
|
|
429
497
|
|
|
@@ -486,20 +554,52 @@ function decompileSimpleFusedRing(fusedRing, indent, nextVar) {
|
|
|
486
554
|
}
|
|
487
555
|
|
|
488
556
|
const varName = nextVar();
|
|
489
|
-
const { optionsStr } = buildRingOptions(effectiveRing, { includeBranchDepths: true });
|
|
490
|
-
lines.push(`${indent}const ${varName} = Ring({ ${optionsStr} });`);
|
|
491
557
|
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
558
|
+
if (!verbose && !effectiveRing.metaLeadingBond) {
|
|
559
|
+
// Non-verbose: use Fragment for ring constructor
|
|
560
|
+
const baseSmiles = buildSMILES(createRingNode(
|
|
561
|
+
effectiveRing.atoms,
|
|
562
|
+
effectiveRing.size,
|
|
563
|
+
effectiveRing.ringNumber,
|
|
564
|
+
effectiveRing.offset,
|
|
565
|
+
{},
|
|
566
|
+
{},
|
|
567
|
+
effectiveRing.bonds,
|
|
568
|
+
effectiveRing.metaBranchDepths,
|
|
569
|
+
));
|
|
570
|
+
lines.push(`${indent}const ${varName} = Fragment('${baseSmiles}');`);
|
|
571
|
+
|
|
572
|
+
let currentVar = varName;
|
|
573
|
+
if (Object.keys(effectiveRing.substitutions || {}).length > 0) {
|
|
574
|
+
const subsSmiles = getRingWithSubsSmiles(effectiveRing);
|
|
575
|
+
const newVar = nextVar();
|
|
576
|
+
lines.push(`${indent}const ${newVar} = Fragment('${subsSmiles}');`);
|
|
577
|
+
currentVar = newVar;
|
|
578
|
+
}
|
|
496
579
|
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
580
|
+
const {
|
|
581
|
+
lines: attLines, currentVar: attVar,
|
|
582
|
+
} = generateAttachmentCode(effectiveRing, indent, nextVar, currentVar, verbose);
|
|
583
|
+
lines.push(...attLines);
|
|
501
584
|
|
|
502
|
-
|
|
585
|
+
ringFinalVars.push(attVar);
|
|
586
|
+
} else {
|
|
587
|
+
// Verbose: original behavior
|
|
588
|
+
const { optionsStr } = buildRingOptions(effectiveRing, { includeBranchDepths: true });
|
|
589
|
+
lines.push(`${indent}const ${varName} = Ring({ ${optionsStr} });`);
|
|
590
|
+
|
|
591
|
+
const {
|
|
592
|
+
lines: subLines, currentVar: subVar,
|
|
593
|
+
} = generateSubstitutionCode(effectiveRing, indent, nextVar, varName);
|
|
594
|
+
lines.push(...subLines);
|
|
595
|
+
|
|
596
|
+
const {
|
|
597
|
+
lines: attLines, currentVar: attVar,
|
|
598
|
+
} = generateAttachmentCode(effectiveRing, indent, nextVar, subVar, verbose);
|
|
599
|
+
lines.push(...attLines);
|
|
600
|
+
|
|
601
|
+
ringFinalVars.push(attVar);
|
|
602
|
+
}
|
|
503
603
|
});
|
|
504
604
|
|
|
505
605
|
const leadingBond = fusedRing.metaLeadingBond;
|
|
@@ -541,7 +641,7 @@ function decompileSimpleFusedRing(fusedRing, indent, nextVar) {
|
|
|
541
641
|
* rings to a fused ring. The codegen uses this metadata to correctly interleave
|
|
542
642
|
* ring markers and handle branch depths.
|
|
543
643
|
*/
|
|
544
|
-
function decompileComplexFusedRing(fusedRing, indent, nextVar) {
|
|
644
|
+
function decompileComplexFusedRing(fusedRing, indent, nextVar, verbose = true) {
|
|
545
645
|
const lines = [];
|
|
546
646
|
const sequentialRings = fusedRing.metaSequentialRings || [];
|
|
547
647
|
const seqAtomAttachments = fusedRing.metaSeqAtomAttachments || new Map();
|
|
@@ -549,17 +649,45 @@ function decompileComplexFusedRing(fusedRing, indent, nextVar) {
|
|
|
549
649
|
// Step 1: Decompile the base fused ring
|
|
550
650
|
const ringVars = [];
|
|
551
651
|
fusedRing.rings.forEach((ring) => {
|
|
552
|
-
const { optionsStr } = buildRingOptions(ring, { includeBranchDepths: true });
|
|
553
652
|
const varName = nextVar();
|
|
554
|
-
lines.push(`${indent}const ${varName} = Ring({ ${optionsStr} });`);
|
|
555
653
|
|
|
556
|
-
|
|
557
|
-
|
|
654
|
+
if (!verbose && !ring.metaLeadingBond) {
|
|
655
|
+
const baseSmiles = buildSMILES(createRingNode(
|
|
656
|
+
ring.atoms,
|
|
657
|
+
ring.size,
|
|
658
|
+
ring.ringNumber,
|
|
659
|
+
ring.offset,
|
|
660
|
+
{},
|
|
661
|
+
{},
|
|
662
|
+
ring.bonds,
|
|
663
|
+
ring.metaBranchDepths,
|
|
664
|
+
));
|
|
665
|
+
lines.push(`${indent}const ${varName} = Fragment('${baseSmiles}');`);
|
|
666
|
+
|
|
667
|
+
let currentVar = varName;
|
|
668
|
+
if (Object.keys(ring.substitutions || {}).length > 0) {
|
|
669
|
+
const subsSmiles = getRingWithSubsSmiles(ring);
|
|
670
|
+
const newVar = nextVar();
|
|
671
|
+
lines.push(`${indent}const ${newVar} = Fragment('${subsSmiles}');`);
|
|
672
|
+
currentVar = newVar;
|
|
673
|
+
}
|
|
558
674
|
|
|
559
|
-
|
|
560
|
-
|
|
675
|
+
const attResult = generateAttachmentCode(ring, indent, nextVar, currentVar, verbose);
|
|
676
|
+
lines.push(...attResult.lines);
|
|
677
|
+
|
|
678
|
+
ringVars.push({ var: attResult.currentVar, ring });
|
|
679
|
+
} else {
|
|
680
|
+
const { optionsStr } = buildRingOptions(ring, { includeBranchDepths: true });
|
|
681
|
+
lines.push(`${indent}const ${varName} = Ring({ ${optionsStr} });`);
|
|
682
|
+
|
|
683
|
+
const subResult = generateSubstitutionCode(ring, indent, nextVar, varName);
|
|
684
|
+
lines.push(...subResult.lines);
|
|
685
|
+
|
|
686
|
+
const attResult = generateAttachmentCode(ring, indent, nextVar, subResult.currentVar, verbose);
|
|
687
|
+
lines.push(...attResult.lines);
|
|
561
688
|
|
|
562
|
-
|
|
689
|
+
ringVars.push({ var: attResult.currentVar, ring });
|
|
690
|
+
}
|
|
563
691
|
});
|
|
564
692
|
|
|
565
693
|
// Decompile seqAtomAttachments BEFORE fuse so their vars are declared early
|
|
@@ -569,7 +697,7 @@ function decompileComplexFusedRing(fusedRing, indent, nextVar) {
|
|
|
569
697
|
seqAtomAttachments.forEach((attachments, pos) => {
|
|
570
698
|
const attVars = [];
|
|
571
699
|
attachments.forEach((att) => {
|
|
572
|
-
const attResult = decompileChildNode(att, indent, nextVar);
|
|
700
|
+
const attResult = decompileChildNode(att, indent, nextVar, verbose);
|
|
573
701
|
lines.push(attResult.code);
|
|
574
702
|
attVars.push(attResult.finalVar);
|
|
575
703
|
});
|
|
@@ -691,17 +819,45 @@ function decompileComplexFusedRing(fusedRing, indent, nextVar) {
|
|
|
691
819
|
// Decompile sequential rings
|
|
692
820
|
const seqRingVars = [];
|
|
693
821
|
sequentialRings.forEach((ring) => {
|
|
694
|
-
const { optionsStr } = buildRingOptions(ring, { includeBranchDepths: true });
|
|
695
822
|
const varName = nextVar();
|
|
696
|
-
lines.push(`${indent}const ${varName} = Ring({ ${optionsStr} });`);
|
|
697
823
|
|
|
698
|
-
|
|
699
|
-
|
|
824
|
+
if (!verbose && !ring.metaLeadingBond) {
|
|
825
|
+
const baseSmiles = buildSMILES(createRingNode(
|
|
826
|
+
ring.atoms,
|
|
827
|
+
ring.size,
|
|
828
|
+
ring.ringNumber,
|
|
829
|
+
ring.offset,
|
|
830
|
+
{},
|
|
831
|
+
{},
|
|
832
|
+
ring.bonds,
|
|
833
|
+
ring.metaBranchDepths,
|
|
834
|
+
));
|
|
835
|
+
lines.push(`${indent}const ${varName} = Fragment('${baseSmiles}');`);
|
|
836
|
+
|
|
837
|
+
let currentVar = varName;
|
|
838
|
+
if (Object.keys(ring.substitutions || {}).length > 0) {
|
|
839
|
+
const subsSmiles = getRingWithSubsSmiles(ring);
|
|
840
|
+
const newVar = nextVar();
|
|
841
|
+
lines.push(`${indent}const ${newVar} = Fragment('${subsSmiles}');`);
|
|
842
|
+
currentVar = newVar;
|
|
843
|
+
}
|
|
700
844
|
|
|
701
|
-
|
|
702
|
-
|
|
845
|
+
const attResult = generateAttachmentCode(ring, indent, nextVar, currentVar, verbose);
|
|
846
|
+
lines.push(...attResult.lines);
|
|
847
|
+
|
|
848
|
+
seqRingVars.push(attResult.currentVar);
|
|
849
|
+
} else {
|
|
850
|
+
const { optionsStr } = buildRingOptions(ring, { includeBranchDepths: true });
|
|
851
|
+
lines.push(`${indent}const ${varName} = Ring({ ${optionsStr} });`);
|
|
703
852
|
|
|
704
|
-
|
|
853
|
+
const subResult = generateSubstitutionCode(ring, indent, nextVar, varName);
|
|
854
|
+
lines.push(...subResult.lines);
|
|
855
|
+
|
|
856
|
+
const attResult = generateAttachmentCode(ring, indent, nextVar, subResult.currentVar, verbose);
|
|
857
|
+
lines.push(...attResult.lines);
|
|
858
|
+
|
|
859
|
+
seqRingVars.push(attResult.currentVar);
|
|
860
|
+
}
|
|
705
861
|
});
|
|
706
862
|
|
|
707
863
|
// Decompile chain atom attachments
|
|
@@ -710,7 +866,7 @@ function decompileComplexFusedRing(fusedRing, indent, nextVar) {
|
|
|
710
866
|
const attachments = seqAtomAttachments.get(entry.attachmentPos) || [];
|
|
711
867
|
const attVars = [];
|
|
712
868
|
attachments.forEach((att) => {
|
|
713
|
-
const attResult = decompileChildNode(att, indent, nextVar);
|
|
869
|
+
const attResult = decompileChildNode(att, indent, nextVar, verbose);
|
|
714
870
|
lines.push(attResult.code);
|
|
715
871
|
attVars.push(attResult.finalVar);
|
|
716
872
|
});
|
|
@@ -845,24 +1001,24 @@ function decompileComplexFusedRing(fusedRing, indent, nextVar) {
|
|
|
845
1001
|
* - Sequential rings or interleaved codegen → preserves metadata (needs it for correct SMILES)
|
|
846
1002
|
* - Everything else → structural API calls only (no metadata)
|
|
847
1003
|
*/
|
|
848
|
-
function decompileFusedRing(fusedRing, indent, nextVar) {
|
|
1004
|
+
function decompileFusedRing(fusedRing, indent, nextVar, verbose = true) {
|
|
849
1005
|
const seqRings = fusedRing.metaSequentialRings;
|
|
850
1006
|
const hasSeqRings = seqRings && seqRings.length > 0;
|
|
851
1007
|
const isInterleaved = needsInterleavedCodegen(fusedRing);
|
|
852
1008
|
|
|
853
1009
|
// Use complex decompilation for sequential rings or genuinely interleaved fused rings
|
|
854
1010
|
if (hasSeqRings || isInterleaved) {
|
|
855
|
-
return decompileComplexFusedRing(fusedRing, indent, nextVar);
|
|
1011
|
+
return decompileComplexFusedRing(fusedRing, indent, nextVar, verbose);
|
|
856
1012
|
}
|
|
857
1013
|
|
|
858
1014
|
// Everything else goes through the simple path (no metadata)
|
|
859
|
-
return decompileSimpleFusedRing(fusedRing, indent, nextVar);
|
|
1015
|
+
return decompileSimpleFusedRing(fusedRing, indent, nextVar, verbose);
|
|
860
1016
|
}
|
|
861
1017
|
|
|
862
1018
|
/**
|
|
863
1019
|
* Decompile a Molecule node
|
|
864
1020
|
*/
|
|
865
|
-
function decompileMolecule(molecule, indent, nextVar) {
|
|
1021
|
+
function decompileMolecule(molecule, indent, nextVar, verbose = true) {
|
|
866
1022
|
const lines = [];
|
|
867
1023
|
const { components } = molecule;
|
|
868
1024
|
|
|
@@ -878,7 +1034,7 @@ function decompileMolecule(molecule, indent, nextVar) {
|
|
|
878
1034
|
const componentFinalVars = [];
|
|
879
1035
|
components.forEach((component) => {
|
|
880
1036
|
// eslint-disable-next-line no-use-before-define
|
|
881
|
-
const { code: componentCode, finalVar } = decompileNode(component, indent, nextVar);
|
|
1037
|
+
const { code: componentCode, finalVar } = decompileNode(component, indent, nextVar, verbose);
|
|
882
1038
|
lines.push(componentCode);
|
|
883
1039
|
// Note: metaLeadingBond is now handled in component constructors via metadata
|
|
884
1040
|
// or leadingBond option, so no mutation needed here
|
|
@@ -893,21 +1049,21 @@ function decompileMolecule(molecule, indent, nextVar) {
|
|
|
893
1049
|
return { code: lines.join('\n'), finalVar: finalVarName };
|
|
894
1050
|
}
|
|
895
1051
|
|
|
896
|
-
function decompileNode(node, indent, nextVar) {
|
|
1052
|
+
function decompileNode(node, indent, nextVar, verbose = true) {
|
|
897
1053
|
if (isRingNode(node)) {
|
|
898
|
-
return decompileRing(node, indent, nextVar);
|
|
1054
|
+
return decompileRing(node, indent, nextVar, verbose);
|
|
899
1055
|
}
|
|
900
1056
|
|
|
901
1057
|
if (isLinearNode(node)) {
|
|
902
|
-
return decompileLinear(node, indent, nextVar);
|
|
1058
|
+
return decompileLinear(node, indent, nextVar, verbose);
|
|
903
1059
|
}
|
|
904
1060
|
|
|
905
1061
|
if (isFusedRingNode(node)) {
|
|
906
|
-
return decompileFusedRing(node, indent, nextVar);
|
|
1062
|
+
return decompileFusedRing(node, indent, nextVar, verbose);
|
|
907
1063
|
}
|
|
908
1064
|
|
|
909
1065
|
if (isMoleculeNode(node)) {
|
|
910
|
-
return decompileMolecule(node, indent, nextVar);
|
|
1066
|
+
return decompileMolecule(node, indent, nextVar, verbose);
|
|
911
1067
|
}
|
|
912
1068
|
|
|
913
1069
|
throw new Error(`Unknown node type: ${node.type}`);
|
|
@@ -919,15 +1075,19 @@ function decompileNode(node, indent, nextVar) {
|
|
|
919
1075
|
* @param {Object} options - Options
|
|
920
1076
|
* @param {number} options.indent - Indentation level (default 0)
|
|
921
1077
|
* @param {string} options.varName - Variable name prefix (default 'v')
|
|
1078
|
+
* @param {boolean} options.verbose - Use verbose constructor syntax (default false).
|
|
1079
|
+
* When false, uses Fragment('SMILES') for Ring and simple Linear nodes.
|
|
922
1080
|
* @param {boolean} options.includeMetadata - Include metadata assignments
|
|
923
1081
|
* (default true). Set to false for cleaner output (but code may not work)
|
|
924
1082
|
*/
|
|
925
1083
|
export function decompile(node, options = {}) {
|
|
926
|
-
const {
|
|
1084
|
+
const {
|
|
1085
|
+
indent = 0, varName = 'v', includeMetadata = true, verbose = false,
|
|
1086
|
+
} = options;
|
|
927
1087
|
const indentStr = ' '.repeat(indent);
|
|
928
1088
|
const nextVar = createCounter(varName);
|
|
929
1089
|
|
|
930
|
-
const { code } = decompileNode(node, indentStr, nextVar);
|
|
1090
|
+
const { code } = decompileNode(node, indentStr, nextVar, verbose);
|
|
931
1091
|
|
|
932
1092
|
// Always use export for declarations
|
|
933
1093
|
let result = code.replace(/^(\s*)(const|let) /gm, '$1export $2 ');
|