smiles-js 2.1.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. package/docs/smiles.peggy +215 -0
  2. package/package.json +1 -1
  3. package/src/decompiler.js +209 -49
  4. package/src/decompiler.test.js +232 -60
  5. package/src/fragment.test.js +7 -2
  6. package/src/method-attachers.js +8 -8
  7. package/test-integration/__snapshots__/acetaminophen.test.js.snap +20 -0
  8. package/test-integration/__snapshots__/adjuvant-analgesics.test.js.snap +62 -0
  9. package/test-integration/__snapshots__/cholesterol-drugs.test.js.snap +261 -0
  10. package/test-integration/__snapshots__/dexamethasone.test.js.snap +31 -0
  11. package/test-integration/__snapshots__/endocannabinoids.test.js.snap +77 -0
  12. package/test-integration/__snapshots__/endogenous-opioids.test.js.snap +1116 -0
  13. package/test-integration/__snapshots__/hypertension-medication.test.js.snap +69 -0
  14. package/test-integration/__snapshots__/local-anesthetics.test.js.snap +97 -0
  15. package/test-integration/__snapshots__/nsaids-otc.test.js.snap +60 -0
  16. package/test-integration/__snapshots__/nsaids-prescription.test.js.snap +113 -0
  17. package/test-integration/__snapshots__/opioids.test.js.snap +109 -0
  18. package/test-integration/__snapshots__/steroids.test.js.snap +379 -0
  19. package/test-integration/acetaminophen.test.js +15 -3
  20. package/test-integration/adjuvant-analgesics.test.js +43 -7
  21. package/test-integration/cholesterol-drugs.test.js +88 -22
  22. package/test-integration/dexamethasone.test.js +8 -2
  23. package/test-integration/endocannabinoids.test.js +48 -12
  24. package/test-integration/endogenous-opioids.smiles.js +32 -0
  25. package/test-integration/endogenous-opioids.test.js +192 -0
  26. package/test-integration/hypertension-medication.test.js +32 -8
  27. package/test-integration/local-anesthetics.smiles.js +33 -0
  28. package/test-integration/local-anesthetics.test.js +64 -16
  29. package/test-integration/nsaids-otc.test.js +40 -10
  30. package/test-integration/nsaids-prescription.test.js +72 -18
  31. package/test-integration/opioids.test.js +56 -14
  32. package/test-integration/steroids.test.js +112 -28
  33. package/test-integration/utils.js +4 -2
  34. package/todo +2 -1
@@ -0,0 +1,215 @@
1
+ // SMILES Grammar for Peggy (PEG parser generator for JavaScript)
2
+ //
3
+ // This grammar mirrors the tokenizer + buildAtomList two-pass architecture
4
+ // in src/tokenizer.js and src/parser/smiles-parser-core.js.
5
+ //
6
+ // What this grammar produces:
7
+ // A concrete parse tree (token stream with structure). The codebase then
8
+ // does a second semantic pass (buildAST) to detect rings, fused rings,
9
+ // and attachments — that pass is stateful and cannot be expressed in PEG.
10
+ //
11
+ // Usage:
12
+ // npx peggy smiles.peggy # generates smiles.js
13
+ // npx peggy --format commonjs -o smiles.cjs smiles.peggy # CommonJS
14
+ //
15
+ // Differences from OpenSMILES spec (matches codebase behavior):
16
+ // - Simple atoms accept ANY [A-Za-z] letter, not just the organic subset.
17
+ // The tokenizer (isAtomStart + parseSimpleAtom) only special-cases Br/Cl
18
+ // as two-letter atoms; everything else is a single letter.
19
+ // - Bracketed atoms capture raw content only (the codebase has a TODO for
20
+ // full isotope/chirality/hcount/charge/class parsing). The grammar DOES
21
+ // parse the sub-fields since it's trivial in PEG and useful for consumers.
22
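+ // - Whitespace is silently skipped (tokenizer.js line 104). Note: this grammar only permits it at the start/end of input, around ".", between chain elements (and their bonds), and just inside branch parentheses.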
23
+ // - Ring markers can be preceded by a bond (e.g. C=1CC=1 is legal SMILES).
24
+
25
+ // ============================================================
26
+ // Top-level: dot-separated components
27
+ // Mirrors: tokenizer DOT token → buildAtomList skips DOT
28
+ // ============================================================
29
+ smiles
30
+ = _ head:chain tail:(_ "." _ chain)* _ {
31
+ const components = [head, ...tail.map(t => t[3])];
32
+ if (components.length === 1) return components[0];
33
+ return { type: "molecule", components };
34
+ }
35
+
36
+ // ============================================================
37
+ // Chain: sequence of atom_units with optional bonds between them
38
+ // Mirrors: buildAtomList's linear scan — each ATOM token may be
39
+ // preceded by a BOND token, and followed by RING_MARKER
40
+ // and BRANCH_OPEN/CLOSE tokens.
41
+ // ============================================================
42
+ chain
43
+ = first:atom_unit rest:(_ bond? _ atom_unit)* {
44
+ const atoms = [first];
45
+ const bonds = [null];
46
+ for (const r of rest) {
47
+ bonds.push(r[1]);
48
+ atoms.push(r[3]);
49
+ }
50
+ return { type: "chain", atoms, bonds };
51
+ }
52
+
53
+ // ============================================================
54
+ // Atom unit: atom + ring markers + branches
55
+ //
56
+ // Mirrors the token consumption order in buildAtomList:
57
+ // ATOM → RING_MARKER* → (BRANCH_OPEN chain BRANCH_CLOSE)*
58
+ //
59
+ // Ring markers can carry their own bond (e.g. C=1...=1)
60
+ // This is valid SMILES: the bond on a ring closure describes
61
+ // the bond between the two ring-closure atoms.
62
+ // ============================================================
63
+ atom_unit
64
+ = atom:atom ring_bonds:(bond? ring_marker)* branches:branch* {
65
+ const rings = ring_bonds.map(rb => ({
66
+ bond: rb[0],
67
+ number: rb[1]
68
+ }));
69
+ return { atom, rings, branches };
70
+ }
71
+
72
+ // ============================================================
73
+ // Atoms
74
+ //
75
+ // Two forms, matching tokenizer.js:
76
+ // 1. Bracketed: '[' ... ']' (parseBracketedAtom, line 67)
77
+ // 2. Simple: [A-Za-z*] (parseSimpleAtom, line 39)
78
+ // ============================================================
79
+ atom
80
+ = bracketed_atom
81
+ / simple_atom
82
+
83
+ // ----------------------------------------------------------
84
+ // Bracketed atom: [isotope? element chirality? hcount? charge? class?]
85
+ //
86
+ // The codebase currently stores only { raw } (parseBracketedAtom, line 79).
87
+ // We parse the sub-fields here since PEG makes it easy and consumers
88
+ // can use them. The `raw` field preserves the full bracket text for
89
+ // round-trip fidelity (matching token.value in the codebase).
90
+ // ----------------------------------------------------------
91
+ bracketed_atom
92
+ = "[" content:bracketed_content "]" {
93
+ return { type: "bracket_atom", ...content, raw: text() };
94
+ }
95
+
96
+ bracketed_content
97
+ = isotope:isotope?
98
+ symbol:bracket_element
99
+ chirality:chirality?
100
+ hcount:hcount?
101
+ charge:charge?
102
+ atomClass:atom_class? {
103
+ return { isotope, symbol, chirality, hcount, charge, atomClass };
104
+ }
105
+
106
+ // Isotope: digits before the element symbol
107
+ // e.g. [13C], [2H]
108
+ isotope
109
+ = digits:$[0-9]+ &[A-Za-z*] { return parseInt(digits, 10); }
110
+
111
+ // Element inside brackets — one of: a two-letter aromatic (se, as),
112
+ // a single-letter aromatic, a standard element symbol, or the wildcard (*)
113
+ bracket_element
114
+ = aromatic_element
115
+ / element_symbol
116
+ / "*" { return "*"; }
117
+
118
+ // Standard element symbol: uppercase letter + optional lowercase
119
+ // e.g. C, Na, Fe, Zr
120
+ element_symbol
121
+ = a:$[A-Z] b:$[a-z]? { return b ? a + b : a; }
122
+
123
+ // Aromatic elements inside brackets
124
+ // OpenSMILES: b, c, n, o, p, s, se, as
125
+ aromatic_element
126
+ = "se" { return "se"; }
127
+ / "as" { return "as"; }
128
+ / c:[bcnops] { return c; }
129
+
130
+ // Chirality: @ or @@ (the codebase doesn't parse extended forms like @TH1)
131
+ chirality
132
+ = "@@" { return "@@"; }
133
+ / "@" { return "@"; }
134
+
135
+ // Hydrogen count: H or H<digit>
136
+ // e.g. [NH3+] has hcount=3, [C@H] has hcount=1
137
+ hcount
138
+ = "H" n:$[0-9]? { return n ? parseInt(n, 10) : 1; }
139
+
140
+ // Charge: +, -, +2, -1, ++, --
141
+ // Ordered to try multi-char patterns before single-char
142
+ charge
143
+ = "++" { return 2; }
144
+ / "--" { return -2; }
145
+ / "+" n:$[0-9]+ { return parseInt(n, 10); }
146
+ / "-" n:$[0-9]+ { return -parseInt(n, 10); }
147
+ / "+" { return 1; }
148
+ / "-" { return -1; }
149
+
150
+ // Atom class: :<digits>
151
+ // e.g. [C:1]
152
+ atom_class
153
+ = ":" n:$[0-9]+ { return parseInt(n, 10); }
154
+
155
+ // ----------------------------------------------------------
156
+ // Simple (non-bracketed) atom
157
+ //
158
+ // Mirrors tokenizer.js parseSimpleAtom (line 39) + isAtomStart (line 31):
159
+ // isAtomStart accepts /[A-Za-z*]/
160
+ // parseSimpleAtom checks for two-letter Cl/Br first, then single char
161
+ //
162
+ // The codebase does NOT restrict to the OpenSMILES organic subset —
163
+ // it accepts any letter as a valid atom. This is intentional for
164
+ // permissive parsing. The semantic layer validates later.
165
+ // ----------------------------------------------------------
166
+ simple_atom
167
+ = symbol:simple_atom_symbol { return { type: "simple_atom", symbol }; }
168
+
169
+ simple_atom_symbol
170
+ = "Br" { return "Br"; }
171
+ / "Cl" { return "Cl"; }
172
+ / c:[A-Za-z] { return c; }
173
+ / "*" { return "*"; }
174
+
175
+ // ============================================================
176
+ // Bonds
177
+ //
178
+ // Mirrors: BOND_SYMBOLS in tokenizer.js (line 19)
179
+ // new Set(['-', '=', '#', ':', '/', '\\'])
180
+ // ============================================================
181
+ bond
182
+ = b:[-=#:/\\] { return b; }
183
+
184
+ // ============================================================
185
+ // Ring markers
186
+ //
187
+ // Mirrors: tokenizer.js lines 138-160
188
+ // '%' + two digits → ring number (conventionally 10-99; this rule accepts any two digits, %00-%99)
189
+ // single digit → ring number 0-9
190
+ // ============================================================
191
+ ring_marker
192
+ = "%" d1:[0-9] d2:[0-9] { return parseInt(d1 + d2, 10); }
193
+ / d:[0-9] { return parseInt(d, 10); }
194
+
195
+ // ============================================================
196
+ // Branches (recursive)
197
+ //
198
+ // Mirrors: BRANCH_OPEN → (bond? chain) → BRANCH_CLOSE
199
+ // in tokenizer.js lines 106-120 and buildAtomList lines 139-153
200
+ //
201
+ // A branch can start with a bond that applies to the first atom
202
+ // of the branch chain (e.g. C(=O) means double bond to O).
203
+ // ============================================================
204
+ branch
205
+ = "(" _ b:bond? _ c:chain _ ")" {
206
+ return { type: "branch", bond: b, chain: c };
207
+ }
208
+
209
+ // ============================================================
210
+ // Whitespace (optional, skipped)
211
+ //
212
+ // Mirrors: tokenizer.js line 104 — /\s/ is skipped
213
+ // ============================================================
214
+ _ "whitespace"
215
+ = [ \t\n\r]*
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "smiles-js",
3
- "version": "2.1.0",
3
+ "version": "2.2.0",
4
4
  "description": "A JavaScript library for building molecules using composable fragments",
5
5
  "type": "module",
6
6
  "main": "src/index.js",
package/src/decompiler.js CHANGED
@@ -9,11 +9,13 @@ import {
9
9
  isRingNode,
10
10
  isLinearNode,
11
11
  } from './ast.js';
12
+ import { buildSMILES } from './codegen/index.js';
13
+ import { createRingNode } from './node-creators.js';
12
14
 
13
15
  // Helper to call decompileNode (satisfies no-loop-func rule)
14
- function decompileChildNode(node, indent, nextVar) {
16
+ function decompileChildNode(node, indent, nextVar, verbose) {
15
17
  // eslint-disable-next-line no-use-before-define
16
- return decompileNode(node, indent, nextVar);
18
+ return decompileNode(node, indent, nextVar, verbose);
17
19
  }
18
20
 
19
21
  /**
@@ -110,7 +112,7 @@ function generateSubstitutionCode(ring, indent, nextVar, initialVar) {
110
112
  * Generate code for ring attachments
111
113
  * @returns {{ lines: string[], currentVar: string }}
112
114
  */
113
- function generateAttachmentCode(ring, indent, nextVar, initialVar) {
115
+ function generateAttachmentCode(ring, indent, nextVar, initialVar, verbose = true) {
114
116
  const lines = [];
115
117
  let currentVar = initialVar;
116
118
 
@@ -128,7 +130,7 @@ function generateAttachmentCode(ring, indent, nextVar, initialVar) {
128
130
  const inlineBranchId = hasInlineBranchAfter ? (metaBranchIds[nextIdx] || Infinity) : Infinity;
129
131
 
130
132
  attachmentList.forEach((attachment) => {
131
- const attachResult = decompileChildNode(attachment, indent, nextVar);
133
+ const attachResult = decompileChildNode(attachment, indent, nextVar, verbose);
132
134
  lines.push(attachResult.code);
133
135
 
134
136
  const newVar = nextVar();
@@ -158,13 +160,72 @@ function generateAttachmentCode(ring, indent, nextVar, initialVar) {
158
160
  return { lines, currentVar };
159
161
  }
160
162
 
163
+ /**
164
+ * Compute the SMILES for a ring node with substitutions but without attachments.
165
+ * Used in non-verbose mode to emit Fragment('SMILES') for the substituted ring.
166
+ */
167
+ function getRingWithSubsSmiles(ring) {
168
+ const tempNode = createRingNode(
169
+ ring.atoms,
170
+ ring.size,
171
+ ring.ringNumber,
172
+ ring.offset,
173
+ ring.substitutions,
174
+ {},
175
+ ring.bonds,
176
+ ring.metaBranchDepths,
177
+ );
178
+ if (ring.metaLeadingBond) {
179
+ tempNode.metaLeadingBond = ring.metaLeadingBond;
180
+ }
181
+ return buildSMILES(tempNode);
182
+ }
183
+
161
184
  /**
162
185
  * Decompile a Ring node
163
186
  */
164
- function decompileRing(ring, indent, nextVar) {
187
+ function decompileRing(ring, indent, nextVar, verbose = true) {
165
188
  const lines = [];
166
189
  const varName = nextVar();
167
190
 
191
+ if (!verbose && !ring.metaLeadingBond) {
192
+ // Non-verbose: emit Fragment('SMILES') for base ring
193
+ const baseSmiles = buildSMILES(createRingNode(
194
+ ring.atoms,
195
+ ring.size,
196
+ ring.ringNumber,
197
+ ring.offset,
198
+ {},
199
+ {},
200
+ ring.bonds,
201
+ ring.metaBranchDepths,
202
+ ));
203
+ lines.push(`${indent}const ${varName} = Fragment('${baseSmiles}');`);
204
+
205
+ // Substitutions: the whole substituted ring is re-emitted as a single Fragment('SMILES')
206
+ let currentVar = varName;
207
+ if (Object.keys(ring.substitutions).length > 0) {
208
+ const subsSmiles = getRingWithSubsSmiles(ring);
209
+ const newVar = nextVar();
210
+ lines.push(`${indent}const ${newVar} = Fragment('${subsSmiles}');`);
211
+ currentVar = newVar;
212
+ }
213
+
214
+ // Attachments stay as .attach() calls
215
+ const { lines: attLines, currentVar: attVar } = generateAttachmentCode(
216
+ ring,
217
+ indent,
218
+ nextVar,
219
+ currentVar,
220
+ verbose,
221
+ );
222
+ lines.push(...attLines);
223
+ currentVar = attVar;
224
+
225
+ return { code: lines.join('\n'), finalVar: currentVar };
226
+ }
227
+
228
+ // Verbose mode (original behavior)
168
229
  // Build options object (include branchDepths for full decompilation)
169
230
  const { optionsStr } = buildRingOptions(ring, { includeBranchDepths: true });
170
231
  lines.push(`${indent}const ${varName} = Ring({ ${optionsStr} });`);
@@ -185,6 +246,7 @@ function decompileRing(ring, indent, nextVar) {
185
246
  indent,
186
247
  nextVar,
187
248
  currentVar,
249
+ verbose,
188
250
  );
189
251
  lines.push(...attLines);
190
252
  currentVar = attVar;
@@ -195,7 +257,7 @@ function decompileRing(ring, indent, nextVar) {
195
257
  /**
196
258
  * Decompile a Linear node
197
259
  */
198
- function decompileLinear(linear, indent, nextVar) {
260
+ function decompileLinear(linear, indent, nextVar, verbose = true) {
199
261
  const lines = [];
200
262
  const varName = nextVar();
201
263
 
@@ -205,7 +267,13 @@ function decompileLinear(linear, indent, nextVar) {
205
267
  const hasNonNullBonds = linear.bonds.some((b) => b !== null);
206
268
  const hasLeadingBond = linear.metaLeadingBond !== undefined;
207
269
 
208
- if (hasNonNullBonds && hasLeadingBond) {
270
+ // Non-verbose: use Fragment('SMILES') when possible (no bonds, no leadingBond)
271
+ if (!verbose && !hasNonNullBonds && !hasLeadingBond) {
272
+ // Compute SMILES without attachments for the base linear node
273
+ const baseLinear = { ...linear, attachments: {} };
274
+ const smiles = buildSMILES(baseLinear);
275
+ lines.push(`${indent}const ${varName} = Fragment('${smiles}');`);
276
+ } else if (hasNonNullBonds && hasLeadingBond) {
209
277
  lines.push(`${indent}const ${varName} = Linear([${atomsStr}], [${formatBondsArray(linear.bonds)}], {}, '${linear.metaLeadingBond}');`);
210
278
  } else if (hasNonNullBonds) {
211
279
  lines.push(`${indent}const ${varName} = Linear([${atomsStr}], [${formatBondsArray(linear.bonds)}]);`);
@@ -222,7 +290,7 @@ function decompileLinear(linear, indent, nextVar) {
222
290
  Object.entries(linear.attachments).forEach(([pos, attachmentList]) => {
223
291
  attachmentList.forEach((attachment) => {
224
292
  // eslint-disable-next-line no-use-before-define
225
- const attachRes = decompileNode(attachment, indent, nextVar);
293
+ const attachRes = decompileNode(attachment, indent, nextVar, verbose);
226
294
  const { code: aCode, finalVar: aFinalVar } = attachRes;
227
295
  lines.push(aCode);
228
296
 
@@ -423,7 +491,7 @@ function computeSharedPositions(fusedRing) {
423
491
  * Emits .fuse() for the first pair and .addRing() for subsequent rings.
424
492
  * The resulting code goes through the simple codegen path (offset-based).
425
493
  */
426
- function decompileSimpleFusedRing(fusedRing, indent, nextVar) {
494
+ function decompileSimpleFusedRing(fusedRing, indent, nextVar, verbose = true) {
427
495
  const lines = [];
428
496
  const ringFinalVars = [];
429
497
 
@@ -486,20 +554,52 @@ function decompileSimpleFusedRing(fusedRing, indent, nextVar) {
486
554
  }
487
555
 
488
556
  const varName = nextVar();
489
- const { optionsStr } = buildRingOptions(effectiveRing, { includeBranchDepths: true });
490
- lines.push(`${indent}const ${varName} = Ring({ ${optionsStr} });`);
491
557
 
492
- const {
493
- lines: subLines, currentVar: subVar,
494
- } = generateSubstitutionCode(effectiveRing, indent, nextVar, varName);
495
- lines.push(...subLines);
558
+ if (!verbose && !effectiveRing.metaLeadingBond) {
559
+ // Non-verbose: use Fragment for ring constructor
560
+ const baseSmiles = buildSMILES(createRingNode(
561
+ effectiveRing.atoms,
562
+ effectiveRing.size,
563
+ effectiveRing.ringNumber,
564
+ effectiveRing.offset,
565
+ {},
566
+ {},
567
+ effectiveRing.bonds,
568
+ effectiveRing.metaBranchDepths,
569
+ ));
570
+ lines.push(`${indent}const ${varName} = Fragment('${baseSmiles}');`);
571
+
572
+ let currentVar = varName;
573
+ if (Object.keys(effectiveRing.substitutions || {}).length > 0) {
574
+ const subsSmiles = getRingWithSubsSmiles(effectiveRing);
575
+ const newVar = nextVar();
576
+ lines.push(`${indent}const ${newVar} = Fragment('${subsSmiles}');`);
577
+ currentVar = newVar;
578
+ }
496
579
 
497
- const {
498
- lines: attLines, currentVar: attVar,
499
- } = generateAttachmentCode(effectiveRing, indent, nextVar, subVar);
500
- lines.push(...attLines);
580
+ const {
581
+ lines: attLines, currentVar: attVar,
582
+ } = generateAttachmentCode(effectiveRing, indent, nextVar, currentVar, verbose);
583
+ lines.push(...attLines);
501
584
 
502
- ringFinalVars.push(attVar);
585
+ ringFinalVars.push(attVar);
586
+ } else {
587
+ // Verbose: original behavior
588
+ const { optionsStr } = buildRingOptions(effectiveRing, { includeBranchDepths: true });
589
+ lines.push(`${indent}const ${varName} = Ring({ ${optionsStr} });`);
590
+
591
+ const {
592
+ lines: subLines, currentVar: subVar,
593
+ } = generateSubstitutionCode(effectiveRing, indent, nextVar, varName);
594
+ lines.push(...subLines);
595
+
596
+ const {
597
+ lines: attLines, currentVar: attVar,
598
+ } = generateAttachmentCode(effectiveRing, indent, nextVar, subVar, verbose);
599
+ lines.push(...attLines);
600
+
601
+ ringFinalVars.push(attVar);
602
+ }
503
603
  });
504
604
 
505
605
  const leadingBond = fusedRing.metaLeadingBond;
@@ -541,7 +641,7 @@ function decompileSimpleFusedRing(fusedRing, indent, nextVar) {
541
641
  * rings to a fused ring. The codegen uses this metadata to correctly interleave
542
642
  * ring markers and handle branch depths.
543
643
  */
544
- function decompileComplexFusedRing(fusedRing, indent, nextVar) {
644
+ function decompileComplexFusedRing(fusedRing, indent, nextVar, verbose = true) {
545
645
  const lines = [];
546
646
  const sequentialRings = fusedRing.metaSequentialRings || [];
547
647
  const seqAtomAttachments = fusedRing.metaSeqAtomAttachments || new Map();
@@ -549,17 +649,45 @@ function decompileComplexFusedRing(fusedRing, indent, nextVar) {
549
649
  // Step 1: Decompile the base fused ring
550
650
  const ringVars = [];
551
651
  fusedRing.rings.forEach((ring) => {
552
- const { optionsStr } = buildRingOptions(ring, { includeBranchDepths: true });
553
652
  const varName = nextVar();
554
- lines.push(`${indent}const ${varName} = Ring({ ${optionsStr} });`);
555
653
 
556
- const subResult = generateSubstitutionCode(ring, indent, nextVar, varName);
557
- lines.push(...subResult.lines);
654
+ if (!verbose && !ring.metaLeadingBond) {
655
+ const baseSmiles = buildSMILES(createRingNode(
656
+ ring.atoms,
657
+ ring.size,
658
+ ring.ringNumber,
659
+ ring.offset,
660
+ {},
661
+ {},
662
+ ring.bonds,
663
+ ring.metaBranchDepths,
664
+ ));
665
+ lines.push(`${indent}const ${varName} = Fragment('${baseSmiles}');`);
666
+
667
+ let currentVar = varName;
668
+ if (Object.keys(ring.substitutions || {}).length > 0) {
669
+ const subsSmiles = getRingWithSubsSmiles(ring);
670
+ const newVar = nextVar();
671
+ lines.push(`${indent}const ${newVar} = Fragment('${subsSmiles}');`);
672
+ currentVar = newVar;
673
+ }
558
674
 
559
- const attResult = generateAttachmentCode(ring, indent, nextVar, subResult.currentVar);
560
- lines.push(...attResult.lines);
675
+ const attResult = generateAttachmentCode(ring, indent, nextVar, currentVar, verbose);
676
+ lines.push(...attResult.lines);
677
+
678
+ ringVars.push({ var: attResult.currentVar, ring });
679
+ } else {
680
+ const { optionsStr } = buildRingOptions(ring, { includeBranchDepths: true });
681
+ lines.push(`${indent}const ${varName} = Ring({ ${optionsStr} });`);
682
+
683
+ const subResult = generateSubstitutionCode(ring, indent, nextVar, varName);
684
+ lines.push(...subResult.lines);
685
+
686
+ const attResult = generateAttachmentCode(ring, indent, nextVar, subResult.currentVar, verbose);
687
+ lines.push(...attResult.lines);
561
688
 
562
- ringVars.push({ var: attResult.currentVar, ring });
689
+ ringVars.push({ var: attResult.currentVar, ring });
690
+ }
563
691
  });
564
692
 
565
693
  // Decompile seqAtomAttachments BEFORE fuse so their vars are declared early
@@ -569,7 +697,7 @@ function decompileComplexFusedRing(fusedRing, indent, nextVar) {
569
697
  seqAtomAttachments.forEach((attachments, pos) => {
570
698
  const attVars = [];
571
699
  attachments.forEach((att) => {
572
- const attResult = decompileChildNode(att, indent, nextVar);
700
+ const attResult = decompileChildNode(att, indent, nextVar, verbose);
573
701
  lines.push(attResult.code);
574
702
  attVars.push(attResult.finalVar);
575
703
  });
@@ -691,17 +819,45 @@ function decompileComplexFusedRing(fusedRing, indent, nextVar) {
691
819
  // Decompile sequential rings
692
820
  const seqRingVars = [];
693
821
  sequentialRings.forEach((ring) => {
694
- const { optionsStr } = buildRingOptions(ring, { includeBranchDepths: true });
695
822
  const varName = nextVar();
696
- lines.push(`${indent}const ${varName} = Ring({ ${optionsStr} });`);
697
823
 
698
- const subResult = generateSubstitutionCode(ring, indent, nextVar, varName);
699
- lines.push(...subResult.lines);
824
+ if (!verbose && !ring.metaLeadingBond) {
825
+ const baseSmiles = buildSMILES(createRingNode(
826
+ ring.atoms,
827
+ ring.size,
828
+ ring.ringNumber,
829
+ ring.offset,
830
+ {},
831
+ {},
832
+ ring.bonds,
833
+ ring.metaBranchDepths,
834
+ ));
835
+ lines.push(`${indent}const ${varName} = Fragment('${baseSmiles}');`);
836
+
837
+ let currentVar = varName;
838
+ if (Object.keys(ring.substitutions || {}).length > 0) {
839
+ const subsSmiles = getRingWithSubsSmiles(ring);
840
+ const newVar = nextVar();
841
+ lines.push(`${indent}const ${newVar} = Fragment('${subsSmiles}');`);
842
+ currentVar = newVar;
843
+ }
700
844
 
701
- const attResult = generateAttachmentCode(ring, indent, nextVar, subResult.currentVar);
702
- lines.push(...attResult.lines);
845
+ const attResult = generateAttachmentCode(ring, indent, nextVar, currentVar, verbose);
846
+ lines.push(...attResult.lines);
847
+
848
+ seqRingVars.push(attResult.currentVar);
849
+ } else {
850
+ const { optionsStr } = buildRingOptions(ring, { includeBranchDepths: true });
851
+ lines.push(`${indent}const ${varName} = Ring({ ${optionsStr} });`);
703
852
 
704
- seqRingVars.push(attResult.currentVar);
853
+ const subResult = generateSubstitutionCode(ring, indent, nextVar, varName);
854
+ lines.push(...subResult.lines);
855
+
856
+ const attResult = generateAttachmentCode(ring, indent, nextVar, subResult.currentVar, verbose);
857
+ lines.push(...attResult.lines);
858
+
859
+ seqRingVars.push(attResult.currentVar);
860
+ }
705
861
  });
706
862
 
707
863
  // Decompile chain atom attachments
@@ -710,7 +866,7 @@ function decompileComplexFusedRing(fusedRing, indent, nextVar) {
710
866
  const attachments = seqAtomAttachments.get(entry.attachmentPos) || [];
711
867
  const attVars = [];
712
868
  attachments.forEach((att) => {
713
- const attResult = decompileChildNode(att, indent, nextVar);
869
+ const attResult = decompileChildNode(att, indent, nextVar, verbose);
714
870
  lines.push(attResult.code);
715
871
  attVars.push(attResult.finalVar);
716
872
  });
@@ -845,24 +1001,24 @@ function decompileComplexFusedRing(fusedRing, indent, nextVar) {
845
1001
  * - Sequential rings or interleaved codegen → preserves metadata (needs it for correct SMILES)
846
1002
  * - Everything else → structural API calls only (no metadata)
847
1003
  */
848
- function decompileFusedRing(fusedRing, indent, nextVar) {
1004
+ function decompileFusedRing(fusedRing, indent, nextVar, verbose = true) {
849
1005
  const seqRings = fusedRing.metaSequentialRings;
850
1006
  const hasSeqRings = seqRings && seqRings.length > 0;
851
1007
  const isInterleaved = needsInterleavedCodegen(fusedRing);
852
1008
 
853
1009
  // Use complex decompilation for sequential rings or genuinely interleaved fused rings
854
1010
  if (hasSeqRings || isInterleaved) {
855
- return decompileComplexFusedRing(fusedRing, indent, nextVar);
1011
+ return decompileComplexFusedRing(fusedRing, indent, nextVar, verbose);
856
1012
  }
857
1013
 
858
1014
  // Everything else goes through the simple path (no metadata)
859
- return decompileSimpleFusedRing(fusedRing, indent, nextVar);
1015
+ return decompileSimpleFusedRing(fusedRing, indent, nextVar, verbose);
860
1016
  }
861
1017
 
862
1018
  /**
863
1019
  * Decompile a Molecule node
864
1020
  */
865
- function decompileMolecule(molecule, indent, nextVar) {
1021
+ function decompileMolecule(molecule, indent, nextVar, verbose = true) {
866
1022
  const lines = [];
867
1023
  const { components } = molecule;
868
1024
 
@@ -878,7 +1034,7 @@ function decompileMolecule(molecule, indent, nextVar) {
878
1034
  const componentFinalVars = [];
879
1035
  components.forEach((component) => {
880
1036
  // eslint-disable-next-line no-use-before-define
881
- const { code: componentCode, finalVar } = decompileNode(component, indent, nextVar);
1037
+ const { code: componentCode, finalVar } = decompileNode(component, indent, nextVar, verbose);
882
1038
  lines.push(componentCode);
883
1039
  // Note: metaLeadingBond is now handled in component constructors via metadata
884
1040
  // or leadingBond option, so no mutation needed here
@@ -893,21 +1049,21 @@ function decompileMolecule(molecule, indent, nextVar) {
893
1049
  return { code: lines.join('\n'), finalVar: finalVarName };
894
1050
  }
895
1051
 
896
- function decompileNode(node, indent, nextVar) {
1052
+ function decompileNode(node, indent, nextVar, verbose = true) {
897
1053
  if (isRingNode(node)) {
898
- return decompileRing(node, indent, nextVar);
1054
+ return decompileRing(node, indent, nextVar, verbose);
899
1055
  }
900
1056
 
901
1057
  if (isLinearNode(node)) {
902
- return decompileLinear(node, indent, nextVar);
1058
+ return decompileLinear(node, indent, nextVar, verbose);
903
1059
  }
904
1060
 
905
1061
  if (isFusedRingNode(node)) {
906
- return decompileFusedRing(node, indent, nextVar);
1062
+ return decompileFusedRing(node, indent, nextVar, verbose);
907
1063
  }
908
1064
 
909
1065
  if (isMoleculeNode(node)) {
910
- return decompileMolecule(node, indent, nextVar);
1066
+ return decompileMolecule(node, indent, nextVar, verbose);
911
1067
  }
912
1068
 
913
1069
  throw new Error(`Unknown node type: ${node.type}`);
@@ -919,15 +1075,19 @@ function decompileNode(node, indent, nextVar) {
919
1075
  * @param {Object} options - Options
920
1076
  * @param {number} options.indent - Indentation level (default 0)
921
1077
  * @param {string} options.varName - Variable name prefix (default 'v')
1078
+ * @param {boolean} options.verbose - Use verbose constructor syntax (default false).
1079
+ * When false, uses Fragment('SMILES') for Ring and simple Linear nodes.
922
1080
  * @param {boolean} options.includeMetadata - Include metadata assignments
923
1081
  * (default true). Set to false for cleaner output (but code may not work)
924
1082
  */
925
1083
  export function decompile(node, options = {}) {
926
- const { indent = 0, varName = 'v', includeMetadata = true } = options;
1084
+ const {
1085
+ indent = 0, varName = 'v', includeMetadata = true, verbose = false,
1086
+ } = options;
927
1087
  const indentStr = ' '.repeat(indent);
928
1088
  const nextVar = createCounter(varName);
929
1089
 
930
- const { code } = decompileNode(node, indentStr, nextVar);
1090
+ const { code } = decompileNode(node, indentStr, nextVar, verbose);
931
1091
 
932
1092
  // Always use export for declarations
933
1093
  let result = code.replace(/^(\s*)(const|let) /gm, '$1export $2 ');