smiles-js 2.0.3 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. package/API.md +162 -0
  2. package/README.md +39 -0
  3. package/docs/MIRROR_PLAN.md +204 -0
  4. package/docs/smiles.peggy +215 -0
  5. package/package.json +1 -1
  6. package/scripts/coverage-summary.js +1 -1
  7. package/src/codegen/branch-crossing-ring.js +27 -6
  8. package/src/codegen/interleaved-fused-ring.js +24 -0
  9. package/src/decompiler.js +236 -51
  10. package/src/decompiler.test.js +232 -60
  11. package/src/fragment.test.js +7 -2
  12. package/src/manipulation.js +409 -4
  13. package/src/manipulation.test.js +359 -1
  14. package/src/method-attachers.js +37 -8
  15. package/src/node-creators.js +7 -0
  16. package/src/parser/ast-builder.js +23 -8
  17. package/src/parser/ring-group-builder.js +14 -2
  18. package/src/parser/ring-utils.js +28 -0
  19. package/test-integration/__snapshots__/acetaminophen.test.js.snap +20 -0
  20. package/test-integration/__snapshots__/adjuvant-analgesics.test.js.snap +63 -1
  21. package/test-integration/__snapshots__/cholesterol-drugs.test.js.snap +437 -0
  22. package/test-integration/__snapshots__/dexamethasone.test.js.snap +31 -0
  23. package/test-integration/__snapshots__/endocannabinoids.test.js.snap +79 -2
  24. package/test-integration/__snapshots__/endogenous-opioids.test.js.snap +1116 -0
  25. package/test-integration/__snapshots__/hypertension-medication.test.js.snap +70 -1
  26. package/test-integration/__snapshots__/local-anesthetics.test.js.snap +97 -0
  27. package/test-integration/__snapshots__/nsaids-otc.test.js.snap +61 -1
  28. package/test-integration/__snapshots__/nsaids-prescription.test.js.snap +115 -2
  29. package/test-integration/__snapshots__/opioids.test.js.snap +113 -4
  30. package/test-integration/__snapshots__/steroids.test.js.snap +381 -2
  31. package/test-integration/acetaminophen.test.js +15 -3
  32. package/test-integration/adjuvant-analgesics.test.js +43 -7
  33. package/test-integration/cholesterol-drugs.test.js +127 -20
  34. package/test-integration/cholesterol.test.js +112 -0
  35. package/test-integration/dexamethasone.test.js +8 -2
  36. package/test-integration/endocannabinoids.test.js +48 -12
  37. package/test-integration/endogenous-opioids.smiles.js +32 -0
  38. package/test-integration/endogenous-opioids.test.js +192 -0
  39. package/test-integration/hypertension-medication.test.js +32 -8
  40. package/test-integration/local-anesthetics.smiles.js +33 -0
  41. package/test-integration/local-anesthetics.test.js +64 -16
  42. package/test-integration/mirror.test.js +151 -0
  43. package/test-integration/nsaids-otc.test.js +40 -10
  44. package/test-integration/nsaids-prescription.test.js +72 -18
  45. package/test-integration/opioids.test.js +56 -14
  46. package/test-integration/polymer.test.js +148 -0
  47. package/test-integration/steroids.test.js +112 -28
  48. package/test-integration/utils.js +4 -2
  49. package/todo +2 -3
package/API.md CHANGED
@@ -190,6 +190,70 @@ Fuse this ring with another ring. `offset` is how many positions into this ring
190
190
 
191
191
  Return a deep copy of the ring.
192
192
 
193
+ #### `ring.repeat(n, leftId, rightId)`
194
+
195
+ Repeat the ring `n` times to build polymer chains. Each copy gets unique ring numbers automatically.
196
+
197
+ | Parameter | Type | Description |
198
+ |-----------|------|-------------|
199
+ | `n` | `number` | Number of repeating units (>= 1) |
200
+ | `leftId` | `number` | 1-indexed left (incoming) attachment point |
201
+ | `rightId` | `number` | 1-indexed right (outgoing) attachment point |
202
+
203
+ ```javascript
204
+ // Biphenyl (two linked benzene rings)
205
+ const benzene = Ring({ atoms: 'c', size: 6 });
206
+ const biphenyl = benzene.repeat(2, 1, 6);
207
+ console.log(biphenyl.smiles); // c1ccccc1c2ccccc2
208
+
209
+ // Terphenyl (three linked benzene rings)
210
+ const terphenyl = benzene.repeat(3, 1, 6);
211
+ console.log(terphenyl.smiles); // c1ccccc1c2ccccc2c3ccccc3
212
+ ```
213
+
214
+ #### `ring.fusedRepeat(n, offset)`
215
+
216
+ Repeat a ring `n` times by fusing, creating acene-like edge-sharing systems (naphthalene, anthracene, tetracene).
217
+
218
+ | Parameter | Type | Description |
219
+ |-----------|------|-------------|
220
+ | `n` | `number` | Total number of rings (>= 1) |
221
+ | `offset` | `number` | Fusion offset (number of shared atom positions between adjacent rings) |
222
+
223
+ ```javascript
224
+ const benzene = Ring({ atoms: 'c', size: 6 });
225
+
226
+ // Naphthalene (2 fused rings)
227
+ const naphthalene = benzene.fusedRepeat(2, 4);
228
+
229
+ // Anthracene (3 fused rings)
230
+ const anthracene = benzene.fusedRepeat(3, 4);
231
+
232
+ // Tetracene (4 fused rings)
233
+ const tetracene = benzene.fusedRepeat(4, 4);
234
+ ```
235
+
236
+ #### `ring.mirror(pivotId?)`
237
+
238
+ Mirror a ring's attachments and substitutions to create symmetric patterns. The pivot defines the axis of symmetry on the ring.
239
+
240
+ | Parameter | Type | Default | Description |
241
+ |-----------|------|---------|-------------|
242
+ | `pivotId` | `number` | `1` | 1-indexed ring position that serves as the symmetry axis |
243
+
244
+ ```javascript
245
+ const benzene = Ring({ atoms: 'c', size: 6 });
246
+
247
+ // meta-dimethylbenzene: attach at 2, mirror around pivot 3
248
+ const mono = benzene.attach(2, Linear(['C']));
249
+ const meta = mono.mirror(3);
250
+ console.log(meta.smiles); // c1c(C)cc(C)cc1
251
+
252
+ // Symmetric nitrogen substitution
253
+ const pyridine = Ring({ atoms: 'c', size: 6, substitutions: { 2: 'n' } });
254
+ const diazine = pyridine.mirror(1); // n at 2 and 6
255
+ ```
256
+
193
257
  ### Linear Methods
194
258
 
195
259
  ```javascript
@@ -225,6 +289,53 @@ Attach one or more branches at a position.
225
289
 
226
290
  Attach branches at multiple positions. `branchMap` is `{ position: node | [nodes] }`.
227
291
 
292
+ #### `linear.repeat(n, leftId, rightId)`
293
+
294
+ Repeat the linear chain `n` times to build polymer chains.
295
+
296
+ | Parameter | Type | Description |
297
+ |-----------|------|-------------|
298
+ | `n` | `number` | Number of repeating units (>= 1) |
299
+ | `leftId` | `number` | 1-indexed left (incoming) attachment point |
300
+ | `rightId` | `number` | 1-indexed right (outgoing) attachment point |
301
+
302
+ ```javascript
303
+ // Polyethylene trimer
304
+ const ethylene = Linear(['C', 'C']);
305
+ const PE = ethylene.repeat(3, 1, 2);
306
+ console.log(PE.smiles); // CCCCCC
307
+
308
+ // Polystyrene dimer
309
+ const styrene = Linear(['C', 'C']).attach(2, Ring({ atoms: 'c', size: 6 }));
310
+ const PS = styrene.repeat(2, 1, 2);
311
+ console.log(PS.smiles); // CC(c1ccccc1)CC(c2ccccc2)
312
+ ```
313
+
314
+ #### `linear.mirror(pivotId?)`
315
+
316
+ Mirror a linear chain around a pivot atom to create palindromic (A-B-A) patterns. The pivot atom appears once at the center.
317
+
318
+ | Parameter | Type | Default | Description |
319
+ |-----------|------|---------|-------------|
320
+ | `pivotId` | `number` | `atoms.length` | 1-indexed pivot position (center of symmetry) |
321
+
322
+ ```javascript
323
+ // Diethyl ether: mirror ethanol around the oxygen
324
+ const chain = Linear(['C', 'C', 'O']);
325
+ const ether = chain.mirror(); // pivot defaults to last atom
326
+ console.log(ether.smiles); // CCOCC
327
+
328
+ // Symmetric alkene: mirror C=C around position 2
329
+ const vinyl = Linear(['C', 'C'], ['=']);
330
+ const symAlkene = vinyl.mirror(2);
331
+ console.log(symAlkene.smiles); // C=CC
332
+
333
+ // With attachments: phenyl pendants are mirrored too
334
+ const base = Linear(['C', 'C', 'C']).attach(1, Ring({ atoms: 'c', size: 6 }));
335
+ const mirrored = base.mirror();
336
+ console.log(mirrored.smiles); // C(c1ccccc1)CCC(c2ccccc2)C
337
+ ```
338
+
228
339
  ### Molecule Methods
229
340
 
230
341
  ```javascript
@@ -244,6 +355,41 @@ const modified = mol.replaceComponent(0, Linear(['N', 'N']));
244
355
 
245
356
  // Concatenate molecules
246
357
  const combined = mol.concat(Molecule([Ring({ atoms: 'c', size: 6 })]));
358
+
359
+ // Repeat the molecule as a polymer unit
360
+ const unit = Molecule([Linear(['C']), Ring({ atoms: 'c', size: 6 })]);
361
+ const dimer = unit.repeat(2, 1, 1);
362
+ console.log(dimer.smiles); // Cc1ccccc1Cc2ccccc2
363
+
364
+ // Mirror molecule for ABA patterns
365
+ const A = Linear(['C', 'C']);
366
+ const B = Ring({ atoms: 'c', size: 6 });
367
+ const AB = Molecule([A, B]);
368
+ const ABA = AB.mirror(); // pivot defaults to last component
369
+ console.log(ABA.smiles); // CCc1ccccc1CC
370
+ ```
371
+
372
+ #### `molecule.mirror(pivotComponent?)`
373
+
374
+ Mirror a molecule's component sequence to create ABA or ABCBA patterns. The pivot component appears once at the center.
375
+
376
+ | Parameter | Type | Default | Description |
377
+ |-----------|------|---------|-------------|
378
+ | `pivotComponent` | `number` | `components.length - 1` | 0-indexed pivot component (center of symmetry) |
379
+
380
+ ```javascript
381
+ // ABA triblock: Linear-Ring-Linear
382
+ const A = Linear(['C', 'C']);
383
+ const B = Ring({ atoms: 'c', size: 6 });
384
+ const AB = Molecule([A, B]);
385
+ const ABA = AB.mirror();
386
+ console.log(ABA.smiles); // CCc1ccccc1CC
387
+
388
+ // ABCBA pentablock
389
+ const C = Linear(['N']);
390
+ const ABC = Molecule([A, B, C]);
391
+ const ABCBA = ABC.mirror();
392
+ console.log(ABCBA.smiles); // CCc1ccccc1Nc2ccccc2CC
247
393
  ```
248
394
 
249
395
  ### FusedRing Methods
@@ -276,6 +422,9 @@ const withSeq = fused.addSequentialRings([{ ring: ring3, depth: 1 }, { ring: rin
276
422
 
277
423
  // Add attachment to a sequential atom position
278
424
  const withAtt = fused.addSequentialAtomAttachment(25, Linear(['O']));
425
+
426
+ // Repeat the fused ring system as a polymer unit
427
+ const fusedDimer = fused.repeat(2, 1, 1);
279
428
  ```
280
429
 
281
430
  #### `fusedRing.addSequentialRings(rings, options?)`
@@ -431,10 +580,23 @@ import {
431
580
  moleculeConcat,
432
581
  moleculeGetComponent,
433
582
  moleculeReplaceComponent,
583
+ repeat,
584
+ fusedRepeat,
585
+ linearMirror,
586
+ moleculeMirror,
587
+ ringMirror,
434
588
  } from 'smiles-js/manipulation';
435
589
 
436
590
  const benzene = Ring({ atoms: 'c', size: 6 });
437
591
  const toluene = ringAttach(benzene, 1, Linear(['C']));
592
+
593
+ // Polymer construction
594
+ const biphenyl = repeat(benzene, 2, 1, 6);
595
+ const naphthalene = fusedRepeat(benzene, 2, 4);
596
+
597
+ // Mirror symmetry
598
+ const chain = Linear(['C', 'C', 'O']);
599
+ const ether = linearMirror(chain); // CCOCC
438
600
  ```
439
601
 
440
602
  ---
package/README.md CHANGED
@@ -16,6 +16,8 @@ Build complex molecules programmatically with an intuitive, composable API. Pars
16
16
 
17
17
  - **Parse complex SMILES** - Handles real-world pharmaceutical molecules (60-80+ characters)
18
18
  - **Programmatic construction** - Build molecules using composable Ring, Linear, and Molecule constructors
19
+ - **Polymer construction** - Build repeating units with `.repeat()` and fused acene systems with `.fusedRepeat()`
20
+ - **Mirror symmetry** - Create palindromic chains and ABA block patterns with `.mirror()`
19
21
  - **Round-trip fidelity** - Parse SMILES -> AST -> SMILES with structure preservation
20
22
  - **Code generation** - Auto-generate JavaScript construction code from SMILES strings
21
23
  - **Pharmaceutical validated** - Tested with Atorvastatin, Sildenafil, Ritonavir, and 30+ other drugs
@@ -85,6 +87,43 @@ const pyridine = benzene.substitute(5, 'n');
85
87
  console.log(pyridine.smiles); // c1cccnc1
86
88
  ```
87
89
 
90
+ ### Build Polymers
91
+
92
+ ```javascript
93
+ import { Ring, Linear } from 'smiles-js';
94
+
95
+ // Polyethylene trimer: repeat ethylene unit 3 times
96
+ const ethylene = Linear(['C', 'C']);
97
+ const PE = ethylene.repeat(3, 1, 2);
98
+ console.log(PE.smiles); // CCCCCC
99
+
100
+ // Polystyrene dimer: repeat styrene unit with phenyl branch
101
+ const styrene = Linear(['C', 'C']).attach(2, Ring({ atoms: 'c', size: 6 }));
102
+ const PS = styrene.repeat(2, 1, 2);
103
+ console.log(PS.smiles); // CC(c1ccccc1)CC(c2ccccc2)
104
+
105
+ // Acene series via fused repeat
106
+ const benzene = Ring({ atoms: 'c', size: 6 });
107
+ const naphthalene = benzene.fusedRepeat(2, 4); // 2 fused rings
108
+ const anthracene = benzene.fusedRepeat(3, 4); // 3 fused rings
109
+ ```
110
+
111
+ ### Mirror Symmetry
112
+
113
+ ```javascript
114
+ import { Ring, Linear, Molecule } from 'smiles-js';
115
+
116
+ // Diethyl ether: mirror C-C-O around oxygen
117
+ const ether = Linear(['C', 'C', 'O']).mirror();
118
+ console.log(ether.smiles); // CCOCC
119
+
120
+ // ABA triblock copolymer
121
+ const A = Linear(['C', 'C']);
122
+ const B = Ring({ atoms: 'c', size: 6 });
123
+ const ABA = Molecule([A, B]).mirror();
124
+ console.log(ABA.smiles); // CCc1ccccc1CC
125
+ ```
126
+
88
127
  ### Generate Construction Code
89
128
 
90
129
  ```javascript
@@ -0,0 +1,204 @@
1
+ # Plan: `.mirror()` API for Symmetric Molecule Construction
2
+
3
+ ## Motivation
4
+
5
+ Many molecules and polymers are symmetric — their structure reads the same forwards and backwards around a central point. Building these today requires manually constructing both halves. A `.mirror()` method would let users define one half and automatically produce the symmetric whole.
6
+
7
+ ### Use Cases
8
+
9
+ 1. **ABA triblock copolymers** — The most common symmetric polymer architecture. Define the A and B blocks, get A-B-A automatically.
10
+ 2. **Palindromic linear chains** — e.g., `CCCOCCC` from `CCCO` mirrored at the O.
11
+ 3. **Symmetric heteroatom-centered molecules** — e.g., diethyl ether `CCOCC` from `CCO` mirrored at the O.
12
+ 4. **Symmetric ring-bearing chains** — e.g., a chain with a ring in the center and identical arms on each side.
13
+ 5. **Dendrimers** — Symmetric branching structures built outward from a core.
14
+
15
+ ---
16
+
17
+ ## API Design
18
+
19
+ ### `linear.mirror(pivotId?)`
20
+
21
+ Mirror a linear chain around a pivot atom to create a palindromic structure.
22
+
23
+ | Parameter | Type | Default | Description |
24
+ |-----------|------|---------|-------------|
25
+ | `pivotId` | `number` | `atoms.length` | 1-indexed position of the pivot atom (included once in the output) |
26
+
27
+ **Returns:** A new `Linear` (for simple chains) or `Molecule` (if attachments are present).
28
+
29
+ **Behavior:** Takes atoms `[1..pivotId]`, then appends atoms `pivotId-1` down to `1` (that is, `[1..pivotId-1]` reversed). The pivot atom appears once in the center. Attachments on mirrored atoms are also mirrored.
30
+
31
+ ```javascript
32
+ // Diethyl ether: CCO + mirror → CCOCC
33
+ const half = Linear(['C', 'C', 'O']);
34
+ const ether = half.mirror(); // pivot defaults to last atom (O)
35
+ console.log(ether.smiles); // CCOCC
36
+
37
+ // Palindromic chain: CCCNCCC
38
+ const half2 = Linear(['C', 'C', 'C', 'N']);
39
+ const palindrome = half2.mirror(); // pivot at N (position 4)
40
+ console.log(palindrome.smiles); // CCCNCCC
41
+
42
+ // Mirror at a specific pivot
43
+ const chain = Linear(['C', 'C', 'C', 'O', 'C']);
44
+ const mirrored = chain.mirror(4); // pivot at O (position 4)
45
+ console.log(mirrored.smiles); // CCCOCCC
46
+ // atoms [1,2,3,4] + reverse of [3,2,1] → C,C,C,O,C,C,C
47
+ ```
48
+
49
+ ### `ring.mirror(pivotId?)`
50
+
51
+ Mirror a ring's attachments to create symmetric substitution patterns.
52
+
53
+ | Parameter | Type | Default | Description |
54
+ |-----------|------|---------|-------------|
55
+ | `pivotId` | `number` | `1` | 1-indexed ring position that serves as the symmetry axis |
56
+
57
+ **Returns:** A new `Ring` with attachments mirrored around the pivot.
58
+
59
+ **Behavior:** For a ring with an attachment at position `p`, also adds the same attachment at the "mirror" position relative to the pivot. The mirror position for `p` around pivot `v` on a ring of size `s` is: `((2*v - p) mod s)`, adjusted to 1-indexed range.
60
+
61
+ ```javascript
62
+ // Toluene → m-xylene: methyl at ring position 2, mirrored around pivot 3
63
+ const benzene = Ring({ atoms: 'c', size: 6 });
64
+ const toluene = benzene.attach(2, Linear(['C']));
65
+ const xylene = toluene.mirror(3); // mirror around position 3
66
+ // Attachment at position 2 stays; its mirror position is 2*3 - 2 = 4, so a copy appears at position 4
67
+ console.log(xylene.smiles); // c1c(C)cc(C)cc1
68
+ ```
69
+
70
+ ### `molecule.mirror(pivotComponent?)`
71
+
72
+ Mirror a molecule's component sequence to create an ABA-like structure.
73
+
74
+ | Parameter | Type | Default | Description |
75
+ |-----------|------|---------|-------------|
76
+ | `pivotComponent` | `number` | `components.length - 1` | 0-indexed component that serves as the center |
77
+
78
+ **Returns:** A new `Molecule` with components mirrored.
79
+
80
+ **Behavior:** Takes components `[0..pivot]`, then appends components `pivot-1` down to `0` (that is, `[0..pivot-1]` reversed), with ring renumbering to avoid collisions.
81
+
82
+ ```javascript
83
+ // ABA triblock copolymer
84
+ const A = Linear(['C', 'C']);
85
+ const B = Ring({ atoms: 'c', size: 6 });
86
+ const AB = Molecule([A, B]);
87
+ const ABA = AB.mirror(); // pivot at last component (B)
88
+ console.log(ABA.smiles); // CCc1ccccc1CC
89
+
90
+ // More complex: A-B-C-B-A from A-B-C
91
+ const block = Molecule([
92
+ Linear(['C', 'C']),
93
+ Ring({ atoms: 'c', size: 6 }),
94
+ Linear(['O']),
95
+ ]);
96
+ const symmetric = block.mirror(); // pivot at last (O)
97
+ console.log(symmetric.smiles); // CCc1ccccc1Oc2ccccc2CC
98
+ ```
99
+
100
+ ### `fusedRing.mirror()`
101
+
102
+ For fused rings, mirror could create a symmetric fused system by adding rings on both sides of the base. This is more complex and may be deferred to a later iteration.
103
+
104
+ ---
105
+
106
+ ## Functional API
107
+
108
+ ```javascript
109
+ import {
110
+ linearMirror,
111
+ ringMirror,
112
+ moleculeMirror,
113
+ } from 'smiles-js/manipulation';
114
+
115
+ const half = Linear(['C', 'C', 'O']);
116
+ const ether = linearMirror(half); // CCOCC
117
+ ```
118
+
119
+ ---
120
+
121
+ ## Implementation Strategy
122
+
123
+ ### Phase 1: `linear.mirror(pivotId?)`
124
+
125
+ The simplest and highest-value case. Pure atom/bond array manipulation.
126
+
127
+ **Algorithm:**
128
+ 1. Validate `pivotId` is within `[1, atoms.length]`.
129
+ 2. Take `leftAtoms = atoms[0..pivotId-1]` (the left half including pivot).
130
+ 3. Take `rightAtoms = atoms[0..pivotId-2].reverse()` (the left half excluding pivot, reversed).
131
+ 4. Concatenate: `leftAtoms + rightAtoms`.
132
+ 5. Handle bonds: mirror the bond array similarly. Bond between atoms `i` and `i+1` maps to the corresponding mirrored position.
133
+ 6. Handle attachments: clone attachments from mirrored positions. Attachments at position `p` (where `p < pivotId`) also appear at the mirror position `2*pivotId - p`.
134
+
135
+ **Files:**
136
+ - `src/manipulation.js` — Add `linearMirror()` function
137
+ - `src/method-attachers.js` — Attach `.mirror()` to Linear
138
+
139
+ ### Phase 2: `molecule.mirror(pivotComponent?)`
140
+
141
+ Component-level mirroring for ABA block copolymers.
142
+
143
+ **Algorithm:**
144
+ 1. Take `leftComponents = components[0..pivot]`.
145
+ 2. Take `rightComponents = components[0..pivot-1].reverse()`, each deep-cloned with shifted ring numbers.
146
+ 3. Return `Molecule([...leftComponents, ...rightComponents])`.
147
+
148
+ **Files:**
149
+ - `src/manipulation.js` — Add `moleculeMirror()` function (reuses `shiftRingNumbers` and `maxRingNumber` from the `repeat` implementation)
150
+ - `src/method-attachers.js` — Attach `.mirror()` to Molecule
151
+
152
+ ### Phase 3: `ring.mirror(pivotId?)`
153
+
154
+ Attachment-level mirroring for symmetric ring substitution patterns.
155
+
156
+ **Algorithm:**
157
+ 1. For each attachment at position `p`, compute mirror position `mp = 2*pivotId - p` (mod ring size, 1-indexed).
158
+ 2. If `mp` doesn't already have the attachment, add it.
159
+ 3. Similarly mirror substitutions.
160
+
161
+ **Files:**
162
+ - `src/manipulation.js` — Add `ringMirror()` function
163
+ - `src/method-attachers.js` — Attach `.mirror()` to Ring
164
+
165
+ ### Phase 4 (Future): `fusedRing.mirror()`
166
+
167
+ Defer to a later iteration. FusedRing has complex position metadata that makes mirroring non-trivial.
168
+
169
+ ---
170
+
171
+ ## Edge Cases
172
+
173
+ 1. **Odd-length palindrome:** `mirror()` on `Linear(['C', 'O', 'C'])` with `pivotId=2` → `COC` (atoms 1–2 followed by atom 1; the pivot O appears once)
174
+ 2. **Single atom:** `Linear(['O']).mirror()` → `Linear(['O'])` (nothing to mirror)
175
+ 3. **Already symmetric:** Mirroring an already-symmetric molecule around its own center of symmetry should produce the same result (idempotent on symmetric inputs)
176
+ 4. **Bonds in mirror:** Double bonds in the left half should appear in the mirrored right half at the corresponding position
177
+ 5. **Attachments with rings:** Mirrored ring attachments need unique ring numbers (reuse `shiftRingNumbers`)
178
+
179
+ ---
180
+
181
+ ## Test Plan
182
+
183
+ ### Unit Tests (`manipulation.test.js`)
184
+ - `linear.mirror()` — simple chain, with pivot, with bonds, with attachments
185
+ - `molecule.mirror()` — ABA, ABCBA, with rings
186
+ - `ring.mirror()` — symmetric substitutions, symmetric attachments
187
+ - Edge cases: n=1, already symmetric, single atom
188
+
189
+ ### Integration Tests (`test-integration/mirror.test.js`)
190
+ - Diethyl ether: `CCO.mirror()` → `CCOCC`
191
+ - ABA block copolymer with polystyrene/polyethylene blocks
192
+ - Symmetric biphenyl-bridged molecule
193
+ - Palindromic peptide-like chain
194
+
195
+ ---
196
+
197
+ ## Relationship to `.repeat()`
198
+
199
+ `.mirror()` and `.repeat()` are complementary:
200
+ - `.repeat(n, left, right)` — Produces **A-A-A-A** (homopolymer)
201
+ - `.mirror()` — Produces **A-B-A** (symmetric structure)
202
+ - Combined: chaining the two — a twice-repeated A block followed by a center block B, then `.mirror()` — could produce **A-A-B-A-A** (repeated then mirrored)
203
+
204
+ Together they cover the main polymer architectures: homopolymer, block copolymer, and symmetric block copolymer.
@@ -0,0 +1,215 @@
1
+ // SMILES Grammar for Peggy (PEG parser generator for JavaScript)
2
+ //
3
+ // This grammar mirrors the tokenizer + buildAtomList two-pass architecture
4
+ // in src/tokenizer.js and src/parser/smiles-parser-core.js.
5
+ //
6
+ // What this grammar produces:
7
+ // A concrete parse tree (token stream with structure). The codebase then
8
+ // does a second semantic pass (buildAST) to detect rings, fused rings,
9
+ // and attachments — that pass is stateful and cannot be expressed in PEG.
10
+ //
11
+ // Usage:
12
+ // npx peggy smiles.peggy # generates smiles.js
13
+ // npx peggy --format commonjs -o smiles.cjs smiles.peggy # CommonJS
14
+ //
15
+ // Differences from OpenSMILES spec (matches codebase behavior):
16
+ // - Simple atoms accept ANY [A-Za-z] letter, not just the organic subset.
17
+ // The tokenizer (isAtomStart + parseSimpleAtom) only special-cases Br/Cl
18
+ // as two-letter atoms; everything else is a single letter.
19
+ // - Bracketed atoms capture raw content only (the codebase has a TODO for
20
+ // full isotope/chirality/hcount/charge/class parsing). The grammar DOES
21
+ // parse the sub-fields since it's trivial in PEG and useful for consumers.
22
+ // - Whitespace is silently skipped (tokenizer.js line 104).
23
+ // - Ring markers can be preceded by a bond (e.g. C=1CC=1 is legal SMILES).
24
+
25
+ // ============================================================
26
+ // Top-level: dot-separated components
27
+ // Mirrors: tokenizer DOT token → buildAtomList skips DOT
28
+ // ============================================================
29
+ smiles
30
+ = _ head:chain tail:(_ "." _ chain)* _ {
31
+ const components = [head, ...tail.map(t => t[3])];
32
+ if (components.length === 1) return components[0];
33
+ return { type: "molecule", components };
34
+ }
35
+
36
+ // ============================================================
37
+ // Chain: sequence of atom_units with optional bonds between them
38
+ // Mirrors: buildAtomList's linear scan — each ATOM token may be
39
+ // preceded by a BOND token, and followed by RING_MARKER
40
+ // and BRANCH_OPEN/CLOSE tokens.
41
+ // ============================================================
42
+ chain
43
+ = first:atom_unit rest:(_ bond? _ atom_unit)* {
44
+ const atoms = [first];
45
+ const bonds = [null];
46
+ for (const r of rest) {
47
+ bonds.push(r[1]);
48
+ atoms.push(r[3]);
49
+ }
50
+ return { type: "chain", atoms, bonds };
51
+ }
52
+
53
+ // ============================================================
54
+ // Atom unit: atom + ring markers + branches
55
+ //
56
+ // Mirrors the token consumption order in buildAtomList:
57
+ // ATOM → RING_MARKER* → (BRANCH_OPEN chain BRANCH_CLOSE)*
58
+ //
59
+ // Ring markers can carry their own bond (e.g. C=1...=1)
60
+ // This is valid SMILES: the bond on a ring closure describes
61
+ // the bond between the two ring-closure atoms.
62
+ // ============================================================
63
+ atom_unit
64
+ = atom:atom ring_bonds:(bond? ring_marker)* branches:branch* {
65
+ const rings = ring_bonds.map(rb => ({
66
+ bond: rb[0],
67
+ number: rb[1]
68
+ }));
69
+ return { atom, rings, branches };
70
+ }
71
+
72
+ // ============================================================
73
+ // Atoms
74
+ //
75
+ // Two forms, matching tokenizer.js:
76
+ // 1. Bracketed: '[' ... ']' (parseBracketedAtom, line 67)
77
+ // 2. Simple: [A-Za-z*] (parseSimpleAtom, line 39)
78
+ // ============================================================
79
+ atom
80
+ = bracketed_atom
81
+ / simple_atom
82
+
83
+ // ----------------------------------------------------------
84
+ // Bracketed atom: [isotope? element chirality? hcount? charge? class?]
85
+ //
86
+ // The codebase currently stores only { raw } (parseBracketedAtom, line 79).
87
+ // We parse the sub-fields here since PEG makes it easy and consumers
88
+ // can use them. The `raw` field preserves the full bracket text for
89
+ // round-trip fidelity (matching token.value in the codebase).
90
+ // ----------------------------------------------------------
91
+ bracketed_atom
92
+ = "[" content:bracketed_content "]" {
93
+ return { type: "bracket_atom", ...content, raw: text() };
94
+ }
95
+
96
+ bracketed_content
97
+ = isotope:isotope?
98
+ symbol:bracket_element
99
+ chirality:chirality?
100
+ hcount:hcount?
101
+ charge:charge?
102
+ atomClass:atom_class? {
103
+ return { isotope, symbol, chirality, hcount, charge, atomClass };
104
+ }
105
+
106
+ // Isotope: digits before the element symbol
107
+ // e.g. [13C], [2H]
108
+ isotope
109
+ = digits:$[0-9]+ &[A-Za-z*] { return parseInt(digits, 10); }
110
+
111
+ // Element inside brackets — can be aromatic two-letter (se, as),
112
+ // aromatic single-letter, or any standard element symbol, or wildcard
113
+ bracket_element
114
+ = aromatic_element
115
+ / element_symbol
116
+ / "*" { return "*"; }
117
+
118
+ // Standard element symbol: uppercase letter + optional lowercase
119
+ // e.g. C, Na, Fe, Zr
120
+ element_symbol
121
+ = a:$[A-Z] b:$[a-z]? { return b ? a + b : a; }
122
+
123
+ // Aromatic elements inside brackets
124
+ // OpenSMILES: b, c, n, o, p, s, se, as
125
+ aromatic_element
126
+ = "se" { return "se"; }
127
+ / "as" { return "as"; }
128
+ / c:[bcnops] { return c; }
129
+
130
+ // Chirality: @ or @@ (the codebase doesn't parse extended forms like @TH1)
131
+ chirality
132
+ = "@@" { return "@@"; }
133
+ / "@" { return "@"; }
134
+
135
+ // Hydrogen count: H or H<digit>
136
+ // e.g. [NH3+] has hcount=3, [C@H] has hcount=1
137
+ hcount
138
+ = "H" n:$[0-9]? { return n ? parseInt(n, 10) : 1; }
139
+
140
+ // Charge: +, -, +2, -1, ++, --
141
+ // Ordered to try multi-char patterns before single-char
142
+ charge
143
+ = "++" { return 2; }
144
+ / "--" { return -2; }
145
+ / "+" n:$[0-9]+ { return parseInt(n, 10); }
146
+ / "-" n:$[0-9]+ { return -parseInt(n, 10); }
147
+ / "+" { return 1; }
148
+ / "-" { return -1; }
149
+
150
+ // Atom class: :<digits>
151
+ // e.g. [C:1]
152
+ atom_class
153
+ = ":" n:$[0-9]+ { return parseInt(n, 10); }
154
+
155
+ // ----------------------------------------------------------
156
+ // Simple (non-bracketed) atom
157
+ //
158
+ // Mirrors tokenizer.js parseSimpleAtom (line 39) + isAtomStart (line 31):
159
+ // isAtomStart accepts /[A-Za-z*]/
160
+ // parseSimpleAtom checks for two-letter Cl/Br first, then single char
161
+ //
162
+ // The codebase does NOT restrict to the OpenSMILES organic subset —
163
+ // it accepts any letter as a valid atom. This is intentional for
164
+ // permissive parsing. The semantic layer validates later.
165
+ // ----------------------------------------------------------
166
+ simple_atom
167
+ = symbol:simple_atom_symbol { return { type: "simple_atom", symbol }; }
168
+
169
+ simple_atom_symbol
170
+ = "Br" { return "Br"; }
171
+ / "Cl" { return "Cl"; }
172
+ / c:[A-Za-z] { return c; }
173
+ / "*" { return "*"; }
174
+
175
+ // ============================================================
176
+ // Bonds
177
+ //
178
+ // Mirrors: BOND_SYMBOLS in tokenizer.js (line 19)
179
+ // new Set(['-', '=', '#', ':', '/', '\\'])
180
+ // ============================================================
181
+ bond
182
+ = b:[-=#:/\\] { return b; }
183
+
184
+ // ============================================================
185
+ // Ring markers
186
+ //
187
+ // Mirrors: tokenizer.js lines 138-160
188
+ // '%' + two digits → ring number 10-99
189
+ // single digit → ring number 0-9
190
+ // ============================================================
191
+ ring_marker
192
+ = "%" d1:[0-9] d2:[0-9] { return parseInt(d1 + d2, 10); }
193
+ / d:[0-9] { return parseInt(d, 10); }
194
+
195
+ // ============================================================
196
+ // Branches (recursive)
197
+ //
198
+ // Mirrors: BRANCH_OPEN → (bond? chain) → BRANCH_CLOSE
199
+ // in tokenizer.js lines 106-120 and buildAtomList lines 139-153
200
+ //
201
+ // A branch can start with a bond that applies to the first atom
202
+ // of the branch chain (e.g. C(=O) means double bond to O).
203
+ // ============================================================
204
+ branch
205
+ = "(" _ b:bond? _ c:chain _ ")" {
206
+ return { type: "branch", bond: b, chain: c };
207
+ }
208
+
209
+ // ============================================================
210
+ // Whitespace (optional, skipped)
211
+ //
212
+ // Mirrors: tokenizer.js line 104 — /\s/ is skipped
213
+ // ============================================================
214
+ _ "whitespace"
215
+ = [ \t\n\r]*
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "smiles-js",
3
- "version": "2.0.3",
3
+ "version": "2.2.0",
4
4
  "description": "A JavaScript library for building molecules using composable fragments",
5
5
  "type": "module",
6
6
  "main": "src/index.js",
@@ -9,7 +9,7 @@ const projectRoot = join(__dirname, '..');
9
9
 
10
10
  const result = spawnSync('bun', ['test', '--coverage'], {
11
11
  encoding: 'utf-8',
12
- cwd: projectRoot
12
+ cwd: projectRoot,
13
13
  });
14
14
 
15
15
  const output = (result.stdout || '') + (result.stderr || '');