smiles-js 2.0.3 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/API.md +162 -0
- package/README.md +39 -0
- package/docs/MIRROR_PLAN.md +204 -0
- package/docs/smiles.peggy +215 -0
- package/package.json +1 -1
- package/scripts/coverage-summary.js +1 -1
- package/src/codegen/branch-crossing-ring.js +27 -6
- package/src/codegen/interleaved-fused-ring.js +24 -0
- package/src/decompiler.js +236 -51
- package/src/decompiler.test.js +232 -60
- package/src/fragment.test.js +7 -2
- package/src/manipulation.js +409 -4
- package/src/manipulation.test.js +359 -1
- package/src/method-attachers.js +37 -8
- package/src/node-creators.js +7 -0
- package/src/parser/ast-builder.js +23 -8
- package/src/parser/ring-group-builder.js +14 -2
- package/src/parser/ring-utils.js +28 -0
- package/test-integration/__snapshots__/acetaminophen.test.js.snap +20 -0
- package/test-integration/__snapshots__/adjuvant-analgesics.test.js.snap +63 -1
- package/test-integration/__snapshots__/cholesterol-drugs.test.js.snap +437 -0
- package/test-integration/__snapshots__/dexamethasone.test.js.snap +31 -0
- package/test-integration/__snapshots__/endocannabinoids.test.js.snap +79 -2
- package/test-integration/__snapshots__/endogenous-opioids.test.js.snap +1116 -0
- package/test-integration/__snapshots__/hypertension-medication.test.js.snap +70 -1
- package/test-integration/__snapshots__/local-anesthetics.test.js.snap +97 -0
- package/test-integration/__snapshots__/nsaids-otc.test.js.snap +61 -1
- package/test-integration/__snapshots__/nsaids-prescription.test.js.snap +115 -2
- package/test-integration/__snapshots__/opioids.test.js.snap +113 -4
- package/test-integration/__snapshots__/steroids.test.js.snap +381 -2
- package/test-integration/acetaminophen.test.js +15 -3
- package/test-integration/adjuvant-analgesics.test.js +43 -7
- package/test-integration/cholesterol-drugs.test.js +127 -20
- package/test-integration/cholesterol.test.js +112 -0
- package/test-integration/dexamethasone.test.js +8 -2
- package/test-integration/endocannabinoids.test.js +48 -12
- package/test-integration/endogenous-opioids.smiles.js +32 -0
- package/test-integration/endogenous-opioids.test.js +192 -0
- package/test-integration/hypertension-medication.test.js +32 -8
- package/test-integration/local-anesthetics.smiles.js +33 -0
- package/test-integration/local-anesthetics.test.js +64 -16
- package/test-integration/mirror.test.js +151 -0
- package/test-integration/nsaids-otc.test.js +40 -10
- package/test-integration/nsaids-prescription.test.js +72 -18
- package/test-integration/opioids.test.js +56 -14
- package/test-integration/polymer.test.js +148 -0
- package/test-integration/steroids.test.js +112 -28
- package/test-integration/utils.js +4 -2
- package/todo +2 -3
package/API.md
CHANGED
|
@@ -190,6 +190,70 @@ Fuse this ring with another ring. `offset` is how many positions into this ring
|
|
|
190
190
|
|
|
191
191
|
Return a deep copy of the ring.
|
|
192
192
|
|
|
193
|
+
#### `ring.repeat(n, leftId, rightId)`
|
|
194
|
+
|
|
195
|
+
Repeat the ring `n` times to build polymer chains. Each copy gets unique ring numbers automatically.
|
|
196
|
+
|
|
197
|
+
| Parameter | Type | Description |
|
|
198
|
+
|-----------|------|-------------|
|
|
199
|
+
| `n` | `number` | Number of repeating units (>= 1) |
|
|
200
|
+
| `leftId` | `number` | 1-indexed left (incoming) attachment point |
|
|
201
|
+
| `rightId` | `number` | 1-indexed right (outgoing) attachment point |
|
|
202
|
+
|
|
203
|
+
```javascript
|
|
204
|
+
// Biphenyl (two linked benzene rings)
|
|
205
|
+
const benzene = Ring({ atoms: 'c', size: 6 });
|
|
206
|
+
const biphenyl = benzene.repeat(2, 1, 6);
|
|
207
|
+
console.log(biphenyl.smiles); // c1ccccc1c2ccccc2
|
|
208
|
+
|
|
209
|
+
// Terphenyl (three linked benzene rings)
|
|
210
|
+
const terphenyl = benzene.repeat(3, 1, 6);
|
|
211
|
+
console.log(terphenyl.smiles); // c1ccccc1c2ccccc2c3ccccc3
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
#### `ring.fusedRepeat(n, offset)`
|
|
215
|
+
|
|
216
|
+
Repeat a ring `n` times by fusing, creating acene-like edge-sharing systems (naphthalene, anthracene, tetracene).
|
|
217
|
+
|
|
218
|
+
| Parameter | Type | Description |
|
|
219
|
+
|-----------|------|-------------|
|
|
220
|
+
| `n` | `number` | Total number of rings (>= 1) |
|
|
221
|
+
| `offset` | `number` | Fusion offset (number of shared atom positions between adjacent rings) |
|
|
222
|
+
|
|
223
|
+
```javascript
|
|
224
|
+
const benzene = Ring({ atoms: 'c', size: 6 });
|
|
225
|
+
|
|
226
|
+
// Naphthalene (2 fused rings)
|
|
227
|
+
const naphthalene = benzene.fusedRepeat(2, 4);
|
|
228
|
+
|
|
229
|
+
// Anthracene (3 fused rings)
|
|
230
|
+
const anthracene = benzene.fusedRepeat(3, 4);
|
|
231
|
+
|
|
232
|
+
// Tetracene (4 fused rings)
|
|
233
|
+
const tetracene = benzene.fusedRepeat(4, 4);
|
|
234
|
+
```
|
|
235
|
+
|
|
236
|
+
#### `ring.mirror(pivotId?)`
|
|
237
|
+
|
|
238
|
+
Mirror a ring's attachments and substitutions to create symmetric patterns. The pivot defines the axis of symmetry on the ring.
|
|
239
|
+
|
|
240
|
+
| Parameter | Type | Default | Description |
|
|
241
|
+
|-----------|------|---------|-------------|
|
|
242
|
+
| `pivotId` | `number` | `1` | 1-indexed ring position that serves as the symmetry axis |
|
|
243
|
+
|
|
244
|
+
```javascript
|
|
245
|
+
const benzene = Ring({ atoms: 'c', size: 6 });
|
|
246
|
+
|
|
247
|
+
// meta-dimethylbenzene: attach at 2, mirror around pivot 3
|
|
248
|
+
const mono = benzene.attach(2, Linear(['C']));
|
|
249
|
+
const meta = mono.mirror(3);
|
|
250
|
+
console.log(meta.smiles); // c1c(C)cc(C)cc1
|
|
251
|
+
|
|
252
|
+
// Symmetric nitrogen substitution
|
|
253
|
+
const pyridine = Ring({ atoms: 'c', size: 6, substitutions: { 2: 'n' } });
|
|
254
|
+
const diazine = pyridine.mirror(1); // n at 2 and 6
|
|
255
|
+
```
|
|
256
|
+
|
|
193
257
|
### Linear Methods
|
|
194
258
|
|
|
195
259
|
```javascript
|
|
@@ -225,6 +289,53 @@ Attach one or more branches at a position.
|
|
|
225
289
|
|
|
226
290
|
Attach branches at multiple positions. `branchMap` is `{ position: node | [nodes] }`.
|
|
227
291
|
|
|
292
|
+
#### `linear.repeat(n, leftId, rightId)`
|
|
293
|
+
|
|
294
|
+
Repeat the linear chain `n` times to build polymer chains.
|
|
295
|
+
|
|
296
|
+
| Parameter | Type | Description |
|
|
297
|
+
|-----------|------|-------------|
|
|
298
|
+
| `n` | `number` | Number of repeating units (>= 1) |
|
|
299
|
+
| `leftId` | `number` | 1-indexed left (incoming) attachment point |
|
|
300
|
+
| `rightId` | `number` | 1-indexed right (outgoing) attachment point |
|
|
301
|
+
|
|
302
|
+
```javascript
|
|
303
|
+
// Polyethylene trimer
|
|
304
|
+
const ethylene = Linear(['C', 'C']);
|
|
305
|
+
const PE = ethylene.repeat(3, 1, 2);
|
|
306
|
+
console.log(PE.smiles); // CCCCCC
|
|
307
|
+
|
|
308
|
+
// Polystyrene dimer
|
|
309
|
+
const styrene = Linear(['C', 'C']).attach(2, Ring({ atoms: 'c', size: 6 }));
|
|
310
|
+
const PS = styrene.repeat(2, 1, 2);
|
|
311
|
+
console.log(PS.smiles); // CC(c1ccccc1)CC(c2ccccc2)
|
|
312
|
+
```
|
|
313
|
+
|
|
314
|
+
#### `linear.mirror(pivotId?)`
|
|
315
|
+
|
|
316
|
+
Mirror a linear chain around a pivot atom to create palindromic (A-B-A) patterns. The pivot atom appears once at the center.
|
|
317
|
+
|
|
318
|
+
| Parameter | Type | Default | Description |
|
|
319
|
+
|-----------|------|---------|-------------|
|
|
320
|
+
| `pivotId` | `number` | `atoms.length` | 1-indexed pivot position (center of symmetry) |
|
|
321
|
+
|
|
322
|
+
```javascript
|
|
323
|
+
// Diethyl ether: mirror ethanol around the oxygen
|
|
324
|
+
const chain = Linear(['C', 'C', 'O']);
|
|
325
|
+
const ether = chain.mirror(); // pivot defaults to last atom
|
|
326
|
+
console.log(ether.smiles); // CCOCC
|
|
327
|
+
|
|
328
|
+
// Symmetric alkene: mirror C=C around position 2
|
|
329
|
+
const vinyl = Linear(['C', 'C'], ['=']);
|
|
330
|
+
const symAlkene = vinyl.mirror(2);
|
|
331
|
+
console.log(symAlkene.smiles); // C=C=C
|
|
332
|
+
|
|
333
|
+
// With attachments: phenyl pendants are mirrored too
|
|
334
|
+
const base = Linear(['C', 'C', 'C']).attach(1, Ring({ atoms: 'c', size: 6 }));
|
|
335
|
+
const mirrored = base.mirror();
|
|
336
|
+
console.log(mirrored.smiles); // C(c1ccccc1)CCCC(c2ccccc2)
|
|
337
|
+
```
|
|
338
|
+
|
|
228
339
|
### Molecule Methods
|
|
229
340
|
|
|
230
341
|
```javascript
|
|
@@ -244,6 +355,41 @@ const modified = mol.replaceComponent(0, Linear(['N', 'N']));
|
|
|
244
355
|
|
|
245
356
|
// Concatenate molecules
|
|
246
357
|
const combined = mol.concat(Molecule([Ring({ atoms: 'c', size: 6 })]));
|
|
358
|
+
|
|
359
|
+
// Repeat the molecule as a polymer unit
|
|
360
|
+
const unit = Molecule([Linear(['C']), Ring({ atoms: 'c', size: 6 })]);
|
|
361
|
+
const dimer = unit.repeat(2, 1, 1);
|
|
362
|
+
console.log(dimer.smiles); // Cc1ccccc1Cc2ccccc2
|
|
363
|
+
|
|
364
|
+
// Mirror molecule for ABA patterns
|
|
365
|
+
const A = Linear(['C', 'C']);
|
|
366
|
+
const B = Ring({ atoms: 'c', size: 6 });
|
|
367
|
+
const AB = Molecule([A, B]);
|
|
368
|
+
const ABA = AB.mirror(); // pivot defaults to last component
|
|
369
|
+
console.log(ABA.smiles); // CCc1ccccc1CC
|
|
370
|
+
```
|
|
371
|
+
|
|
372
|
+
#### `molecule.mirror(pivotComponent?)`
|
|
373
|
+
|
|
374
|
+
Mirror a molecule's component sequence to create ABA or ABCBA patterns. The pivot component appears once at the center.
|
|
375
|
+
|
|
376
|
+
| Parameter | Type | Default | Description |
|
|
377
|
+
|-----------|------|---------|-------------|
|
|
378
|
+
| `pivotComponent` | `number` | `components.length - 1` | 0-indexed pivot component (center of symmetry) |
|
|
379
|
+
|
|
380
|
+
```javascript
|
|
381
|
+
// ABA triblock: Linear-Ring-Linear
|
|
382
|
+
const A = Linear(['C', 'C']);
|
|
383
|
+
const B = Ring({ atoms: 'c', size: 6 });
|
|
384
|
+
const AB = Molecule([A, B]);
|
|
385
|
+
const ABA = AB.mirror();
|
|
386
|
+
console.log(ABA.smiles); // CCc1ccccc1CC
|
|
387
|
+
|
|
388
|
+
// ABCBA pentablock
|
|
389
|
+
const C = Linear(['N']);
|
|
390
|
+
const ABC = Molecule([A, B, C]);
|
|
391
|
+
const ABCBA = ABC.mirror();
|
|
392
|
+
console.log(ABCBA.smiles); // CCc1ccccc1Nc2ccccc2CC
|
|
247
393
|
```
|
|
248
394
|
|
|
249
395
|
### FusedRing Methods
|
|
@@ -276,6 +422,9 @@ const withSeq = fused.addSequentialRings([{ ring: ring3, depth: 1 }, { ring: rin
|
|
|
276
422
|
|
|
277
423
|
// Add attachment to a sequential atom position
|
|
278
424
|
const withAtt = fused.addSequentialAtomAttachment(25, Linear(['O']));
|
|
425
|
+
|
|
426
|
+
// Repeat the fused ring system as a polymer unit
|
|
427
|
+
const fusedDimer = fused.repeat(2, 1, 1);
|
|
279
428
|
```
|
|
280
429
|
|
|
281
430
|
#### `fusedRing.addSequentialRings(rings, options?)`
|
|
@@ -431,10 +580,23 @@ import {
|
|
|
431
580
|
moleculeConcat,
|
|
432
581
|
moleculeGetComponent,
|
|
433
582
|
moleculeReplaceComponent,
|
|
583
|
+
repeat,
|
|
584
|
+
fusedRepeat,
|
|
585
|
+
linearMirror,
|
|
586
|
+
moleculeMirror,
|
|
587
|
+
ringMirror,
|
|
434
588
|
} from 'smiles-js/manipulation';
|
|
435
589
|
|
|
436
590
|
const benzene = Ring({ atoms: 'c', size: 6 });
|
|
437
591
|
const toluene = ringAttach(benzene, 1, Linear(['C']));
|
|
592
|
+
|
|
593
|
+
// Polymer construction
|
|
594
|
+
const biphenyl = repeat(benzene, 2, 1, 6);
|
|
595
|
+
const naphthalene = fusedRepeat(benzene, 2, 4);
|
|
596
|
+
|
|
597
|
+
// Mirror symmetry
|
|
598
|
+
const chain = Linear(['C', 'C', 'O']);
|
|
599
|
+
const ether = linearMirror(chain); // CCOCC
|
|
438
600
|
```
|
|
439
601
|
|
|
440
602
|
---
|
package/README.md
CHANGED
|
@@ -16,6 +16,8 @@ Build complex molecules programmatically with an intuitive, composable API. Pars
|
|
|
16
16
|
|
|
17
17
|
- **Parse complex SMILES** - Handles real-world pharmaceutical molecules (60-80+ characters)
|
|
18
18
|
- **Programmatic construction** - Build molecules using composable Ring, Linear, and Molecule constructors
|
|
19
|
+
- **Polymer construction** - Build repeating units with `.repeat()` and fused acene systems with `.fusedRepeat()`
|
|
20
|
+
- **Mirror symmetry** - Create palindromic chains and ABA block patterns with `.mirror()`
|
|
19
21
|
- **Round-trip fidelity** - Parse SMILES -> AST -> SMILES with structure preservation
|
|
20
22
|
- **Code generation** - Auto-generate JavaScript construction code from SMILES strings
|
|
21
23
|
- **Pharmaceutical validated** - Tested with Atorvastatin, Sildenafil, Ritonavir, and 30+ other drugs
|
|
@@ -85,6 +87,43 @@ const pyridine = benzene.substitute(5, 'n');
|
|
|
85
87
|
console.log(pyridine.smiles); // c1cccnc1
|
|
86
88
|
```
|
|
87
89
|
|
|
90
|
+
### Build Polymers
|
|
91
|
+
|
|
92
|
+
```javascript
|
|
93
|
+
import { Ring, Linear } from 'smiles-js';
|
|
94
|
+
|
|
95
|
+
// Polyethylene trimer: repeat ethylene unit 3 times
|
|
96
|
+
const ethylene = Linear(['C', 'C']);
|
|
97
|
+
const PE = ethylene.repeat(3, 1, 2);
|
|
98
|
+
console.log(PE.smiles); // CCCCCC
|
|
99
|
+
|
|
100
|
+
// Polystyrene dimer: repeat styrene unit with phenyl branch
|
|
101
|
+
const styrene = Linear(['C', 'C']).attach(2, Ring({ atoms: 'c', size: 6 }));
|
|
102
|
+
const PS = styrene.repeat(2, 1, 2);
|
|
103
|
+
console.log(PS.smiles); // CC(c1ccccc1)CC(c2ccccc2)
|
|
104
|
+
|
|
105
|
+
// Acene series via fused repeat
|
|
106
|
+
const benzene = Ring({ atoms: 'c', size: 6 });
|
|
107
|
+
const naphthalene = benzene.fusedRepeat(2, 4); // 2 fused rings
|
|
108
|
+
const anthracene = benzene.fusedRepeat(3, 4); // 3 fused rings
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
### Mirror Symmetry
|
|
112
|
+
|
|
113
|
+
```javascript
|
|
114
|
+
import { Ring, Linear, Molecule } from 'smiles-js';
|
|
115
|
+
|
|
116
|
+
// Diethyl ether: mirror C-C-O around oxygen
|
|
117
|
+
const ether = Linear(['C', 'C', 'O']).mirror();
|
|
118
|
+
console.log(ether.smiles); // CCOCC
|
|
119
|
+
|
|
120
|
+
// ABA triblock copolymer
|
|
121
|
+
const A = Linear(['C', 'C']);
|
|
122
|
+
const B = Ring({ atoms: 'c', size: 6 });
|
|
123
|
+
const ABA = Molecule([A, B]).mirror();
|
|
124
|
+
console.log(ABA.smiles); // CCc1ccccc1CC
|
|
125
|
+
```
|
|
126
|
+
|
|
88
127
|
### Generate Construction Code
|
|
89
128
|
|
|
90
129
|
```javascript
|
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
# Plan: `.mirror()` API for Symmetric Molecule Construction
|
|
2
|
+
|
|
3
|
+
## Motivation
|
|
4
|
+
|
|
5
|
+
Many molecules and polymers are symmetric — their structure reads the same forwards and backwards around a central point. Building these today requires manually constructing both halves. A `.mirror()` method would let users define one half and automatically produce the symmetric whole.
|
|
6
|
+
|
|
7
|
+
### Use Cases
|
|
8
|
+
|
|
9
|
+
1. **ABA triblock copolymers** — The most common symmetric polymer architecture. Define the A and B blocks, get A-B-A automatically.
|
|
10
|
+
2. **Palindromic linear chains** — e.g., `CCCOCCC` from `CCCO` mirrored at the O.
|
|
11
|
+
3. **Symmetric branched molecules** — e.g., diethyl ether `CCOCC` from `CCO` mirrored.
|
|
12
|
+
4. **Symmetric ring-bearing chains** — e.g., a chain with a ring in the center and identical arms on each side.
|
|
13
|
+
5. **Dendrimers** — Symmetric branching structures built outward from a core.
|
|
14
|
+
|
|
15
|
+
---
|
|
16
|
+
|
|
17
|
+
## API Design
|
|
18
|
+
|
|
19
|
+
### `linear.mirror(pivotId?)`
|
|
20
|
+
|
|
21
|
+
Mirror a linear chain around a pivot atom to create a palindromic structure.
|
|
22
|
+
|
|
23
|
+
| Parameter | Type | Default | Description |
|
|
24
|
+
|-----------|------|---------|-------------|
|
|
25
|
+
| `pivotId` | `number` | `atoms.length` | 1-indexed position of the pivot atom (included once in the output) |
|
|
26
|
+
|
|
27
|
+
**Returns:** A new `Linear` (for simple chains) or `Molecule` (if attachments are present).
|
|
28
|
+
|
|
29
|
+
**Behavior:** Takes atoms `[1..pivotId]`, then appends atoms `pivotId-1` down to `1` (the left half without the pivot, in reverse order). The pivot atom appears once in the center, and any atoms after the pivot are dropped. Attachments on mirrored atoms are also mirrored.
|
|
30
|
+
|
|
31
|
+
```javascript
|
|
32
|
+
// Diethyl ether: CCO + mirror → CCOCC
|
|
33
|
+
const half = Linear(['C', 'C', 'O']);
|
|
34
|
+
const ether = half.mirror(); // pivot defaults to last atom (O)
|
|
35
|
+
console.log(ether.smiles); // CCOCC
|
|
36
|
+
|
|
37
|
+
// Palindromic chain: CCCNCCC
|
|
38
|
+
const half2 = Linear(['C', 'C', 'C', 'N']);
|
|
39
|
+
const palindrome = half2.mirror(); // pivot at N (position 4)
|
|
40
|
+
console.log(palindrome.smiles); // CCCNCCC
|
|
41
|
+
|
|
42
|
+
// Mirror at a specific pivot
|
|
43
|
+
const chain = Linear(['C', 'C', 'C', 'O', 'C']);
|
|
44
|
+
const mirrored = chain.mirror(4); // pivot at O (position 4)
|
|
45
|
+
console.log(mirrored.smiles); // CCCOCCC
|
|
46
|
+
// atoms [1,2,3,4] + reverse of [3,2,1] → C,C,C,O,C,C,C
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
### `ring.mirror(pivotId?)`
|
|
50
|
+
|
|
51
|
+
Mirror a ring's attachments to create symmetric substitution patterns.
|
|
52
|
+
|
|
53
|
+
| Parameter | Type | Default | Description |
|
|
54
|
+
|-----------|------|---------|-------------|
|
|
55
|
+
| `pivotId` | `number` | `1` | 1-indexed ring position that serves as the symmetry axis |
|
|
56
|
+
|
|
57
|
+
**Returns:** A new `Ring` with attachments mirrored around the pivot.
|
|
58
|
+
|
|
59
|
+
**Behavior:** For a ring with an attachment at position `p`, also adds the same attachment at the "mirror" position relative to the pivot. The mirror position for `p` around pivot `v` on a ring of size `s` is: `((2*v - p) mod s)`, adjusted to 1-indexed range.
|
|
60
|
+
|
|
61
|
+
```javascript
|
|
62
|
+
// Symmetric toluene → m-xylene (1,3-dimethylbenzene)
|
|
63
|
+
const benzene = Ring({ atoms: 'c', size: 6 });
|
|
64
|
+
const toluene = benzene.attach(1, Linear(['C']));
|
|
65
|
+
const xylene = toluene.mirror(2); // mirror around position 2
|
|
66
|
+
// Attachment at position 1 stays, mirrored attachment appears at position 3 (2*2 - 1)
|
|
67
|
+
console.log(xylene.smiles); // c1(C)cc(C)ccc1
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
### `molecule.mirror(pivotComponent?)`
|
|
71
|
+
|
|
72
|
+
Mirror a molecule's component sequence to create an ABA-like structure.
|
|
73
|
+
|
|
74
|
+
| Parameter | Type | Default | Description |
|
|
75
|
+
|-----------|------|---------|-------------|
|
|
76
|
+
| `pivotComponent` | `number` | `components.length - 1` | 0-indexed component that serves as the center |
|
|
77
|
+
|
|
78
|
+
**Returns:** A new `Molecule` with components mirrored.
|
|
79
|
+
|
|
80
|
+
**Behavior:** Takes components `[0..pivot]`, then appends components `[pivot-1..0]` in reverse (with ring renumbering to avoid collisions).
|
|
81
|
+
|
|
82
|
+
```javascript
|
|
83
|
+
// ABA triblock copolymer
|
|
84
|
+
const A = Linear(['C', 'C']);
|
|
85
|
+
const B = Ring({ atoms: 'c', size: 6 });
|
|
86
|
+
const AB = Molecule([A, B]);
|
|
87
|
+
const ABA = AB.mirror(); // pivot at last component (B)
|
|
88
|
+
console.log(ABA.smiles); // CCc1ccccc1CC
|
|
89
|
+
|
|
90
|
+
// More complex: A-B-C-B-A from A-B-C
|
|
91
|
+
const block = Molecule([
|
|
92
|
+
Linear(['C', 'C']),
|
|
93
|
+
Ring({ atoms: 'c', size: 6 }),
|
|
94
|
+
Linear(['O']),
|
|
95
|
+
]);
|
|
96
|
+
const symmetric = block.mirror(); // pivot at last (O)
|
|
97
|
+
console.log(symmetric.smiles); // CCc1ccccc1Oc2ccccc2CC
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
### `fusedRing.mirror()`
|
|
101
|
+
|
|
102
|
+
For fused rings, mirror could create a symmetric fused system by adding rings on both sides of the base. This is more complex and may be deferred to a later iteration.
|
|
103
|
+
|
|
104
|
+
---
|
|
105
|
+
|
|
106
|
+
## Functional API
|
|
107
|
+
|
|
108
|
+
```javascript
|
|
109
|
+
import {
|
|
110
|
+
linearMirror,
|
|
111
|
+
ringMirror,
|
|
112
|
+
moleculeMirror,
|
|
113
|
+
} from 'smiles-js/manipulation';
|
|
114
|
+
|
|
115
|
+
const half = Linear(['C', 'C', 'O']);
|
|
116
|
+
const ether = linearMirror(half); // CCOCC
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
---
|
|
120
|
+
|
|
121
|
+
## Implementation Strategy
|
|
122
|
+
|
|
123
|
+
### Phase 1: `linear.mirror(pivotId?)`
|
|
124
|
+
|
|
125
|
+
The simplest and highest-value case. Pure atom/bond array manipulation.
|
|
126
|
+
|
|
127
|
+
**Algorithm:**
|
|
128
|
+
1. Validate `pivotId` is within `[1, atoms.length]`.
|
|
129
|
+
2. Take `leftAtoms = atoms[0..pivotId-1]` (the left half including pivot).
|
|
130
|
+
3. Take `rightAtoms = atoms[0..pivotId-2].reverse()` (the left half excluding pivot, reversed).
|
|
131
|
+
4. Concatenate: `leftAtoms + rightAtoms`.
|
|
132
|
+
5. Handle bonds: mirror the bond array similarly. Bond between atoms `i` and `i+1` maps to the corresponding mirrored position.
|
|
133
|
+
6. Handle attachments: clone attachments from mirrored positions. Attachments at position `p` (where `p < pivotId`) also appear at the mirror position `2*pivotId - p`.
|
|
134
|
+
|
|
135
|
+
**Files:**
|
|
136
|
+
- `src/manipulation.js` — Add `linearMirror()` function
|
|
137
|
+
- `src/method-attachers.js` — Attach `.mirror()` to Linear
|
|
138
|
+
|
|
139
|
+
### Phase 2: `molecule.mirror(pivotComponent?)`
|
|
140
|
+
|
|
141
|
+
Component-level mirroring for ABA block copolymers.
|
|
142
|
+
|
|
143
|
+
**Algorithm:**
|
|
144
|
+
1. Take `leftComponents = components[0..pivot]`.
|
|
145
|
+
2. Take `rightComponents = components[0..pivot-1].reverse()`, each deep-cloned with shifted ring numbers.
|
|
146
|
+
3. Return `Molecule([...leftComponents, ...rightComponents])`.
|
|
147
|
+
|
|
148
|
+
**Files:**
|
|
149
|
+
- `src/manipulation.js` — Add `moleculeMirror()` function (reuses `shiftRingNumbers` and `maxRingNumber` from the `repeat` implementation)
|
|
150
|
+
- `src/method-attachers.js` — Attach `.mirror()` to Molecule
|
|
151
|
+
|
|
152
|
+
### Phase 3: `ring.mirror(pivotId?)`
|
|
153
|
+
|
|
154
|
+
Attachment-level mirroring for symmetric ring substitution patterns.
|
|
155
|
+
|
|
156
|
+
**Algorithm:**
|
|
157
|
+
1. For each attachment at position `p`, compute mirror position `mp = 2*pivotId - p` (mod ring size, 1-indexed).
|
|
158
|
+
2. If `mp` doesn't already have the attachment, add it.
|
|
159
|
+
3. Similarly mirror substitutions.
|
|
160
|
+
|
|
161
|
+
**Files:**
|
|
162
|
+
- `src/manipulation.js` — Add `ringMirror()` function
|
|
163
|
+
- `src/method-attachers.js` — Attach `.mirror()` to Ring
|
|
164
|
+
|
|
165
|
+
### Phase 4 (Future): `fusedRing.mirror()`
|
|
166
|
+
|
|
167
|
+
Defer to a later iteration. FusedRing has complex position metadata that makes mirroring non-trivial.
|
|
168
|
+
|
|
169
|
+
---
|
|
170
|
+
|
|
171
|
+
## Edge Cases
|
|
172
|
+
|
|
173
|
+
1. **Odd-length palindrome:** `mirror()` on `Linear(['C', 'O', 'C'])` with `pivotId=2` → `COC` (the pivot O appears once; the atom after the pivot is replaced by the mirrored left half)
|
|
174
|
+
2. **Single atom:** `Linear(['O']).mirror()` → `Linear(['O'])` (nothing to mirror)
|
|
175
|
+
3. **Already symmetric:** Mirroring an already-symmetric molecule should produce the same result (idempotent on symmetric inputs)
|
|
176
|
+
4. **Bonds in mirror:** Double bonds in the left half should appear in the mirrored right half at the corresponding position
|
|
177
|
+
5. **Attachments with rings:** Mirrored ring attachments need unique ring numbers (reuse `shiftRingNumbers`)
|
|
178
|
+
|
|
179
|
+
---
|
|
180
|
+
|
|
181
|
+
## Test Plan
|
|
182
|
+
|
|
183
|
+
### Unit Tests (`manipulation.test.js`)
|
|
184
|
+
- `linear.mirror()` — simple chain, with pivot, with bonds, with attachments
|
|
185
|
+
- `molecule.mirror()` — ABA, ABCBA, with rings
|
|
186
|
+
- `ring.mirror()` — symmetric substitutions, symmetric attachments
|
|
187
|
+
- Edge cases: n=1, already symmetric, single atom
|
|
188
|
+
|
|
189
|
+
### Integration Tests (`test-integration/mirror.test.js`)
|
|
190
|
+
- Diethyl ether: `CCO.mirror()` → `CCOCC`
|
|
191
|
+
- ABA block copolymer with polystyrene/polyethylene blocks
|
|
192
|
+
- Symmetric biphenyl-bridged molecule
|
|
193
|
+
- Palindromic peptide-like chain
|
|
194
|
+
|
|
195
|
+
---
|
|
196
|
+
|
|
197
|
+
## Relationship to `.repeat()`
|
|
198
|
+
|
|
199
|
+
`.mirror()` and `.repeat()` are complementary:
|
|
200
|
+
- `.repeat(n, left, right)` — Produces **A-A-A-A** (homopolymer)
|
|
201
|
+
- `.mirror()` — Produces **A-B-A** (symmetric structure)
|
|
202
|
+
- Combined: `.repeat(2).mirror()` could produce **A-A-B-A-A** (repeated then mirrored)
|
|
203
|
+
|
|
204
|
+
Together they cover the main polymer architectures: homopolymer, block copolymer, and symmetric block copolymer.
|
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
// SMILES Grammar for Peggy (PEG parser generator for JavaScript)
|
|
2
|
+
//
|
|
3
|
+
// This grammar mirrors the tokenizer + buildAtomList two-pass architecture
|
|
4
|
+
// in src/tokenizer.js and src/parser/smiles-parser-core.js.
|
|
5
|
+
//
|
|
6
|
+
// What this grammar produces:
|
|
7
|
+
// A concrete parse tree (token stream with structure). The codebase then
|
|
8
|
+
// does a second semantic pass (buildAST) to detect rings, fused rings,
|
|
9
|
+
// and attachments — that pass is stateful and cannot be expressed in PEG.
|
|
10
|
+
//
|
|
11
|
+
// Usage:
|
|
12
|
+
// npx peggy smiles.peggy # generates smiles.js
|
|
13
|
+
// npx peggy --format commonjs -o smiles.cjs smiles.peggy # CommonJS
|
|
14
|
+
//
|
|
15
|
+
// Differences from OpenSMILES spec (matches codebase behavior):
|
|
16
|
+
// - Simple atoms accept ANY [A-Za-z] letter, not just the organic subset.
|
|
17
|
+
// The tokenizer (isAtomStart + parseSimpleAtom) only special-cases Br/Cl
|
|
18
|
+
// as two-letter atoms; everything else is a single letter.
|
|
19
|
+
// - Bracketed atoms capture raw content only (the codebase has a TODO for
|
|
20
|
+
// full isotope/chirality/hcount/charge/class parsing). The grammar DOES
|
|
21
|
+
// parse the sub-fields since it's trivial in PEG and useful for consumers.
|
|
22
|
+
// - Whitespace is silently skipped (tokenizer.js line 104).
|
|
23
|
+
// - Ring markers can be preceded by a bond (e.g. C=1CC=1 is legal SMILES).
|
|
24
|
+
|
|
25
|
+
// ============================================================
|
|
26
|
+
// Top-level: dot-separated components
|
|
27
|
+
// Mirrors: tokenizer DOT token → buildAtomList skips DOT
|
|
28
|
+
// ============================================================
|
|
29
|
+
smiles
|
|
30
|
+
= _ head:chain tail:(_ "." _ chain)* _ {
|
|
31
|
+
const components = [head, ...tail.map(t => t[3])];
|
|
32
|
+
if (components.length === 1) return components[0];
|
|
33
|
+
return { type: "molecule", components };
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
// ============================================================
|
|
37
|
+
// Chain: sequence of atom_units with optional bonds between them
|
|
38
|
+
// Mirrors: buildAtomList's linear scan — each ATOM token may be
|
|
39
|
+
// preceded by a BOND token, and followed by RING_MARKER
|
|
40
|
+
// and BRANCH_OPEN/CLOSE tokens.
|
|
41
|
+
// ============================================================
|
|
42
|
+
chain
|
|
43
|
+
= first:atom_unit rest:(_ bond? _ atom_unit)* {
|
|
44
|
+
const atoms = [first];
|
|
45
|
+
const bonds = [null];
|
|
46
|
+
for (const r of rest) {
|
|
47
|
+
bonds.push(r[1]);
|
|
48
|
+
atoms.push(r[3]);
|
|
49
|
+
}
|
|
50
|
+
return { type: "chain", atoms, bonds };
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
// ============================================================
|
|
54
|
+
// Atom unit: atom + ring markers + branches
|
|
55
|
+
//
|
|
56
|
+
// Mirrors the token consumption order in buildAtomList:
|
|
57
|
+
// ATOM → RING_MARKER* → (BRANCH_OPEN chain BRANCH_CLOSE)*
|
|
58
|
+
//
|
|
59
|
+
// Ring markers can carry their own bond (e.g. C=1...=1)
|
|
60
|
+
// This is valid SMILES: the bond on a ring closure describes
|
|
61
|
+
// the bond between the two ring-closure atoms.
|
|
62
|
+
// ============================================================
|
|
63
|
+
atom_unit
|
|
64
|
+
= atom:atom ring_bonds:(bond? ring_marker)* branches:branch* {
|
|
65
|
+
const rings = ring_bonds.map(rb => ({
|
|
66
|
+
bond: rb[0],
|
|
67
|
+
number: rb[1]
|
|
68
|
+
}));
|
|
69
|
+
return { atom, rings, branches };
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
// ============================================================
|
|
73
|
+
// Atoms
|
|
74
|
+
//
|
|
75
|
+
// Two forms, matching tokenizer.js:
|
|
76
|
+
// 1. Bracketed: '[' ... ']' (parseBracketedAtom, line 67)
|
|
77
|
+
// 2. Simple: [A-Za-z*] (parseSimpleAtom, line 39)
|
|
78
|
+
// ============================================================
|
|
79
|
+
atom
|
|
80
|
+
= bracketed_atom
|
|
81
|
+
/ simple_atom
|
|
82
|
+
|
|
83
|
+
// ----------------------------------------------------------
|
|
84
|
+
// Bracketed atom: [isotope? element chirality? hcount? charge? class?]
|
|
85
|
+
//
|
|
86
|
+
// The codebase currently stores only { raw } (parseBracketedAtom, line 79).
|
|
87
|
+
// We parse the sub-fields here since PEG makes it easy and consumers
|
|
88
|
+
// can use them. The `raw` field preserves the full bracket text for
|
|
89
|
+
// round-trip fidelity (matching token.value in the codebase).
|
|
90
|
+
// ----------------------------------------------------------
|
|
91
|
+
bracketed_atom
|
|
92
|
+
= "[" content:bracketed_content "]" {
|
|
93
|
+
return { type: "bracket_atom", ...content, raw: text() };
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
bracketed_content
|
|
97
|
+
= isotope:isotope?
|
|
98
|
+
symbol:bracket_element
|
|
99
|
+
chirality:chirality?
|
|
100
|
+
hcount:hcount?
|
|
101
|
+
charge:charge?
|
|
102
|
+
atomClass:atom_class? {
|
|
103
|
+
return { isotope, symbol, chirality, hcount, charge, atomClass };
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
// Isotope: digits before the element symbol
|
|
107
|
+
// e.g. [13C], [2H]
|
|
108
|
+
isotope
|
|
109
|
+
= digits:$[0-9]+ &[A-Za-z*] { return parseInt(digits, 10); }
|
|
110
|
+
|
|
111
|
+
// Element inside brackets — can be aromatic two-letter (se, as),
|
|
112
|
+
// aromatic single-letter, or any standard element symbol, or wildcard
|
|
113
|
+
bracket_element
|
|
114
|
+
= aromatic_element
|
|
115
|
+
/ element_symbol
|
|
116
|
+
/ "*" { return "*"; }
|
|
117
|
+
|
|
118
|
+
// Standard element symbol: uppercase letter + optional lowercase
|
|
119
|
+
// e.g. C, Na, Fe, Zr
|
|
120
|
+
element_symbol
|
|
121
|
+
= a:$[A-Z] b:$[a-z]? { return b ? a + b : a; }
|
|
122
|
+
|
|
123
|
+
// Aromatic elements inside brackets
|
|
124
|
+
// OpenSMILES: b, c, n, o, p, s, se, as
|
|
125
|
+
aromatic_element
|
|
126
|
+
= "se" { return "se"; }
|
|
127
|
+
/ "as" { return "as"; }
|
|
128
|
+
/ c:[bcnops] { return c; }
|
|
129
|
+
|
|
130
|
+
// Chirality: @ or @@ (the codebase doesn't parse extended forms like @TH1)
|
|
131
|
+
chirality
|
|
132
|
+
= "@@" { return "@@"; }
|
|
133
|
+
/ "@" { return "@"; }
|
|
134
|
+
|
|
135
|
+
// Hydrogen count: H or H<digit>
|
|
136
|
+
// e.g. [NH3+] has hcount=3, [C@H] has hcount=1
|
|
137
|
+
hcount
|
|
138
|
+
= "H" n:$[0-9]? { return n ? parseInt(n, 10) : 1; }
|
|
139
|
+
|
|
140
|
+
// Charge: +, -, +2, -1, ++, --
|
|
141
|
+
// Ordered to try multi-char patterns before single-char
|
|
142
|
+
charge
|
|
143
|
+
= "++" { return 2; }
|
|
144
|
+
/ "--" { return -2; }
|
|
145
|
+
/ "+" n:$[0-9]+ { return parseInt(n, 10); }
|
|
146
|
+
/ "-" n:$[0-9]+ { return -parseInt(n, 10); }
|
|
147
|
+
/ "+" { return 1; }
|
|
148
|
+
/ "-" { return -1; }
|
|
149
|
+
|
|
150
|
+
// Atom class: :<digits>
|
|
151
|
+
// e.g. [C:1]
|
|
152
|
+
atom_class
|
|
153
|
+
= ":" n:$[0-9]+ { return parseInt(n, 10); }
|
|
154
|
+
|
|
155
|
+
// ----------------------------------------------------------
|
|
156
|
+
// Simple (non-bracketed) atom
|
|
157
|
+
//
|
|
158
|
+
// Mirrors tokenizer.js parseSimpleAtom (line 39) + isAtomStart (line 31):
|
|
159
|
+
// isAtomStart accepts /[A-Za-z*]/
|
|
160
|
+
// parseSimpleAtom checks for two-letter Cl/Br first, then single char
|
|
161
|
+
//
|
|
162
|
+
// The codebase does NOT restrict to the OpenSMILES organic subset —
|
|
163
|
+
// it accepts any letter as a valid atom. This is intentional for
|
|
164
|
+
// permissive parsing. The semantic layer validates later.
|
|
165
|
+
// ----------------------------------------------------------
|
|
166
|
+
simple_atom
|
|
167
|
+
= symbol:simple_atom_symbol { return { type: "simple_atom", symbol }; }
|
|
168
|
+
|
|
169
|
+
simple_atom_symbol
|
|
170
|
+
= "Br" { return "Br"; }
|
|
171
|
+
/ "Cl" { return "Cl"; }
|
|
172
|
+
/ c:[A-Za-z] { return c; }
|
|
173
|
+
/ "*" { return "*"; }
|
|
174
|
+
|
|
175
|
+
// ============================================================
|
|
176
|
+
// Bonds
|
|
177
|
+
//
|
|
178
|
+
// Mirrors: BOND_SYMBOLS in tokenizer.js (line 19)
|
|
179
|
+
// new Set(['-', '=', '#', ':', '/', '\\'])
|
|
180
|
+
// ============================================================
|
|
181
|
+
bond
|
|
182
|
+
= b:[-=#:/\\] { return b; }
|
|
183
|
+
|
|
184
|
+
// ============================================================
|
|
185
|
+
// Ring markers
|
|
186
|
+
//
|
|
187
|
+
// Mirrors: tokenizer.js lines 138-160
|
|
188
|
+
// '%' + two digits → ring number 10-99
|
|
189
|
+
// single digit → ring number 0-9
|
|
190
|
+
// ============================================================
|
|
191
|
+
ring_marker
|
|
192
|
+
= "%" d1:[0-9] d2:[0-9] { return parseInt(d1 + d2, 10); }
|
|
193
|
+
/ d:[0-9] { return parseInt(d, 10); }
|
|
194
|
+
|
|
195
|
+
// ============================================================
|
|
196
|
+
// Branches (recursive)
|
|
197
|
+
//
|
|
198
|
+
// Mirrors: BRANCH_OPEN → (bond? chain) → BRANCH_CLOSE
|
|
199
|
+
// in tokenizer.js lines 106-120 and buildAtomList lines 139-153
|
|
200
|
+
//
|
|
201
|
+
// A branch can start with a bond that applies to the first atom
|
|
202
|
+
// of the branch chain (e.g. C(=O) means double bond to O).
|
|
203
|
+
// ============================================================
|
|
204
|
+
branch
|
|
205
|
+
= "(" _ b:bond? _ c:chain _ ")" {
|
|
206
|
+
return { type: "branch", bond: b, chain: c };
|
|
207
|
+
}
|
|
208
|
+
|
|
209
|
+
// ============================================================
|
|
210
|
+
// Whitespace (optional, skipped)
|
|
211
|
+
//
|
|
212
|
+
// Mirrors: tokenizer.js line 104 — /\s/ is skipped
|
|
213
|
+
// ============================================================
|
|
214
|
+
_ "whitespace"
|
|
215
|
+
= [ \t\n\r]*
|
package/package.json
CHANGED