smiles-js 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +52 -1
- package/package.json +1 -1
- package/src/fragment.js +25 -0
- package/src/validator.js +10 -0
- package/test/concat.test.js +190 -0
package/README.md
CHANGED
|
@@ -22,10 +22,11 @@ console.log(toluene); // c1ccccc1C
|
|
|
22
22
|
|
|
23
23
|
## Core Concepts
|
|
24
24
|
|
|
25
|
-
Molecules are built by composing fragments. There are three composition operations:
|
|
25
|
+
Molecules are built by composing fragments. There are four composition operations:
|
|
26
26
|
|
|
27
27
|
| Operation | Syntax | Result |
|
|
28
28
|
|-----------|--------|--------|
|
|
29
|
+
| Concatenate | `a.concat(b)` | `a` and `b` joined linearly |
|
|
29
30
|
| Branch | `a(b)` | `b` attached as branch to `a` |
|
|
30
31
|
| Multiple branches | `a(b)(c)(d)` | `b`, `c`, `d` all branch from `a` |
|
|
31
32
|
| Nested branches | `a(b(c))` | `c` branches from `b`, which branches from `a` |
|
|
@@ -61,6 +62,56 @@ const c = Fragment('CCC');
|
|
|
61
62
|
const molecule = a(b(c)); // C(CC(CCC))
|
|
62
63
|
```
|
|
63
64
|
|
|
65
|
+
#### Concatenation with `.concat()`
|
|
66
|
+
|
|
67
|
+
The `concat` method joins fragments linearly (end-to-end) without branching:
|
|
68
|
+
|
|
69
|
+
```js
|
|
70
|
+
const ethyl = Fragment('CC');
|
|
71
|
+
const propyl = Fragment('CCC');
|
|
72
|
+
const pentane = ethyl.concat(propyl); // CCCCC
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
**Method chaining:**
|
|
76
|
+
|
|
77
|
+
```js
|
|
78
|
+
const hexane = Fragment('CC')
|
|
79
|
+
.concat('CC')
|
|
80
|
+
.concat('CC'); // CCCCCC
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
**Static method:**
|
|
84
|
+
|
|
85
|
+
```js
|
|
86
|
+
const butane = Fragment.concat('CC', 'CC'); // CCCC
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
**Ring number handling:**
|
|
90
|
+
|
|
91
|
+
When concatenating fragments with rings, conflicting ring numbers are automatically remapped:
|
|
92
|
+
|
|
93
|
+
```js
|
|
94
|
+
const ring1 = Fragment('C1CCC1');
|
|
95
|
+
const ring2 = Fragment('C1CCC1');
|
|
96
|
+
const twoRings = ring1.concat(ring2); // C1CCC1C2CCC2
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
**Use cases:**
|
|
100
|
+
|
|
101
|
+
- Building linear chains: `Fragment('C').concat('C').concat('C')` → `CCC`
|
|
102
|
+
- Creating polymers: call `concat` repeatedly to build long chains
|
|
103
|
+
- Combining building blocks: `benzene.concat(methyl)` → `c1ccccc1C`
|
|
104
|
+
|
|
105
|
+
**Difference from branching:**
|
|
106
|
+
|
|
107
|
+
```js
|
|
108
|
+
const a = Fragment('CC');
|
|
109
|
+
const b = Fragment('O');
|
|
110
|
+
|
|
111
|
+
a.concat(b); // CCO (linear)
|
|
112
|
+
a(b); // CC(O) (branched)
|
|
113
|
+
```
|
|
114
|
+
|
|
64
115
|
### Ring(atom, size)
|
|
65
116
|
|
|
66
117
|
Creates a simple ring.
|
package/package.json
CHANGED
package/src/fragment.js
CHANGED
|
@@ -39,6 +39,25 @@ export function Fragment(smiles) {
|
|
|
39
39
|
fragment.molecularWeight = calculateMolecularWeight(currentSmiles);
|
|
40
40
|
fragment.toString = () => currentSmiles;
|
|
41
41
|
fragment[Symbol.toPrimitive] = () => currentSmiles;
|
|
42
|
+
|
|
43
|
+
// Concat method for combining fragments linearly
|
|
44
|
+
fragment.concat = function(other) {
|
|
45
|
+
const otherSmiles = typeof other === 'function' ? other.smiles : String(other);
|
|
46
|
+
|
|
47
|
+
// Handle ring number conflicts
|
|
48
|
+
const usedInCurrent = findUsedRingNumbers(currentSmiles);
|
|
49
|
+
const usedInOther = findUsedRingNumbers(otherSmiles);
|
|
50
|
+
|
|
51
|
+
let remappedOther = otherSmiles;
|
|
52
|
+
for (const ringNum of usedInOther) {
|
|
53
|
+
if (usedInCurrent.has(ringNum)) {
|
|
54
|
+
const newNum = getNextRingNumber(currentSmiles + remappedOther);
|
|
55
|
+
remappedOther = remappedOther.replaceAll(ringNum, newNum.replace('%', ''));
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
return createFragment(currentSmiles + remappedOther);
|
|
60
|
+
};
|
|
42
61
|
|
|
43
62
|
return fragment;
|
|
44
63
|
};
|
|
@@ -47,3 +66,9 @@ export function Fragment(smiles) {
|
|
|
47
66
|
}
|
|
48
67
|
|
|
49
68
|
Fragment.validate = validateSMILES;
|
|
69
|
+
|
|
70
|
+
// Static concat method for convenience: Fragment.concat(a, b)
|
|
71
|
+
Fragment.concat = function(a, b) {
|
|
72
|
+
const fragmentA = typeof a === 'string' ? Fragment(a) : a;
|
|
73
|
+
return fragmentA.concat(b);
|
|
74
|
+
};
|
package/src/validator.js
CHANGED
|
@@ -7,6 +7,16 @@ export function validateSMILES(smiles) {
|
|
|
7
7
|
for (let i = 0; i < smiles.length; i++) {
|
|
8
8
|
const char = smiles[i];
|
|
9
9
|
|
|
10
|
+
if (char === '[') {
|
|
11
|
+
// Skip over bracketed atoms (e.g., [NH4+], [O-], [13C])
|
|
12
|
+
let closingBracket = smiles.indexOf(']', i);
|
|
13
|
+
if (closingBracket === -1) {
|
|
14
|
+
return { valid: false, error: 'Unclosed bracket' };
|
|
15
|
+
}
|
|
16
|
+
i = closingBracket;
|
|
17
|
+
continue;
|
|
18
|
+
}
|
|
19
|
+
|
|
10
20
|
if (char === '(') {
|
|
11
21
|
branchCount++;
|
|
12
22
|
} else if (char === ')') {
|
|
package/test/concat.test.js
ADDED
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
import { test } from 'node:test';
|
|
2
|
+
import assert from 'node:assert';
|
|
3
|
+
import { Fragment } from '../src/fragment.js';
|
|
4
|
+
|
|
5
|
+
test('concat combines two simple fragments', () => {
|
|
6
|
+
const a = Fragment('CC');
|
|
7
|
+
const b = Fragment('CC');
|
|
8
|
+
const result = a.concat(b);
|
|
9
|
+
assert.strictEqual(result.smiles, 'CCCC');
|
|
10
|
+
});
|
|
11
|
+
|
|
12
|
+
test('concat works with method chaining', () => {
|
|
13
|
+
const a = Fragment('C');
|
|
14
|
+
const b = Fragment('C');
|
|
15
|
+
const c = Fragment('C');
|
|
16
|
+
const result = a.concat(b).concat(c);
|
|
17
|
+
assert.strictEqual(result.smiles, 'CCC');
|
|
18
|
+
});
|
|
19
|
+
|
|
20
|
+
test('concat handles string arguments', () => {
|
|
21
|
+
const a = Fragment('CC');
|
|
22
|
+
const result = a.concat('CC');
|
|
23
|
+
assert.strictEqual(result.smiles, 'CCCC');
|
|
24
|
+
});
|
|
25
|
+
|
|
26
|
+
test('concat preserves properties', () => {
|
|
27
|
+
const a = Fragment('C');
|
|
28
|
+
const b = Fragment('C');
|
|
29
|
+
const result = a.concat(b);
|
|
30
|
+
|
|
31
|
+
assert.strictEqual(result.atoms, 2);
|
|
32
|
+
assert.strictEqual(result.formula, 'C2H6');
|
|
33
|
+
});
|
|
34
|
+
|
|
35
|
+
test('concat handles fragments with branches', () => {
|
|
36
|
+
const a = Fragment('C(C)C');
|
|
37
|
+
const b = Fragment('CC');
|
|
38
|
+
const result = a.concat(b);
|
|
39
|
+
assert.strictEqual(result.smiles, 'C(C)CCC');
|
|
40
|
+
});
|
|
41
|
+
|
|
42
|
+
test('concat handles rings without conflicts', () => {
|
|
43
|
+
const a = Fragment('C1CCC1');
|
|
44
|
+
const b = Fragment('CC');
|
|
45
|
+
const result = a.concat(b);
|
|
46
|
+
assert.strictEqual(result.smiles, 'C1CCC1CC');
|
|
47
|
+
});
|
|
48
|
+
|
|
49
|
+
test('concat remaps conflicting ring numbers', () => {
|
|
50
|
+
const a = Fragment('C1CCC1');
|
|
51
|
+
const b = Fragment('C1CCC1');
|
|
52
|
+
const result = a.concat(b);
|
|
53
|
+
|
|
54
|
+
// The second ring should be remapped to ring 2
|
|
55
|
+
assert.strictEqual(result.smiles, 'C1CCC1C2CCC2');
|
|
56
|
+
});
|
|
57
|
+
|
|
58
|
+
test('concat handles multiple ring conflicts', () => {
|
|
59
|
+
const a = Fragment('C1CCC1C2CCC2');
|
|
60
|
+
const b = Fragment('C1CCC1C2CCC2');
|
|
61
|
+
const result = a.concat(b);
|
|
62
|
+
|
|
63
|
+
// Rings should be remapped to 3 and 4
|
|
64
|
+
assert.strictEqual(result.smiles, 'C1CCC1C2CCC2C3CCC3C4CCC4');
|
|
65
|
+
});
|
|
66
|
+
|
|
67
|
+
test('concat handles aromatic rings', () => {
|
|
68
|
+
const benzene1 = Fragment('c1ccccc1');
|
|
69
|
+
const benzene2 = Fragment('c1ccccc1');
|
|
70
|
+
const result = benzene1.concat(benzene2);
|
|
71
|
+
|
|
72
|
+
assert.strictEqual(result.smiles, 'c1ccccc1c2ccccc2');
|
|
73
|
+
});
|
|
74
|
+
|
|
75
|
+
test('concat works with complex molecules', () => {
|
|
76
|
+
const phenyl = Fragment('c1ccccc1');
|
|
77
|
+
const methyl = Fragment('C');
|
|
78
|
+
const result = phenyl.concat(methyl);
|
|
79
|
+
|
|
80
|
+
assert.strictEqual(result.smiles, 'c1ccccc1C');
|
|
81
|
+
});
|
|
82
|
+
|
|
83
|
+
test('concat can be used multiple times', () => {
|
|
84
|
+
const c = Fragment('C');
|
|
85
|
+
const result = c.concat('C').concat('C').concat('C');
|
|
86
|
+
|
|
87
|
+
assert.strictEqual(result.smiles, 'CCCC');
|
|
88
|
+
assert.strictEqual(result.atoms, 4);
|
|
89
|
+
});
|
|
90
|
+
|
|
91
|
+
test('Static Fragment.concat works', () => {
|
|
92
|
+
const a = Fragment('CC');
|
|
93
|
+
const b = Fragment('CC');
|
|
94
|
+
const result = Fragment.concat(a, b);
|
|
95
|
+
|
|
96
|
+
assert.strictEqual(result.smiles, 'CCCC');
|
|
97
|
+
});
|
|
98
|
+
|
|
99
|
+
test('Static Fragment.concat accepts strings', () => {
|
|
100
|
+
const result = Fragment.concat('CC', 'CC');
|
|
101
|
+
assert.strictEqual(result.smiles, 'CCCC');
|
|
102
|
+
});
|
|
103
|
+
|
|
104
|
+
test('concat with functional groups', () => {
|
|
105
|
+
const ethyl = Fragment('CC');
|
|
106
|
+
const hydroxyl = Fragment('O');
|
|
107
|
+
const result = ethyl.concat(hydroxyl);
|
|
108
|
+
|
|
109
|
+
assert.strictEqual(result.smiles, 'CCO');
|
|
110
|
+
assert.strictEqual(result.formula, 'C2H6O');
|
|
111
|
+
});
|
|
112
|
+
|
|
113
|
+
test('concat preserves original fragments', () => {
|
|
114
|
+
const a = Fragment('CC');
|
|
115
|
+
const b = Fragment('CC');
|
|
116
|
+
const result = a.concat(b);
|
|
117
|
+
|
|
118
|
+
// Original fragments should be unchanged
|
|
119
|
+
assert.strictEqual(a.smiles, 'CC');
|
|
120
|
+
assert.strictEqual(b.smiles, 'CC');
|
|
121
|
+
assert.strictEqual(result.smiles, 'CCCC');
|
|
122
|
+
});
|
|
123
|
+
|
|
124
|
+
test('concat with heterocycles', () => {
|
|
125
|
+
const pyridine = Fragment('n1ccccc1');
|
|
126
|
+
const methyl = Fragment('C');
|
|
127
|
+
const result = pyridine.concat(methyl);
|
|
128
|
+
|
|
129
|
+
assert.strictEqual(result.smiles, 'n1ccccc1C');
|
|
130
|
+
});
|
|
131
|
+
|
|
132
|
+
test('concat builds polymer-like chains', () => {
|
|
133
|
+
const ethylene = Fragment('CC');
|
|
134
|
+
let polymer = ethylene;
|
|
135
|
+
|
|
136
|
+
for (let i = 0; i < 4; i++) {
|
|
137
|
+
polymer = polymer.concat(ethylene);
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
assert.strictEqual(polymer.smiles, 'CCCCCCCCCC'); // 5 ethylene units = 10 carbons
|
|
141
|
+
assert.strictEqual(polymer.atoms, 10);
|
|
142
|
+
});
|
|
143
|
+
|
|
144
|
+
test('concat with charged species', () => {
|
|
145
|
+
const a = Fragment('[NH4+]');
|
|
146
|
+
const b = Fragment('[O-]');
|
|
147
|
+
const result = a.concat(b);
|
|
148
|
+
|
|
149
|
+
assert.strictEqual(result.smiles, '[NH4+][O-]');
|
|
150
|
+
});
|
|
151
|
+
|
|
152
|
+
test('concat handles double and triple bonds', () => {
|
|
153
|
+
const ethylene = Fragment('C=C');
|
|
154
|
+
const methyl = Fragment('C');
|
|
155
|
+
const result = ethylene.concat(methyl);
|
|
156
|
+
|
|
157
|
+
assert.strictEqual(result.smiles, 'C=CC');
|
|
158
|
+
});
|
|
159
|
+
|
|
160
|
+
test('concat with stereochemistry', () => {
|
|
161
|
+
const a = Fragment('C[C@H](O)C');
|
|
162
|
+
const b = Fragment('C');
|
|
163
|
+
const result = a.concat(b);
|
|
164
|
+
|
|
165
|
+
assert.strictEqual(result.smiles, 'C[C@H](O)CC');
|
|
166
|
+
});
|
|
167
|
+
|
|
168
|
+
test('concat example from requirement: Fragment("CC") + Fragment("CC")', () => {
|
|
169
|
+
const a = Fragment('CC');
|
|
170
|
+
const b = Fragment('CC');
|
|
171
|
+
const result = a.concat(b);
|
|
172
|
+
|
|
173
|
+
assert.strictEqual(result.smiles, 'CCCC');
|
|
174
|
+
assert.strictEqual(String(result), 'CCCC');
|
|
175
|
+
});
|
|
176
|
+
|
|
177
|
+
test('concat returns a new Fragment with all methods', () => {
|
|
178
|
+
const a = Fragment('C');
|
|
179
|
+
const b = Fragment('C');
|
|
180
|
+
const result = a.concat(b);
|
|
181
|
+
|
|
182
|
+
// Should have all Fragment methods
|
|
183
|
+
assert.strictEqual(typeof result.concat, 'function');
|
|
184
|
+
assert.strictEqual(typeof result.toString, 'function');
|
|
185
|
+
assert.strictEqual(typeof result, 'function'); // Can be called as a function for branching
|
|
186
|
+
|
|
187
|
+
// Can still use for branching
|
|
188
|
+
const branched = result(Fragment('O'));
|
|
189
|
+
assert.strictEqual(branched.smiles, 'CC(O)');
|
|
190
|
+
});
|