selfies-js 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +274 -0
- package/package.json +65 -0
- package/src/alphabet.js +150 -0
- package/src/alphabet.test.js +82 -0
- package/src/chemistryValidator.js +236 -0
- package/src/cli.js +206 -0
- package/src/constraints.js +186 -0
- package/src/constraints.test.js +126 -0
- package/src/decoder.js +636 -0
- package/src/decoder.test.js +560 -0
- package/src/dsl/analyzer.js +170 -0
- package/src/dsl/analyzer.test.js +139 -0
- package/src/dsl/dsl.test.js +146 -0
- package/src/dsl/importer.js +238 -0
- package/src/dsl/index.js +32 -0
- package/src/dsl/lexer.js +264 -0
- package/src/dsl/lexer.test.js +115 -0
- package/src/dsl/parser.js +201 -0
- package/src/dsl/parser.test.js +148 -0
- package/src/dsl/resolver.js +136 -0
- package/src/dsl/resolver.test.js +99 -0
- package/src/dsl/symbolTable.js +56 -0
- package/src/dsl/symbolTable.test.js +68 -0
- package/src/dsl/valenceValidator.js +147 -0
- package/src/encoder.js +467 -0
- package/src/encoder.test.js +61 -0
- package/src/errors.js +79 -0
- package/src/errors.test.js +91 -0
- package/src/grammar_rules.js +146 -0
- package/src/index.js +70 -0
- package/src/parser.js +96 -0
- package/src/parser.test.js +96 -0
- package/src/properties/atoms.js +69 -0
- package/src/properties/atoms.test.js +116 -0
- package/src/properties/formula.js +111 -0
- package/src/properties/formula.test.js +95 -0
- package/src/properties/molecularWeight.js +80 -0
- package/src/properties/molecularWeight.test.js +84 -0
- package/src/properties/properties.test.js +77 -0
- package/src/renderers/README.md +127 -0
- package/src/renderers/svg.js +113 -0
- package/src/renderers/svg.test.js +42 -0
- package/src/syntax.js +641 -0
- package/src/syntax.test.js +363 -0
- package/src/tokenizer.js +99 -0
- package/src/tokenizer.test.js +55 -0
- package/src/validator.js +70 -0
- package/src/validator.test.js +44 -0
package/src/decoder.js
ADDED
|
@@ -0,0 +1,636 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Decoder - Converts SELFIES strings to SMILES
|
|
3
|
+
*
|
|
4
|
+
* This implements the SELFIES derivation state machine to properly
|
|
5
|
+
* reconstruct molecular structures with branches and rings.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { tokenize } from './tokenizer.js'
|
|
9
|
+
import { getBondingCapacity } from './constraints.js'
|
|
10
|
+
import {
|
|
11
|
+
processBranchSymbol,
|
|
12
|
+
processRingSymbol,
|
|
13
|
+
nextAtomState,
|
|
14
|
+
nextBranchState,
|
|
15
|
+
nextRingState,
|
|
16
|
+
getIndexFromSelfies,
|
|
17
|
+
INDEX_CODE
|
|
18
|
+
} from './grammar_rules.js'
|
|
19
|
+
|
|
20
|
+
/**
 * Converts a SELFIES string into its SMILES representation.
 *
 * Works in two steps: `decodeToAST` turns the input into an
 * atom/bond/ring structure, which `buildSmiles` then serialises.
 *
 * @param {string} selfies - The SELFIES string to decode
 * @returns {string} SMILES representation
 * @throws {Error} Propagates any error raised while building the AST
 */
export function decode(selfies) {
  const { atoms, bonds, rings } = decodeToAST(selfies)
  return buildSmiles(atoms, bonds, rings)
}
|
|
30
|
+
|
|
31
|
+
/**
 * Decodes a SELFIES string into an AST of atoms, bonds and ring closures.
 *
 * The tokens are walked once while tracking a derivation state
 * (0 = start, >0 = bonding capacity remaining) and the index of the atom
 * that the next atom, branch or ring attaches to.
 *
 * @param {string} selfies - The SELFIES string to decode
 * @returns {Object} `{ atoms, bonds, rings }` arrays describing the molecule
 * @throws {Error} If `parseAtomSymbol` throws while parsing a token
 */
export function decodeToAST(selfies) {
  const tokens = tokenize(selfies)
  const atoms = []
  const bonds = []
  const rings = []

  let state = 0
  let prevAtomIndex = null
  let cursor = 0

  while (cursor < tokens.length) {
    const token = tokens[cursor]
    const content = token.slice(1, -1) // strip the surrounding brackets

    // [nop] carries no information
    if (content === 'nop') {
      cursor += 1
      continue
    }

    // ---- Branch symbols ----
    if (content.includes('Branch') || content.includes('ch')) {
      const branchInfo = processBranchSymbol(token)

      // Unrecognised symbol, or a branch at X0/X1: drop the token
      if (!branchInfo || state <= 1) {
        cursor += 1
        continue
      }

      const [branchInitState, stateAfterBranch] = nextBranchState(branchInfo.order, state)

      // Step over the branch symbol itself
      cursor += 1
      if (cursor >= tokens.length) {
        // Branch symbol was the last token: no length specifier follows
        state = stateAfterBranch
        break
      }

      // Length specifier (Q), encoded in the next L tokens
      const length = readIndexFromTokens(tokens, cursor, branchInfo.L)
      cursor += length.consumed

      const branch = deriveBranch(
        tokens,
        cursor,
        length.value + 1,
        branchInitState,
        prevAtomIndex,
        atoms,
        bonds,
        rings
      )
      cursor += branch.consumed
      state = stateAfterBranch

      // The loop condition ends the walk if the branch used every token
      continue
    }

    // ---- Ring symbols ----
    if (content.includes('Ring') || content.includes('ng')) {
      const ringInfo = processRingSymbol(token)

      // Unrecognised symbol, or a ring at X0: drop the token
      if (!ringInfo || state === 0) {
        cursor += 1
        continue
      }

      const [bondOrder, stateAfterRing] = nextRingState(ringInfo.order, state)

      // Step over the ring symbol itself
      cursor += 1
      if (cursor >= tokens.length) {
        // Ring symbol was the last token: raise the order of the most
        // recently added bond instead of closing a ring
        if (prevAtomIndex !== null && bonds.length > 0) {
          const lastBond = bonds[bonds.length - 1]
          lastBond.order = Math.min(lastBond.order + bondOrder, 3)
        }
        state = stateAfterRing
        break
      }

      // Distance specifier (Q), encoded in the next L tokens
      const distance = readIndexFromTokens(tokens, cursor, ringInfo.L)
      cursor += distance.consumed

      // Atom the ring closes onto, clamped to the first atom
      const targetIndex = Math.max(0, prevAtomIndex - (distance.value + 1))

      // A ring from an atom to itself is ignored
      if (targetIndex !== prevAtomIndex) {
        handleRingClosure(targetIndex, prevAtomIndex, bondOrder, bonds, rings)
      }

      state = stateAfterRing
      continue
    }

    // ---- Regular atom symbols ----
    let atomInfo
    try {
      atomInfo = parseAtomSymbol(content)
    } catch (error) {
      throw new Error(`Invalid SELFIES token ${token}: ${error.message}`)
    }

    // Every remaining path consumes exactly this one token
    cursor += 1

    if (!atomInfo) {
      // Not a known atom symbol: skipped
      continue
    }

    const { element, bondOrder: requestedBond, stereo } = atomInfo
    const capacity = getBondingCapacity(element)
    const [actualBond, stateAfterAtom] = nextAtomState(requestedBond, capacity, state)

    const atomIndex = atoms.length
    atoms.push({ element, capacity, stereo })

    // The first atom, or an atom granted no bond, stays unattached
    if (actualBond > 0 && prevAtomIndex !== null) {
      bonds.push({ from: prevAtomIndex, to: atomIndex, order: actualBond })
    }

    state = stateAfterAtom
    prevAtomIndex = atomIndex

    if (state === null) {
      break // no bonding capacity left
    }
  }

  return { atoms, bonds, rings }
}
|
|
199
|
+
|
|
200
|
+
/**
 * Renders the AST of a SELFIES string as pretty-printed JSON.
 *
 * @param {string} selfies - The SELFIES string to decode
 * @returns {string} The `decodeToAST` result serialised with 2-space indentation
 */
export function dumpAST(selfies) {
  return JSON.stringify(decodeToAST(selfies), null, 2)
}
|
|
209
|
+
|
|
210
|
+
/**
 * Handles one branch token: reads its length specifier, derives the branch
 * atoms and reports how far the caller should advance.
 *
 * @param {string} token - The bracketed branch token
 * @param {string[]} tokens - All tokens of the SELFIES string
 * @param {number} i - Position of `token` within `tokens`
 * @param {number} state - Current derivation state
 * @param {number|null} prevAtomIndex - Atom the branch hangs off
 * @param {Object[]} atoms - Atom list, appended to in place
 * @param {Object[]} bonds - Bond list, appended to in place
 * @param {Object[]} rings - Ring list, passed through to `deriveBranch`
 * @returns {Object} { consumed, state } - tokens consumed and new state
 */
export function processBranchToken(token, tokens, i, state, prevAtomIndex, atoms, bonds, rings) {
  const branchInfo = processBranchSymbol(token)

  // Unrecognised symbol, or a branch at X0/X1: only the token itself is used up
  if (!branchInfo || state <= 1) {
    return { consumed: 1, state }
  }

  const [branchInitState, nextState] = nextBranchState(branchInfo.order, state)

  // The length specifier (Q) starts right after the branch symbol
  const lengthStart = i + 1
  if (lengthStart >= tokens.length) {
    // Branch symbol was the last token: no length specifier follows
    return { consumed: 1, state: nextState }
  }

  const length = readIndexFromTokens(tokens, lengthStart, branchInfo.L)

  const branch = deriveBranch(
    tokens,
    lengthStart + length.consumed,
    length.value + 1,
    branchInitState,
    prevAtomIndex,
    atoms,
    bonds,
    rings
  )

  // Branch symbol + length tokens + tokens used by the branch body
  return { consumed: 1 + length.consumed + branch.consumed, state: nextState }
}
|
|
253
|
+
|
|
254
|
+
/**
 * Handles one ring token: reads its distance specifier and records the
 * ring closure (or bond-order increase) it describes.
 *
 * @param {string} token - The bracketed ring token
 * @param {string[]} tokens - All tokens of the SELFIES string
 * @param {number} i - Position of `token` within `tokens`
 * @param {number} state - Current derivation state
 * @param {number|null} prevAtomIndex - Atom the ring starts from
 * @param {Object[]} bonds - Bond list, possibly modified in place
 * @param {Object[]} rings - Ring list, possibly modified in place
 * @returns {Object} { consumed, state } - tokens consumed and new state
 */
export function processRingToken(token, tokens, i, state, prevAtomIndex, bonds, rings) {
  const ringInfo = processRingSymbol(token)

  // Unrecognised symbol, or a ring at X0: only the token itself is used up
  if (!ringInfo || state === 0) {
    return { consumed: 1, state }
  }

  const [bondOrder, nextState] = nextRingState(ringInfo.order, state)

  // The distance specifier (Q) starts right after the ring symbol
  const distanceStart = i + 1
  if (distanceStart >= tokens.length) {
    // Ring symbol was the last token: raise the order of the most recently
    // added bond instead of closing a ring
    if (prevAtomIndex !== null && bonds.length > 0) {
      const lastBond = bonds[bonds.length - 1]
      lastBond.order = Math.min(lastBond.order + bondOrder, 3)
    }
    return { consumed: 1, state: nextState }
  }

  const distance = readIndexFromTokens(tokens, distanceStart, ringInfo.L)

  // Atom the ring closes onto, clamped to the first atom
  const targetIndex = Math.max(0, prevAtomIndex - (distance.value + 1))

  // A ring from an atom to itself is ignored
  if (targetIndex !== prevAtomIndex) {
    handleRingClosure(targetIndex, prevAtomIndex, bondOrder, bonds, rings)
  }

  return { consumed: 1 + distance.consumed, state: nextState }
}
|
|
299
|
+
|
|
300
|
+
/**
 * Records a ring closure between two atoms.
 *
 * If the pair is already joined by a bond, or by an earlier ring closure,
 * that entry's order is raised (capped at 3) instead of adding a second
 * connection. Bonds take precedence over rings when both exist.
 *
 * @param {number} targetIndex - Atom the ring closes onto
 * @param {number} prevAtomIndex - Atom the ring starts from
 * @param {number} bondOrder - Order of the ring bond
 * @param {Object[]} bonds - Bond list, possibly modified in place
 * @param {Object[]} rings - Ring list, possibly modified in place
 */
export function handleRingClosure(targetIndex, prevAtomIndex, bondOrder, bonds, rings) {
  // True for an edge joining the two atoms, in either direction
  const joinsPair = (edge) =>
    (edge.from === targetIndex && edge.to === prevAtomIndex) ||
    (edge.from === prevAtomIndex && edge.to === targetIndex)

  const existing = bonds.find(joinsPair) || rings.find(joinsPair)

  if (existing) {
    existing.order = Math.min(existing.order + bondOrder, 3)
    return
  }

  rings.push({ from: targetIndex, to: prevAtomIndex, order: bondOrder })
}
|
|
333
|
+
|
|
334
|
+
/**
 * Handles one atom token: appends the atom and, where applicable, the bond
 * linking it to the previous atom.
 *
 * @param {string} content - Token text without the surrounding brackets
 * @param {number} state - Current derivation state
 * @param {number|null} prevAtomIndex - Atom the new atom attaches to
 * @param {Object[]} atoms - Atom list, appended to in place
 * @param {Object[]} bonds - Bond list, appended to in place
 * @returns {Object} { consumed, state, prevAtomIndex } - tokens consumed, new state, and atom index
 */
export function processAtomToken(content, state, prevAtomIndex, atoms, bonds) {
  const parsed = parseAtomSymbol(content)

  // Not a known atom symbol: consume the token and change nothing else
  if (!parsed) {
    return { consumed: 1, state, prevAtomIndex }
  }

  const { element, stereo } = parsed
  const capacity = getBondingCapacity(element)

  // The state machine decides which bond order is actually granted
  const [actualBond, nextState] = nextAtomState(parsed.bondOrder, capacity, state)

  // push() returns the new length, so the new atom sits at length - 1
  const atomIndex = atoms.push({ element, capacity, stereo }) - 1

  // The first atom, or an atom granted no bond, stays unattached
  const attachesToPrevious = actualBond > 0 && prevAtomIndex !== null
  if (attachesToPrevious) {
    bonds.push({ from: prevAtomIndex, to: atomIndex, order: actualBond })
  }

  return { consumed: 1, state: nextState, prevAtomIndex: atomIndex }
}
|
|
367
|
+
|
|
368
|
+
/**
 * Reads an index value (a Branch/Ring length specifier) from SELFIES tokens.
 *
 * Up to `numTokens` tokens are taken starting at `startIndex`. A token that
 * is not a key of `INDEX_CODE` is still consumed but is passed on as `null`.
 *
 * @param {string[]} tokens - All tokens of the SELFIES string
 * @param {number} startIndex - Position of the first index token
 * @param {number} numTokens - Number of tokens to read (from Branch/Ring L value)
 * @returns {Object} { value, consumed } - decoded index and number of tokens used
 */
export function readIndexFromTokens(tokens, startIndex, numTokens = 1) {
  if (startIndex >= tokens.length) {
    return { value: 0, consumed: 0 }
  }

  const symbols = []
  for (let pos = startIndex; pos < tokens.length && symbols.length < numTokens; pos++) {
    const symbol = tokens[pos]
    // Non-index tokens still occupy a slot, but carry no value
    symbols.push(INDEX_CODE.hasOwnProperty(symbol) ? symbol : null)
  }

  if (symbols.length === 0) {
    return { value: 0, consumed: 0 }
  }

  return { value: getIndexFromSelfies(symbols), consumed: symbols.length }
}
|
|
402
|
+
|
|
403
|
+
/**
 * Derives the atoms of a branch and attaches them to `rootAtom`.
 *
 * Tokens are read from `startIndex` until `maxDerive` atoms have been added,
 * the tokens run out, or the state becomes `null` or 0. Tokens that look
 * like Branch/Ring symbols are consumed without being interpreted, as are
 * tokens that are not known atom symbols.
 *
 * @param {string[]} tokens - All tokens of the SELFIES string
 * @param {number} startIndex - Position of the first branch token
 * @param {number} maxDerive - Maximum number of atoms to derive
 * @param {number} initState - Derivation state at the start of the branch
 * @param {number|null} rootAtom - Atom the branch hangs off
 * @param {Object[]} atoms - Atom list, appended to in place
 * @param {Object[]} bonds - Bond list, appended to in place
 * @param {Object[]} rings - Ring list (not used by this function)
 * @returns {Object} { consumed, derived } - tokens consumed and atoms added
 * @throws {Error} If `parseAtomSymbol` throws while parsing a token
 */
export function deriveBranch(tokens, startIndex, maxDerive, initState, rootAtom, atoms, bonds, rings) {
  const STRUCTURAL_MARKERS = ['Branch', 'Ring', 'ch', 'ng']
  const available = tokens.length - startIndex

  let state = initState
  let parentIndex = rootAtom
  let consumed = 0
  let derived = 0

  while (consumed < available && derived < maxDerive) {
    if (state === null || state === 0) break

    const content = tokens[startIndex + consumed].slice(1, -1)

    // Every path below uses up exactly this one token
    consumed += 1

    // Structural tokens inside a branch are skipped
    if (STRUCTURAL_MARKERS.some((marker) => content.includes(marker))) {
      continue
    }

    let atomInfo
    try {
      atomInfo = parseAtomSymbol(content)
    } catch (error) {
      throw new Error(`Invalid branch atom [${content}]: ${error.message}`)
    }

    if (!atomInfo) {
      continue
    }

    const { element, bondOrder: requestedBond, stereo } = atomInfo
    const capacity = getBondingCapacity(element)
    const [actualBond, nextState] = nextAtomState(requestedBond, capacity, state)

    const atomIndex = atoms.length
    atoms.push({ element, capacity, stereo })

    if (actualBond > 0 && parentIndex !== null) {
      bonds.push({ from: parentIndex, to: atomIndex, order: actualBond })
    }

    state = nextState
    parentIndex = atomIndex
    derived += 1
  }

  return { consumed, derived }
}
|
|
463
|
+
|
|
464
|
+
// Element symbols accepted as atoms by parseAtomSymbol
const VALID_ATOM_ELEMENTS = new Set([
  'C', 'N', 'O', 'S', 'P', 'F', 'Cl', 'Br', 'I', 'B', 'H',
  'Si', 'As', 'Se', 'Te', 'Al', 'Ga', 'Ge', 'Sn', 'Pb',
  'Li', 'Na', 'K', 'Mg', 'Ca', 'Zn', 'Fe', 'Cu', 'Ni', 'Co',
  'Mn', 'Cr', 'V', 'Ti', 'Sc'
])

/**
 * Splits the text of an atom token into element, bond order and stereo.
 *
 * A leading `=` or `#` selects bond order 2 or 3; a leading `/` or `\` is
 * stripped and leaves the order at 1. If the remainder contains `@`, the
 * whole remainder is kept as `stereo` and the leading element symbol is
 * taken as the element.
 *
 * @param {string} content - Token text without the surrounding brackets
 * @returns {Object|null} `{ element, bondOrder, stereo }`, or `null` when the
 *   element is not in the supported list
 */
export function parseAtomSymbol(content) {
  // At most one single-character prefix is recognised
  const prefix = ['=', '#', '/', '\\'].find((candidate) => content.startsWith(candidate))

  let bondOrder = 1
  if (prefix === '=') {
    bondOrder = 2
  } else if (prefix === '#') {
    bondOrder = 3
  }

  let element = prefix ? content.slice(1) : content
  let stereo = null

  // Stereo notation: C@, C@@, C@H, etc.
  if (element.includes('@')) {
    stereo = element
    const base = /^([A-Z][a-z]?)/.exec(element)
    if (base) {
      element = base[1]
    }
  }

  return VALID_ATOM_ELEMENTS.has(element) ? { element, bondOrder, stereo } : null
}
|
|
503
|
+
|
|
504
|
+
/**
 * Assigns a ring number to every ring closure.
 *
 * Numbers start at 1 and are handed out in list order. Each ring is
 * registered under both `"from-to"` and `"to-from"` so it can be looked up
 * from either end; a pair that already has a number keeps it.
 *
 * @param {Object[]} rings - Ring closures with `from` and `to` atom indices
 * @returns {Map<string, number>} Ring number keyed by `"a-b"` atom pair
 */
export function assignRingNumbers(rings) {
  const ringNumbers = new Map()
  let counter = 0

  for (const { from, to } of rings) {
    const forwardKey = `${from}-${to}`
    if (ringNumbers.has(forwardKey)) continue

    counter += 1
    ringNumbers.set(forwardKey, counter)
    ringNumbers.set(`${to}-${from}`, counter)
  }

  return ringNumbers
}
|
|
521
|
+
|
|
522
|
+
/**
 * Builds an adjacency list from the bond list.
 *
 * Every atom index gets an entry (possibly empty), and each bond is listed
 * on both of its atoms.
 *
 * @param {Object[]} atoms - Atom list; only its length is used
 * @param {Object[]} bonds - Bonds with `from`, `to` and `order`
 * @returns {Map<number, Object[]>} Neighbours as `{ to, order }` per atom index
 */
export function buildAdjacencyList(atoms, bonds) {
  const adjacency = new Map(
    Array.from({ length: atoms.length }, (_, atomIndex) => [atomIndex, []])
  )

  for (const { from, to, order } of bonds) {
    adjacency.get(from).push({ to, order })
    adjacency.get(to).push({ to: from, order })
  }

  return adjacency
}
|
|
536
|
+
|
|
537
|
+
/**
 * Appends the SMILES symbol for a bond order to the output array.
 *
 * Only double (`=`) and triple (`#`) bonds produce a symbol; any other
 * order appends nothing.
 *
 * @param {number} bondOrder - Bond order
 * @param {string[]} smiles - Output fragments, appended to in place
 */
export function writeBondSymbol(bondOrder, smiles) {
  switch (bondOrder) {
    case 2:
      smiles.push('=')
      break
    case 3:
      smiles.push('#')
      break
    default:
      // Single bonds are implicit
      break
  }
}
|
|
544
|
+
|
|
545
|
+
/**
 * Formats a ring-closure number as a SMILES ring-bond label.
 *
 * Single digits are written as-is and two-digit numbers use the `%nn` form;
 * a bare "12" would be read as two closures, 1 and 2.
 */
function formatRingNumber(ringNum) {
  if (ringNum < 10) return String(ringNum)
  if (ringNum < 100) return `%${ringNum}`
  // NOTE(review): `%(n)` is an extended form accepted by some toolkits
  // (e.g. RDKit) - confirm downstream consumers support it
  return `%(${ringNum})`
}

/**
 * Writes the ring-closure labels for one atom.
 *
 * Every ring that starts or ends at `atomIndex` contributes its bond symbol
 * (if any) followed by its ring number. The same label is written at both
 * ends of a ring, so it does not matter which end is visited first.
 *
 * @param {number} atomIndex - Atom currently being written
 * @param {Object[]} rings - Ring closures with `from`, `to` and `order`
 * @param {Map<string, number>} ringNumbers - Ring number keyed by `"a-b"` atom pair
 * @param {Set<number>} visited - Atoms already written; kept for interface
 *   compatibility, the output does not depend on it
 * @param {string[]} smiles - Output fragments, appended to in place
 * @throws {TypeError} If a ring touching the atom has no assigned number
 */
export function writeRingClosures(atomIndex, rings, ringNumbers, visited, smiles) {
  for (const ring of rings) {
    if (ring.from !== atomIndex && ring.to !== atomIndex) continue

    // Either end resolves to the same number, so one key is enough
    const ringNum = ringNumbers.get(`${ring.from}-${ring.to}`)
    if (ringNum === undefined) {
      throw new TypeError(`No ring number assigned for ring ${ring.from}-${ring.to}`)
    }

    writeBondSymbol(ring.order, smiles)
    smiles.push(formatRingNumber(ringNum))
  }
}
|
|
573
|
+
|
|
574
|
+
// Elements SMILES allows outside brackets (the "organic subset")
const SMILES_ORGANIC_SUBSET = new Set(['B', 'C', 'N', 'O', 'P', 'S', 'F', 'Cl', 'Br', 'I'])

/**
 * Writes an atom's SMILES symbol to the output array.
 *
 * Atoms with stereo information are written as a bracket atom using the
 * full stereo text. Otherwise the element is written bare when it belongs
 * to the SMILES organic subset and in brackets when it does not: a bare
 * `Si` or `Fe` is not valid SMILES.
 *
 * @param {Object} atom - Atom with `element` and optional `stereo`
 * @param {string[]} smiles - Output fragments, appended to in place
 */
export function writeAtomSymbol(atom, smiles) {
  if (atom.stereo) {
    smiles.push(`[${atom.stereo}]`)
    return
  }

  const needsBrackets = !SMILES_ORGANIC_SUBSET.has(atom.element)
  smiles.push(needsBrackets ? `[${atom.element}]` : atom.element)
}
|
|
584
|
+
|
|
585
|
+
/**
 * Serialises an atom/bond/ring structure as a SMILES string.
 *
 * The bond graph is walked depth-first from atom 0. At each atom the symbol
 * and ring-closure labels are written first; all neighbours except the last
 * are then wrapped in parentheses as branches, and the last continues the
 * chain.
 *
 * @param {Object[]} atoms - Atom list
 * @param {Object[]} bonds - Bonds with `from`, `to` and `order`
 * @param {Object[]} rings - Ring closures with `from`, `to` and `order`
 * @returns {string} SMILES string, or `''` when there are no atoms
 */
function buildSmiles(atoms, bonds, rings) {
  if (atoms.length === 0) return ''

  const parts = []
  const seen = new Set()
  const ringNumbers = assignRingNumbers(rings)
  const adjacency = buildAdjacencyList(atoms, bonds)

  const visit = (atomIndex, parentIndex = null) => {
    if (seen.has(atomIndex)) return
    seen.add(atomIndex)

    writeAtomSymbol(atoms[atomIndex], parts)
    writeRingClosures(atomIndex, rings, ringNumbers, seen, parts)

    const pending = (adjacency.get(atomIndex) || []).filter(
      (neighbor) => !seen.has(neighbor.to) && neighbor.to !== parentIndex
    )
    const lastPosition = pending.length - 1

    for (const [position, neighbor] of pending.entries()) {
      // Only the final neighbour continues the chain without parentheses
      const isSideBranch = position < lastPosition

      if (isSideBranch) parts.push('(')
      writeBondSymbol(neighbor.order, parts)
      visit(neighbor.to, atomIndex)
      if (isSideBranch) parts.push(')')
    }
  }

  visit(0)

  return parts.join('')
}
|