selfies-js 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +274 -0
- package/package.json +65 -0
- package/src/alphabet.js +150 -0
- package/src/alphabet.test.js +82 -0
- package/src/chemistryValidator.js +236 -0
- package/src/cli.js +206 -0
- package/src/constraints.js +186 -0
- package/src/constraints.test.js +126 -0
- package/src/decoder.js +636 -0
- package/src/decoder.test.js +560 -0
- package/src/dsl/analyzer.js +170 -0
- package/src/dsl/analyzer.test.js +139 -0
- package/src/dsl/dsl.test.js +146 -0
- package/src/dsl/importer.js +238 -0
- package/src/dsl/index.js +32 -0
- package/src/dsl/lexer.js +264 -0
- package/src/dsl/lexer.test.js +115 -0
- package/src/dsl/parser.js +201 -0
- package/src/dsl/parser.test.js +148 -0
- package/src/dsl/resolver.js +136 -0
- package/src/dsl/resolver.test.js +99 -0
- package/src/dsl/symbolTable.js +56 -0
- package/src/dsl/symbolTable.test.js +68 -0
- package/src/dsl/valenceValidator.js +147 -0
- package/src/encoder.js +467 -0
- package/src/encoder.test.js +61 -0
- package/src/errors.js +79 -0
- package/src/errors.test.js +91 -0
- package/src/grammar_rules.js +146 -0
- package/src/index.js +70 -0
- package/src/parser.js +96 -0
- package/src/parser.test.js +96 -0
- package/src/properties/atoms.js +69 -0
- package/src/properties/atoms.test.js +116 -0
- package/src/properties/formula.js +111 -0
- package/src/properties/formula.test.js +95 -0
- package/src/properties/molecularWeight.js +80 -0
- package/src/properties/molecularWeight.test.js +84 -0
- package/src/properties/properties.test.js +77 -0
- package/src/renderers/README.md +127 -0
- package/src/renderers/svg.js +113 -0
- package/src/renderers/svg.test.js +42 -0
- package/src/syntax.js +641 -0
- package/src/syntax.test.js +363 -0
- package/src/tokenizer.js +99 -0
- package/src/tokenizer.test.js +55 -0
- package/src/validator.js +70 -0
- package/src/validator.test.js +44 -0
package/src/encoder.js
ADDED
|
@@ -0,0 +1,467 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Encoder - Converts SMILES strings to SELFIES
|
|
3
|
+
*
|
|
4
|
+
* NOTE: This is post-MVP functionality. The encoder is more complex than
|
|
5
|
+
* the decoder as it requires parsing SMILES and making decisions about
|
|
6
|
+
* how to represent branches and rings in SELFIES format.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { getSelfiesFromIndex } from './grammar_rules.js'
|
|
10
|
+
|
|
11
|
+
/**
 * Encodes a SMILES string as SELFIES.
 *
 * The string is scanned once from left to right. Every character selects a
 * handler that appends tokens to the shared encoder state and returns the
 * index at which scanning continues.
 *
 * @param {string} smiles - The SMILES string to encode
 * @returns {string} SELFIES representation (the emitted tokens, concatenated)
 * @throws {Error} If the input is empty, contains an unsupported character,
 *   or has a closing parenthesis without a matching opening one
 *
 * Example:
 *   encode('CCO')      // => '[C][C][O]'
 *   encode('c1ccccc1') // => '[C][=C][C][=C][C][=C][Ring1][=Branch1]'
 */
export function encode(smiles) {
  validateSmiles(smiles)

  const state = createEncoderState()
  let cursor = 0

  while (cursor < smiles.length) {
    const symbol = smiles[cursor]

    switch (symbol) {
      case '(':
        cursor = handleBranch(smiles, cursor, state)
        break
      case ')':
        // handleBranch consumes the ')' that matches its '(', so one seen here is stray
        throw new Error('Unexpected closing parenthesis')
      case '=':
        cursor = handleDoubleBond(smiles, cursor, state)
        break
      case '#':
        cursor = handleTripleBond(smiles, cursor, state)
        break
      case '/':
      case '\\':
        cursor = handleStereoBond(smiles, cursor, state)
        break
      case '[':
        cursor = handleBracketAtom(smiles, cursor, state)
        break
      default:
        // Character classes: ring digits, aliphatic atoms, aromatic atoms
        if (isDigit(symbol)) {
          cursor = handleRingClosure(smiles, cursor, state)
        } else if (isUpperCase(symbol)) {
          cursor = handleAliphaticAtom(smiles, cursor, state)
        } else if (isLowerCase(symbol)) {
          cursor = handleAromaticAtom(smiles, cursor, state)
        } else {
          throw new Error(`Invalid SMILES character: ${symbol}`)
        }
    }
  }

  return state.tokens.join('')
}
|
|
55
|
+
|
|
56
|
+
/**
 * Validates SMILES string input before encoding.
 *
 * Falsy and zero-length values are reported as empty. Any other value that is
 * not a primitive string is rejected explicitly; without this check a number
 * or plain object has no usable `length`, the encoder loop never runs, and
 * the caller silently receives an empty SELFIES string.
 *
 * @param {string} smiles - SMILES string to validate
 * @throws {Error} If SMILES is empty, null or undefined
 * @throws {TypeError} If SMILES is a non-empty value that is not a string
 */
function validateSmiles(smiles) {
  if (!smiles || smiles.length === 0) {
    throw new Error('Empty SMILES string')
  }

  if (typeof smiles !== 'string') {
    throw new TypeError(`SMILES must be a string, received ${typeof smiles}`)
  }
}
|
|
66
|
+
|
|
67
|
+
/**
 * Builds the mutable state shared by all handlers during one encode() call.
 *
 * Fields:
 *  - tokens: SELFIES token strings emitted so far, in output order
 *  - ringClosures: Map from an open ring number to the value recorded when
 *    that ring was opened
 *  - aromaticCounter: number of aromatic atoms handled so far
 *  - pendingStereoBond: stereo marker remembered from a '/' or '\' bond,
 *    or null when none is pending
 *
 * @returns {Object} Fresh encoder state object
 */
function createEncoderState() {
  const state = {}

  state.tokens = []
  state.ringClosures = new Map()
  state.aromaticCounter = 0
  state.pendingStereoBond = null // Tracks / or \ for next bond

  return state
}
|
|
79
|
+
|
|
80
|
+
/**
 * Handles a single ring closure digit in SMILES.
 *
 * The first occurrence of a digit opens a ring on the most recent atom; the
 * second occurrence closes it and emits the SELFIES ring tokens.
 *
 * @param {string} smiles - SMILES string
 * @param {number} index - Position of the digit
 * @param {Object} state - Encoder state
 * @returns {number} New position (the character after the digit)
 * @throws {Error} If the digit appears before any atom, or a ring is opened
 *   and closed on the same atom
 */
function handleRingClosure(smiles, index, state) {
  const ringNum = Number.parseInt(smiles[index], 10)

  if (state.ringClosures.has(ringNum)) {
    closeRing(ringNum, state)
  } else {
    openRing(ringNum, state)
  }

  return index + 1
}

/**
 * Counts the atom symbols among the SELFIES tokens emitted so far.
 *
 * Entries of `tokens` may hold several symbols (a whole encoded branch is
 * pushed as one string), so the entries are joined and re-split on brackets.
 * A Branch<n>/Ring<n> symbol and the n index symbols that follow it are
 * structural and are not counted as atoms.
 *
 * @param {string[]} tokens - Emitted token strings
 * @returns {number} Number of atom symbols
 */
function countEmittedAtoms(tokens) {
  const symbols = tokens.join('').match(/\[[^\]]*\]/g) || []
  let atomCount = 0

  for (let i = 0; i < symbols.length; i++) {
    const structural = /(?:Branch|Ring)(\d)\]$/.exec(symbols[i])
    if (structural) {
      // Skip the index symbols; they look like atoms but encode a number
      i += Number(structural[1])
    } else {
      atomCount++
    }
  }

  return atomCount
}

/**
 * Opens a new ring at the current position by remembering the index of the
 * most recently emitted atom.
 *
 * @param {number} ringNum - Ring number
 * @param {Object} state - Encoder state
 * @throws {Error} If no atom has been emitted yet
 */
function openRing(ringNum, state) {
  const atomCount = countEmittedAtoms(state.tokens)

  if (atomCount === 0) {
    throw new Error('Invalid SMILES: ring closure digit before any atom')
  }

  state.ringClosures.set(ringNum, atomCount - 1)
}

/**
 * Closes an existing ring and emits the ring token plus its index token.
 *
 * The distance is measured in atoms, not in entries of state.tokens: branch
 * markers, index symbols and earlier ring tokens between the two ring atoms
 * must not inflate the ring size.
 *
 * @param {number} ringNum - Ring number
 * @param {Object} state - Encoder state
 * @throws {Error} If the ring closes on the atom that opened it, or the index
 *   needs more symbols than a Ring token can carry
 */
function closeRing(ringNum, state) {
  const ringStartAtom = state.ringClosures.get(ringNum)
  const currentAtom = countEmittedAtoms(state.tokens) - 1
  const atomDistance = currentAtom - ringStartAtom

  if (atomDistance < 1) {
    throw new Error(`Invalid SMILES: ring ${ringNum} opens and closes on the same atom`)
  }

  // For decoder formula: targetIndex = prevAtomIndex - (Q.value + 1)
  // We want: Q.value + 1 = atomDistance, so Q.value = atomDistance - 1
  // NOTE(review): the decoder is not visible here — confirm it counts branch atoms too
  const lengthToken = getLengthToken(atomDistance - 1)

  // A Ring<n> symbol is followed by exactly n index symbols, so n must match
  // the number of symbols in the length token.
  const indexSymbolCount = (lengthToken.match(/\[/g) || []).length || 1
  if (indexSymbolCount > 3) {
    throw new Error(`Invalid SMILES: ring ${ringNum} is too large to encode`)
  }

  // Add ring token with stereochemistry if present
  const stereoPrefix = state.pendingStereoBond || ''
  state.pendingStereoBond = null

  state.tokens.push(`[${stereoPrefix}Ring${indexSymbolCount}]`)
  state.tokens.push(lengthToken)
  state.ringClosures.delete(ringNum)
}
|
|
130
|
+
|
|
131
|
+
/**
 * Handles branch notation in SMILES.
 *
 * The text between the parentheses is encoded recursively and emitted as
 * `[<bond>Branch<n>]`, the length token, then the branch tokens.
 *
 * The branch is encoded by a separate encode() call with its own state, so
 * ring closure digits opened outside the branch are not visible inside it.
 *
 * @param {string} smiles - SMILES string
 * @param {number} index - Position of the opening parenthesis
 * @param {Object} state - Encoder state
 * @returns {number} New position (the character after the matching ')')
 * @throws {Error} If the parentheses are unbalanced, the branch is empty, or
 *   the branch is too long to encode
 */
function handleBranch(smiles, index, state) {
  const { content, endIndex } = extractBranchContent(smiles, index)

  if (content.length === 0) {
    // Without this check the recursive call reports "Empty SMILES string",
    // which is misleading when the whole input is not empty
    throw new Error('Invalid SMILES: empty branch')
  }

  const branchTokens = encode(content)
  const branchSymbolCount = countSelfiesSymbols(branchTokens)

  // Determine branch bond order from first character of branch content
  // If branch starts with = or #, use bonded branch symbol
  let bondPrefix = ''
  if (content.startsWith('=')) {
    bondPrefix = '='
  } else if (content.startsWith('#')) {
    bondPrefix = '#'
  }

  // For decoder formula: reads Q.value + 1 atoms from branch
  // We want: Q.value + 1 = branchSymbolCount, so Q.value = branchSymbolCount - 1
  const lengthToken = getLengthToken(branchSymbolCount - 1)

  // A Branch<n> symbol is followed by exactly n index symbols, so n must
  // match the number of symbols in the length token.
  const indexSymbolCount = countSelfiesSymbols(lengthToken) || 1
  if (indexSymbolCount > 3) {
    throw new Error('Invalid SMILES: branch is too long to encode')
  }

  state.tokens.push(`[${bondPrefix}Branch${indexSymbolCount}]`)
  state.tokens.push(lengthToken)
  state.tokens.push(branchTokens)

  return endIndex + 1
}
|
|
160
|
+
|
|
161
|
+
/**
 * Returns the text enclosed by the parenthesis at startIndex and its matching
 * closing parenthesis, honouring nested parentheses.
 *
 * @param {string} smiles - SMILES string
 * @param {number} startIndex - Position of opening parenthesis
 * @returns {Object} `content` (text between the parentheses) and `endIndex`
 *   (position of the matching closing parenthesis)
 * @throws {Error} If no matching closing parenthesis exists
 */
function extractBranchContent(smiles, startIndex) {
  const contentStart = startIndex + 1
  let openCount = 1

  for (let cursor = contentStart; cursor < smiles.length; cursor++) {
    const ch = smiles[cursor]

    if (ch === '(') {
      openCount += 1
    } else if (ch === ')') {
      openCount -= 1
    }

    if (openCount === 0) {
      return {
        content: smiles.substring(contentStart, cursor),
        endIndex: cursor
      }
    }
  }

  throw new Error('Unmatched parenthesis in SMILES')
}
|
|
187
|
+
|
|
188
|
+
/**
 * Handles a '=' bond symbol by encoding the atom that follows it as
 * double-bonded.
 *
 * @param {string} smiles - SMILES string
 * @param {number} index - Position of the '=' character
 * @param {Object} state - Encoder state
 * @returns {number} New position
 */
function handleDoubleBond(smiles, index, state) {
  const doubleBond = '='
  return handleBondedAtom(smiles, index, state, doubleBond)
}
|
|
198
|
+
|
|
199
|
+
/**
 * Handles a '#' bond symbol by encoding the atom that follows it as
 * triple-bonded.
 *
 * @param {string} smiles - SMILES string
 * @param {number} index - Position of the '#' character
 * @param {Object} state - Encoder state
 * @returns {number} New position
 */
function handleTripleBond(smiles, index, state) {
  const tripleBond = '#'
  return handleBondedAtom(smiles, index, state, tripleBond)
}
|
|
209
|
+
|
|
210
|
+
/**
 * Handles stereochemistry bond notation (/ and \).
 *
 * No token is emitted. The marker is stored on the state, where closeRing
 * reads it as a prefix for the next ring token.
 *
 * @param {string} smiles - SMILES string
 * @param {number} index - Position of the '/' or '\' character
 * @param {Object} state - Encoder state
 * @returns {number} New position
 */
function handleStereoBond(smiles, index, state) {
  const isForwardSlash = smiles[index] === '/'

  // '/' is stored as '-/', anything else (i.e. '\') as '\/'
  if (isForwardSlash) {
    state.pendingStereoBond = '-/'
  } else {
    state.pendingStereoBond = '\\/'
  }

  return index + 1
}
|
|
225
|
+
|
|
226
|
+
/**
 * Encodes the atom that follows a bond symbol (= or #).
 *
 * Uppercase atoms are delegated to handleBondedUppercaseAtom. A lowercase
 * (aromatic) atom is emitted as a single uppercased letter with the bond
 * prefix. Any other character after the bond symbol is rejected.
 *
 * @param {string} smiles - SMILES string
 * @param {number} index - Position of the bond symbol
 * @param {Object} state - Encoder state
 * @param {string} bondSymbol - Bond symbol (= or #)
 * @returns {number} New position
 * @throws {Error} If the bond symbol is the last character or is followed by
 *   something other than a letter
 */
function handleBondedAtom(smiles, index, state, bondSymbol) {
  const atomIndex = index + 1

  if (atomIndex >= smiles.length) {
    throw new Error('Invalid SMILES: bond symbol at end')
  }

  const atomChar = smiles[atomIndex]

  if (isUpperCase(atomChar)) {
    return handleBondedUppercaseAtom(smiles, atomIndex, state, bondSymbol)
  }

  if (!isLowerCase(atomChar)) {
    throw new Error(`Invalid SMILES: unexpected character after ${bondSymbol}: ${atomChar}`)
  }

  state.tokens.push(`[${bondSymbol}${atomChar.toUpperCase()}]`)
  return atomIndex + 1
}
|
|
252
|
+
|
|
253
|
+
/**
 * Handles bonded uppercase atoms (checking for two-letter elements).
 *
 * An uppercase letter followed by a lowercase letter is only read as one
 * element when the pair is a known two-letter symbol AND it cannot be an
 * organic-subset atom followed by an aromatic atom. Without that check,
 * 'O=Cc1ccccc1' would emit the non-existent element '[=Cc]', and pairs such
 * as 'Cn' or 'Sc' would be read as copernicium or scandium.
 *
 * @param {string} smiles - SMILES string
 * @param {number} index - Current position at the atom
 * @param {Object} state - Encoder state
 * @param {string} bondSymbol - Bond symbol
 * @returns {number} New position
 */
function handleBondedUppercaseAtom(smiles, index, state, bondSymbol) {
  const char = smiles[index]

  if (index + 1 < smiles.length && isLowerCase(smiles[index + 1])) {
    const nextChar = smiles[index + 1]
    const twoLetter = char + nextChar

    // Outside brackets, an organic-subset atom followed by b/c/n/o/p/s is two
    // atoms (aliphatic then aromatic), never a metal symbol.
    const isAtomThenAromatic = 'BCNOPSFI'.includes(char) && 'bcnops'.includes(nextChar)

    if (!isAtomThenAromatic && isTwoLetterElement(twoLetter)) {
      state.tokens.push(`[${bondSymbol}${twoLetter}]`)
      return index + 2
    }
  }

  // Single-letter element; a following lowercase letter is left for the caller
  state.tokens.push(`[${bondSymbol}${char}]`)
  return index + 1
}
|
|
272
|
+
|
|
273
|
+
/**
 * Handles bracket atom notation like [nH], [NH2+], etc.
 *
 * Only the element symbol returned by extractElementFromBracket is emitted;
 * the rest of the bracket content is not carried into the token.
 *
 * @param {string} smiles - SMILES string
 * @param {number} index - Position of the opening bracket
 * @param {Object} state - Encoder state
 * @returns {number} New position (the character after the closing bracket)
 * @throws {Error} If the bracket is never closed
 */
function handleBracketAtom(smiles, index, state) {
  const closingIndex = smiles.indexOf(']', index)

  if (closingIndex < 0) {
    throw new Error('Invalid SMILES: unclosed bracket atom')
  }

  const inner = smiles.substring(index + 1, closingIndex)
  const symbol = extractElementFromBracket(inner)
  state.tokens.push('[' + symbol + ']')

  return closingIndex + 1
}
|
|
293
|
+
|
|
294
|
+
/**
 * Extracts the element symbol from bracket atom content.
 *
 * A leading isotope mass number is skipped first. Previously content such as
 * '35Cl' fell through to the "first letter" fallback and was truncated to
 * 'C' ('79Br' became boron, '77se' became sulfur).
 *
 * Resolution order after stripping the isotope:
 *  1. an uppercase letter with an optional lowercase letter (e.g. 'Cl', 'N')
 *  2. a run of lowercase letters, returned capitalised (e.g. 'n' -> 'N')
 *  3. the first letter found anywhere, uppercased
 *  4. 'C' when the content holds no letter at all
 *
 * @param {string} bracketContent - Content inside brackets
 * @returns {string} Element symbol
 */
function extractElementFromBracket(bracketContent) {
  // Drop the isotope prefix so the element symbol starts the string
  const symbolPart = bracketContent.replace(/^\d+/, '')

  const aliphaticMatch = symbolPart.match(/^([A-Z][a-z]?)/)
  if (aliphaticMatch) {
    return aliphaticMatch[1]
  }

  const aromaticMatch = symbolPart.match(/^([a-z]+)/)
  if (aromaticMatch) {
    return aromaticMatch[1].charAt(0).toUpperCase() + aromaticMatch[1].slice(1)
  }

  const firstLetter = symbolPart.match(/[A-Za-z]/)
  if (firstLetter) {
    return firstLetter[0].toUpperCase()
  }

  return 'C'
}
|
|
320
|
+
|
|
321
|
+
/**
 * Handles aliphatic (uppercase) atoms.
 *
 * An uppercase letter followed by a lowercase letter is read as a two-letter
 * element only when the pair is a known element AND it cannot be an
 * organic-subset atom followed by an aromatic atom. The element table holds
 * symbols such as 'Cn', 'Co', 'Sc' and 'Sn'; without the second check
 * 'Cn1cccc1' would start with copernicium instead of a methyl carbon bonded
 * to an aromatic nitrogen.
 *
 * @param {string} smiles - SMILES string
 * @param {number} index - Current position
 * @param {Object} state - Encoder state
 * @returns {number} New position
 */
function handleAliphaticAtom(smiles, index, state) {
  const char = smiles[index]

  // Check if next character forms a two-letter element
  if (index + 1 < smiles.length && isLowerCase(smiles[index + 1])) {
    const nextChar = smiles[index + 1]
    const twoLetter = char + nextChar

    // Outside brackets, an organic-subset atom followed by b/c/n/o/p/s is two
    // atoms (aliphatic then aromatic), never a metal symbol.
    const isAtomThenAromatic = 'BCNOPSFI'.includes(char) && 'bcnops'.includes(nextChar)

    // Valid two-letter elements: Cl, Br, Si, etc.
    // NOT Cc, Cn, Co, etc. which are separate atoms
    if (!isAtomThenAromatic && isTwoLetterElement(twoLetter)) {
      state.tokens.push(`[${twoLetter}]`)
      return index + 2
    }
  }

  // Single-letter element
  state.tokens.push(`[${char}]`)
  return index + 1
}
|
|
346
|
+
|
|
347
|
+
/**
 * Handles aromatic (lowercase) atoms.
 *
 * The atom is emitted uppercased. Aromatic atoms alternate between a plain
 * token and a '='-prefixed token according to how many aromatic atoms have
 * been handled so far (even count -> plain, odd count -> '=').
 *
 * @param {string} smiles - SMILES string
 * @param {number} index - Position of the aromatic atom
 * @param {Object} state - Encoder state
 * @returns {number} New position
 */
function handleAromaticAtom(smiles, index, state) {
  const symbol = smiles[index].toUpperCase()
  const bondPrefix = state.aromaticCounter % 2 === 0 ? '' : '='

  state.tokens.push(`[${bondPrefix}${symbol}]`)
  state.aromaticCounter += 1

  return index + 1
}
|
|
366
|
+
|
|
367
|
+
/**
 * Counts SELFIES symbols in a string by counting opening brackets.
 *
 * @param {string} selfiesString - SELFIES string
 * @returns {number} Number of symbols (opening brackets)
 */
function countSelfiesSymbols(selfiesString) {
  // Splitting on '[' yields one more piece than there are brackets
  const pieces = selfiesString.split('[')
  return pieces.length - 1
}
|
|
379
|
+
|
|
380
|
+
/**
 * Generates the length token used after a branch or ring symbol.
 *
 * The conversion is delegated to getSelfiesFromIndex from grammar_rules.js so
 * that encoder and decoder share one index alphabet.
 * NOTE(review): getSelfiesFromIndex is assumed to return an array of symbol
 * strings — confirm against grammar_rules.js.
 *
 * @param {number} length - Length value (0-indexed, matching atom count)
 * @returns {string} Length token in SELFIES format; several symbols are
 *   concatenated when the index needs more than one
 * @throws {Error} If length is out of range
 */
function getLengthToken(length) {
  const indexSymbols = getSelfiesFromIndex(length)

  // One symbol is returned as-is; several are concatenated in order
  return indexSymbols.length === 1 ? indexSymbols[0] : indexSymbols.join('')
}
|
|
400
|
+
|
|
401
|
+
/**
 * Two-letter element symbols, built once instead of on every lookup.
 * Rb, Cs and Fr were missing from the original table.
 */
const TWO_LETTER_ELEMENTS = new Set([
  'He', 'Li', 'Be', 'Ne', 'Na', 'Mg', 'Al', 'Si', 'Cl', 'Ar',
  'Ca', 'Sc', 'Ti', 'Cr', 'Mn', 'Fe', 'Co', 'Ni', 'Cu', 'Zn',
  'Ga', 'Ge', 'As', 'Se', 'Br', 'Kr', 'Rb', 'Sr', 'Zr', 'Nb',
  'Mo', 'Tc', 'Ru', 'Rh', 'Pd', 'Ag', 'Cd', 'In', 'Sn', 'Sb',
  'Te', 'Xe', 'Cs', 'Ba', 'La', 'Ce', 'Pr', 'Nd', 'Pm', 'Sm',
  'Eu', 'Gd', 'Tb', 'Dy', 'Ho', 'Er', 'Tm', 'Yb', 'Lu', 'Hf',
  'Ta', 'Re', 'Os', 'Ir', 'Pt', 'Au', 'Hg', 'Tl', 'Pb', 'Bi',
  'Po', 'At', 'Rn', 'Fr', 'Ra', 'Ac', 'Th', 'Pa', 'Np', 'Pu',
  'Am', 'Cm', 'Bk', 'Cf', 'Es', 'Fm', 'Md', 'No', 'Lr', 'Rf',
  'Db', 'Sg', 'Bh', 'Hs', 'Mt', 'Ds', 'Rg', 'Cn', 'Nh', 'Fl',
  'Mc', 'Lv', 'Ts', 'Og'
])

/**
 * Checks if a two-character string is a valid two-letter element symbol.
 * The comparison is case-sensitive ('Cl' matches, 'cl' and 'CL' do not).
 *
 * @param {string} symbol - Two-character string to check
 * @returns {boolean} True if valid two-letter element
 */
function isTwoLetterElement(symbol) {
  return TWO_LETTER_ELEMENTS.has(symbol)
}
|
|
421
|
+
|
|
422
|
+
/**
 * Checks if character lies in the range 'A'..'Z'.
 *
 * @param {string} char - Character to check
 * @returns {boolean} True if uppercase
 */
function isUpperCase(char) {
  const atOrAfterA = 'A' <= char
  const atOrBeforeZ = char <= 'Z'
  return atOrAfterA && atOrBeforeZ
}
|
|
430
|
+
|
|
431
|
+
/**
 * Checks if character lies in the range 'a'..'z'.
 *
 * @param {string} char - Character to check
 * @returns {boolean} True if lowercase
 */
function isLowerCase(char) {
  const atOrAfterA = 'a' <= char
  const atOrBeforeZ = char <= 'z'
  return atOrAfterA && atOrBeforeZ
}
|
|
439
|
+
|
|
440
|
+
/**
 * Checks if character lies in the range '0'..'9'.
 *
 * @param {string} char - Character to check
 * @returns {boolean} True if digit
 */
function isDigit(char) {
  const atOrAfterZero = '0' <= char
  const atOrBeforeNine = char <= '9'
  return atOrAfterZero && atOrBeforeNine
}
|
|
448
|
+
|
|
449
|
+
/**
 * Placeholder for a parser that turns a SMILES string into a molecule graph.
 * Not implemented: every call throws.
 *
 * @param {string} smiles - SMILES string
 * @returns {Object} Molecule graph structure (once implemented)
 * @throws {Error} Always, until the parser is implemented
 */
function parseSmiles(smiles) {
  // TODO: Implement SMILES parser (POST-MVP)
  const notImplemented = new Error('Not implemented - encoder is post-MVP')
  throw notImplemented
}
|
|
458
|
+
|
|
459
|
+
/**
 * Placeholder for Kekulization, i.e. converting aromatic bonds to explicit
 * single/double bonds. Not implemented: every call throws.
 *
 * @param {Object} graph - Molecule graph
 * @returns {Object} Kekulized graph (once implemented)
 * @throws {Error} Always, until Kekulization is implemented
 */
function kekulize(graph) {
  // TODO: Implement Kekulization (POST-MVP)
  const notImplemented = new Error('Not implemented - encoder is post-MVP')
  throw notImplemented
}
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tests for SMILES encoding
|
|
3
|
+
* NOTE: Encoder is post-MVP, these tests are placeholders
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { describe, test, expect } from 'bun:test'
|
|
7
|
+
import { encode } from '../src/encoder.js'
|
|
8
|
+
|
|
9
|
+
describe('encode', () => {
  // Each row: [test name, SMILES input, expected SELFIES output]
  const conversions = [
    // Basic molecules
    ['encodes methane', 'C', '[C]'],
    ['encodes ethane', 'CC', '[C][C]'],
    ['encodes ethanol', 'CCO', '[C][C][O]'],

    // Bond orders
    ['encodes ethene', 'C=C', '[C][=C]'],
    ['encodes acetylene', 'C#C', '[C][#C]'],

    // Branches
    ['encodes isobutane', 'CC(C)C', '[C][C][Branch1][C][C][C]'],

    // Rings
    // Benzene is given in lowercase aromatic SMILES; the ring index is
    // [=Branch1] (Q=4), not [=N] (Q=11)
    ['encodes benzene', 'c1ccccc1', '[C][=C][C][=C][C][=C][Ring1][=Branch1]'],
    // The ring index is [Ring1] (Q=1), not [=C] (Q=12)
    ['encodes cyclopropane', 'C1CC1', '[C][C][C][Ring1][Ring1]']
  ]

  for (const [title, smiles, selfies] of conversions) {
    test(title, () => {
      expect(encode(smiles)).toBe(selfies)
    })
  }

  // Error cases: [test name, SMILES input, expected message fragment]
  const failures = [
    ['throws on empty SMILES', '', 'Empty SMILES string'],
    ['throws on unmatched parenthesis', 'C(C', 'Unmatched parenthesis'],
    ['throws on invalid bond at end', 'CC=', 'bond symbol at end']
  ]

  for (const [title, smiles, message] of failures) {
    test(title, () => {
      expect(() => encode(smiles)).toThrow(message)
    })
  }
})
|
package/src/errors.js
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Errors - Custom error types for selfies-js
|
|
3
|
+
*
|
|
4
|
+
* Defines specific error classes for different failure modes.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
/**
 * Root of the selfies-js error hierarchy; the other error classes in this
 * module extend it, so callers can catch all library errors with a single
 * `instanceof SelfiesError` check.
 */
export class SelfiesError extends Error {
  /**
   * @param {string} message - Human-readable description of the failure
   */
  constructor(message) {
    super(message)
    Object.assign(this, { name: 'SelfiesError' })
  }
}
|
|
16
|
+
|
|
17
|
+
/**
 * Raised when decoding fails. Optionally records the offending token and
 * its position.
 */
export class DecodeError extends SelfiesError {
  /**
   * @param {string} message - Human-readable description of the failure
   * @param {?string} [token=null] - Token that could not be decoded
   * @param {?number} [position=null] - Position of that token
   */
  constructor(message, token = null, position = null) {
    super(message)
    Object.assign(this, { name: 'DecodeError', token, position })
  }
}
|
|
28
|
+
|
|
29
|
+
/**
 * Raised when encoding fails. Optionally records the SMILES input that could
 * not be encoded.
 */
export class EncodeError extends SelfiesError {
  /**
   * @param {string} message - Human-readable description of the failure
   * @param {?string} [smiles=null] - SMILES string being encoded
   */
  constructor(message, smiles = null) {
    super(message)
    Object.assign(this, { name: 'EncodeError', smiles })
  }
}
|
|
39
|
+
|
|
40
|
+
/**
 * Raised when resolution fails. The definition involved is stored as
 * `definitionName`, because `name` already holds the error's class name.
 */
export class ResolveError extends SelfiesError {
  /**
   * @param {string} message - Human-readable description of the failure
   * @param {?string} [name=null] - Name of the definition being resolved
   */
  constructor(message, name = null) {
    super(message)
    Object.assign(this, { name: 'ResolveError', definitionName: name })
  }
}
|
|
50
|
+
|
|
51
|
+
/**
 * Raised when validation fails. Optionally records the offending token and
 * its position.
 */
export class ValidationError extends SelfiesError {
  /**
   * @param {string} message - Human-readable description of the failure
   * @param {?string} [token=null] - Token that failed validation
   * @param {?number} [position=null] - Position of that token
   */
  constructor(message, token = null, position = null) {
    super(message)
    Object.assign(this, { name: 'ValidationError', token, position })
  }
}
|
|
62
|
+
|
|
63
|
+
/**
 * Raised when DSL parsing fails. Optionally records the source line and
 * column of the problem.
 */
export class ParseError extends SelfiesError {
  /**
   * @param {string} message - Human-readable description of the failure
   * @param {?number} [line=null] - Line of the parse failure
   * @param {?number} [column=null] - Column of the parse failure
   */
  constructor(message, line = null, column = null) {
    super(message)
    Object.assign(this, { name: 'ParseError', line, column })
  }
}
|
|
74
|
+
|
|
75
|
+
// TODO: Add more specific error types as needed during implementation
|
|
76
|
+
// Examples:
|
|
77
|
+
// - CyclicDependencyError
|
|
78
|
+
// - UndefinedReferenceError
|
|
79
|
+
// - InvalidTokenError
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tests for error classes
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { describe, test, expect } from 'bun:test'
|
|
6
|
+
import {
|
|
7
|
+
SelfiesError,
|
|
8
|
+
DecodeError,
|
|
9
|
+
EncodeError,
|
|
10
|
+
ResolveError,
|
|
11
|
+
ValidationError,
|
|
12
|
+
ParseError
|
|
13
|
+
} from './errors.js'
|
|
14
|
+
|
|
15
|
+
describe('SelfiesError', () => {
  test('creates base error', () => {
    const err = new SelfiesError('test message')
    const { message, name } = err

    expect(message).toBe('test message')
    expect(name).toBe('SelfiesError')
    expect(err instanceof Error).toBe(true)
  })
})

describe('DecodeError', () => {
  test('creates decode error with message', () => {
    const err = new DecodeError('decode failed')
    const { message, name } = err

    expect(message).toBe('decode failed')
    expect(name).toBe('DecodeError')
    expect(err instanceof SelfiesError).toBe(true)
  })

  test('includes token and position', () => {
    const { token, position } = new DecodeError('invalid token', '[Xyz]', 5)

    expect(token).toBe('[Xyz]')
    expect(position).toBe(5)
  })
})

describe('EncodeError', () => {
  test('creates encode error with message', () => {
    const { message, name } = new EncodeError('encode failed')

    expect(message).toBe('encode failed')
    expect(name).toBe('EncodeError')
  })

  test('includes smiles string', () => {
    const { smiles } = new EncodeError('invalid smiles', 'invalid')

    expect(smiles).toBe('invalid')
  })
})

describe('ResolveError', () => {
  test('creates resolve error with message', () => {
    const { message, name } = new ResolveError('resolve failed')

    expect(message).toBe('resolve failed')
    expect(name).toBe('ResolveError')
  })

  test('includes definition name', () => {
    const { definitionName } = new ResolveError('undefined reference', 'methyl')

    expect(definitionName).toBe('methyl')
  })
})

describe('ValidationError', () => {
  test('creates validation error', () => {
    const { message, name } = new ValidationError('validation failed')

    expect(message).toBe('validation failed')
    expect(name).toBe('ValidationError')
  })

  test('includes token and position', () => {
    const { token, position } = new ValidationError('invalid', '[Bad]', 10)

    expect(token).toBe('[Bad]')
    expect(position).toBe(10)
  })
})

describe('ParseError', () => {
  test('creates parse error', () => {
    const { message, name } = new ParseError('parse failed')

    expect(message).toBe('parse failed')
    expect(name).toBe('ParseError')
  })

  test('includes line and column', () => {
    const { line, column } = new ParseError('syntax error', 5, 10)

    expect(line).toBe(5)
    expect(column).toBe(10)
  })
})
|