selfies-js 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +274 -0
- package/package.json +65 -0
- package/src/alphabet.js +150 -0
- package/src/alphabet.test.js +82 -0
- package/src/chemistryValidator.js +236 -0
- package/src/cli.js +206 -0
- package/src/constraints.js +186 -0
- package/src/constraints.test.js +126 -0
- package/src/decoder.js +636 -0
- package/src/decoder.test.js +560 -0
- package/src/dsl/analyzer.js +170 -0
- package/src/dsl/analyzer.test.js +139 -0
- package/src/dsl/dsl.test.js +146 -0
- package/src/dsl/importer.js +238 -0
- package/src/dsl/index.js +32 -0
- package/src/dsl/lexer.js +264 -0
- package/src/dsl/lexer.test.js +115 -0
- package/src/dsl/parser.js +201 -0
- package/src/dsl/parser.test.js +148 -0
- package/src/dsl/resolver.js +136 -0
- package/src/dsl/resolver.test.js +99 -0
- package/src/dsl/symbolTable.js +56 -0
- package/src/dsl/symbolTable.test.js +68 -0
- package/src/dsl/valenceValidator.js +147 -0
- package/src/encoder.js +467 -0
- package/src/encoder.test.js +61 -0
- package/src/errors.js +79 -0
- package/src/errors.test.js +91 -0
- package/src/grammar_rules.js +146 -0
- package/src/index.js +70 -0
- package/src/parser.js +96 -0
- package/src/parser.test.js +96 -0
- package/src/properties/atoms.js +69 -0
- package/src/properties/atoms.test.js +116 -0
- package/src/properties/formula.js +111 -0
- package/src/properties/formula.test.js +95 -0
- package/src/properties/molecularWeight.js +80 -0
- package/src/properties/molecularWeight.test.js +84 -0
- package/src/properties/properties.test.js +77 -0
- package/src/renderers/README.md +127 -0
- package/src/renderers/svg.js +113 -0
- package/src/renderers/svg.test.js +42 -0
- package/src/syntax.js +641 -0
- package/src/syntax.test.js +363 -0
- package/src/tokenizer.js +99 -0
- package/src/tokenizer.test.js +55 -0
- package/src/validator.js +70 -0
- package/src/validator.test.js +44 -0
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Chemistry Validator - Validates molecular chemistry using RDKit
|
|
3
|
+
*
|
|
4
|
+
* Provides chemistry-aware validation beyond syntax checking.
|
|
5
|
+
* Uses RDKit to verify that decoded molecules are chemically valid.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { decode } from './decoder.js'
|
|
9
|
+
import { initRDKit } from './renderers/svg.js'
|
|
10
|
+
|
|
11
|
+
/**
 * Checks if a SELFIES string decodes to a molecule that RDKit accepts.
 * @param {string} selfies - The SELFIES string to validate
 * @returns {Promise<boolean>} Result of RDKit's `is_valid()` for the decoded
 *   molecule; false when decoding yields an empty SMILES, RDKit returns no
 *   molecule, or any step throws (including RDKit initialization)
 *
 * The function never rejects: every failure is reported as `false`.
 *
 * Example:
 *   await isChemicallyValid('[C][C][O]')  // => true
 */
export async function isChemicallyValid(selfies) {
  try {
    const RDKit = await initRDKit()
    const smiles = decode(selfies)

    // Empty SMILES is not valid
    if (!smiles || smiles.length === 0) {
      return false
    }

    const mol = RDKit.get_mol(smiles)
    if (!mol) {
      return false
    }

    // RDKit molecules must be freed explicitly; `finally` guarantees that
    // even when is_valid() throws.
    try {
      return mol.is_valid()
    } finally {
      mol.delete()
    }
  } catch (error) {
    return false
  }
}
|
|
48
|
+
|
|
49
|
+
/**
 * Gets the canonical SMILES representation of a SELFIES string
 * @param {string} selfies - The SELFIES string to convert
 * @returns {Promise<string|null>} The SMILES string RDKit produces for the
 *   decoded molecule, or null when decoding yields an empty SMILES, the
 *   molecule is missing/invalid, or any step throws
 *
 * Canonical SMILES allows proper comparison of molecular structures
 * that may have different string representations.
 *
 * Example:
 *   await getCanonicalSmiles('[C][C][O]')  // => 'CCO'
 *   await getCanonicalSmiles('[C][=C][C][=C][C][=C][Ring1][=Branch1]')  // => 'c1ccccc1'
 */
export async function getCanonicalSmiles(selfies) {
  try {
    const RDKit = await initRDKit()
    const smiles = decode(selfies)

    // Empty SMILES returns null
    if (!smiles || smiles.length === 0) {
      return null
    }

    const mol = RDKit.get_mol(smiles)
    if (!mol) {
      return null
    }

    // Free the molecule on every path, including when is_valid() or
    // get_smiles() throws.
    try {
      return mol.is_valid() ? mol.get_smiles() : null
    } finally {
      mol.delete()
    }
  } catch (error) {
    return null
  }
}
|
|
85
|
+
|
|
86
|
+
/**
 * Validates a roundtrip: SMILES → SELFIES → SMILES using canonical comparison
 * @param {string} originalSmiles - The original SMILES string
 * @param {string} selfies - The SELFIES encoding
 * @returns {Promise<boolean>} True when both the original SMILES and the
 *   decoded SELFIES parse to valid molecules whose RDKit SMILES strings are
 *   identical; false otherwise, including when any step throws
 *
 * Comparing canonical forms checks the molecular structure rather than the
 * literal strings.
 *
 * Example:
 *   const selfies = encode('CCO')
 *   await validateRoundtrip('CCO', selfies)  // => true
 */
export async function validateRoundtrip(originalSmiles, selfies) {
  // Parses one SMILES string and reports { ok, smiles }. The molecule is
  // freed on every path, including when is_valid()/get_smiles() throws.
  const canonicalize = (RDKit, smiles) => {
    const mol = RDKit.get_mol(smiles)
    if (!mol) {
      return { ok: false, smiles: null }
    }
    try {
      return mol.is_valid()
        ? { ok: true, smiles: mol.get_smiles() }
        : { ok: false, smiles: null }
    } finally {
      mol.delete()
    }
  }

  try {
    const RDKit = await initRDKit()

    // Get canonical form of original
    const original = canonicalize(RDKit, originalSmiles)
    if (!original.ok) {
      return false
    }

    // Get canonical form of decoded SELFIES (only decoded once the original
    // is known to be valid)
    const roundtripped = canonicalize(RDKit, decode(selfies))
    if (!roundtripped.ok) {
      return false
    }

    return original.smiles === roundtripped.smiles
  } catch (error) {
    return false
  }
}
|
|
127
|
+
|
|
128
|
+
/**
 * Gets detailed validation information about a SELFIES string
 * @param {string} selfies - The SELFIES string to validate
 * @returns {Promise<Object>} Validation result with details
 *
 * Returns object with:
 *   - isValid: boolean
 *   - smiles: decoded SMILES (null if decoding threw)
 *   - canonical: canonical SMILES (or null)
 *   - error: error message if invalid (or null)
 *
 * Failures are reported through `error` rather than by rejecting.
 *
 * Example:
 *   const result = await getValidationDetails('[C][C][O]')
 *   // => { isValid: true, smiles: 'CCO', canonical: 'CCO', error: null }
 */
export async function getValidationDetails(selfies) {
  // Thrown values are not guaranteed to be Error instances, so fall back to
  // the stringified value when there is no usable `message`.
  const messageOf = (thrown) => thrown?.message ?? String(thrown)

  const result = {
    isValid: false,
    smiles: null,
    canonical: null,
    error: null
  }

  try {
    const RDKit = await initRDKit()

    // Try to decode
    try {
      result.smiles = decode(selfies)
    } catch (error) {
      result.error = `Decode error: ${messageOf(error)}`
      return result
    }

    // Empty SMILES means the SELFIES contained only invalid/unknown tokens
    if (!result.smiles || result.smiles.length === 0) {
      result.error = 'Decoded to empty SMILES (invalid tokens in SELFIES)'
      return result
    }

    // Try to create molecule
    const mol = RDKit.get_mol(result.smiles)
    if (!mol) {
      result.error = 'RDKit could not parse SMILES'
      return result
    }

    // The molecule must be freed explicitly; `finally` covers the paths
    // where is_valid() or get_smiles() throws.
    try {
      if (!mol.is_valid()) {
        result.error = 'Molecule is not chemically valid'
        return result
      }

      // Get canonical form
      result.canonical = mol.get_smiles()
      result.isValid = true
    } finally {
      mol.delete()
    }
  } catch (error) {
    result.error = messageOf(error)
  }

  return result
}
|
|
193
|
+
|
|
194
|
+
/**
 * Batch validates multiple SELFIES strings
 * @param {string[]} selfiesArray - Array of SELFIES strings
 * @returns {Promise<Object>} Validation statistics
 *
 * Returns:
 *   - total: number of strings tested
 *   - valid: number of valid molecules
 *   - invalid: number of invalid molecules
 *   - validPercentage: percentage valid (0 for an empty input array)
 *   - failures: array of {selfies, error} for invalid ones
 *
 * Strings are validated one at a time, in input order.
 *
 * Example:
 *   const results = await batchValidate(['[C][C][O]', '[C][=C]'])
 *   // => { total: 2, valid: 2, invalid: 0, validPercentage: 100, failures: [] }
 */
export async function batchValidate(selfiesArray) {
  const results = {
    total: selfiesArray.length,
    valid: 0,
    invalid: 0,
    validPercentage: 0,
    failures: []
  }

  for (const selfies of selfiesArray) {
    // One call yields both the verdict and the failure reason, so a failing
    // string is not decoded and parsed a second time.
    const details = await getValidationDetails(selfies)
    if (details.isValid) {
      results.valid++
    } else {
      results.invalid++
      results.failures.push({
        selfies,
        error: details.error
      })
    }
  }

  // Guard the division: 0 / 0 would report NaN for an empty batch.
  if (results.total > 0) {
    results.validPercentage = (results.valid / results.total) * 100
  }

  return results
}
|
package/src/cli.js
ADDED
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* CLI - Command-line interface for executing .selfies files
|
|
4
|
+
*
|
|
5
|
+
* Usage:
|
|
6
|
+
* selfies-js run <file.selfies> [options]
|
|
7
|
+
* selfies-js validate <file.selfies>
|
|
8
|
+
* selfies-js list <file.selfies>
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import { loadFile } from './dsl/importer.js'
import { resolve, resolveAll } from './dsl/resolver.js'
import { decode } from './decoder.js'
import { readFileSync, realpathSync } from 'fs'
import { fileURLToPath } from 'url'
|
|
15
|
+
|
|
16
|
+
// Maps each CLI sub-command name to the function that implements it.
const COMMANDS = {
  run: runCommand,
  validate: validateCommand,
  list: listCommand,
  help: helpCommand
}

/**
 * Main CLI entry point
 *
 * Reads the sub-command from process.argv and dispatches to its handler with
 * the remaining arguments. With no arguments it prints the help text and
 * exits with status 0; an unknown command or an exception thrown by a
 * handler is reported on stderr and exits with status 1.
 */
function main() {
  const args = process.argv.slice(2)

  if (args.length === 0) {
    helpCommand()
    process.exit(0)
  }

  const command = args[0]
  // Own-property lookup only: a plain COMMANDS[command] would also resolve
  // inherited members such as "toString" or "constructor" and call them.
  const commandFn = Object.prototype.hasOwnProperty.call(COMMANDS, command)
    ? COMMANDS[command]
    : undefined

  if (!commandFn) {
    console.error(`Unknown command: ${command}`)
    console.error('Run "selfies-js help" for usage information')
    process.exit(1)
  }

  try {
    commandFn(args.slice(1))
  } catch (error) {
    console.error(`Error: ${error.message}`)
    process.exit(1)
  }
}
|
|
50
|
+
|
|
51
|
+
/**
 * Run command - loads a .selfies file, resolves every definition and prints
 * one "name: value" line per definition.
 *
 * @param {string[]} args - File path followed by optional flags
 *   (parsed by parseOptions)
 *
 * Exits with status 1 when no file is given or the loaded program reports
 * errors. With --format=smiles each resolved SELFIES string is decoded first;
 * a decode failure is reported on stderr for that entry only.
 */
function runCommand(args) {
  if (args.length === 0) {
    console.error('Usage: selfies-js run <file.selfies> [options]')
    process.exit(1)
  }

  const [filePath, ...flagArgs] = args
  const { format, noValidate } = parseOptions(flagArgs)

  // Load the file (imports included) and stop on compilation errors
  const program = loadFile(filePath)
  const compileErrors = program.errors
  if (compileErrors.length > 0) {
    console.error('Compilation errors:')
    for (const { message } of compileErrors) {
      console.error(` ${message}`)
    }
    process.exit(1)
  }

  const resolved = resolveAll(program, { validateValence: !noValidate })
  const wantsSmiles = format === 'smiles'

  for (const [name, selfies] of resolved) {
    if (!wantsSmiles) {
      console.log(`${name}: ${selfies}`)
      continue
    }

    try {
      console.log(`${name}: ${decode(selfies)}`)
    } catch (error) {
      console.error(`${name}: Error - ${error.message}`)
    }
  }
}
|
|
94
|
+
|
|
95
|
+
/**
 * Validate command - loads a .selfies file and reports whether it is valid.
 *
 * @param {string[]} args - Arguments after "validate"; args[0] is the file path
 *
 * Prints the program's errors and exits with status 1 when there are any,
 * prints warnings when present, then attempts to resolve all definitions and
 * reports success or the resolution failure (status 1).
 */
function validateCommand(args) {
  if (args.length === 0) {
    console.error('Usage: selfies-js validate <file.selfies>')
    process.exit(1)
  }

  // Errors and warnings share one line format
  const printDiagnostics = (items) => {
    for (const item of items) {
      console.log(` Line ${item.line}: ${item.message}`)
    }
  }

  const program = loadFile(args[0])

  const parseErrors = program.errors
  if (parseErrors.length > 0) {
    console.log('Validation failed with errors:')
    printDiagnostics(parseErrors)
    process.exit(1)
  }

  const warnings = program.warnings
  if (warnings && warnings.length > 0) {
    console.log('Warnings:')
    printDiagnostics(warnings)
  }

  // Resolution is the final check: it throws when a definition cannot be resolved
  try {
    resolveAll(program)
    console.log(`✓ File is valid (${program.definitions.size} definitions)`)
  } catch (error) {
    console.log(`Validation failed: ${error.message}`)
    process.exit(1)
  }
}
|
|
133
|
+
|
|
134
|
+
/**
 * List command - prints every definition found in a .selfies file.
 *
 * @param {string[]} args - Arguments after "list"; args[0] is the file path
 *
 * Each definition is printed as "[name] = tokens", where tokens is the
 * definition's token list joined without a separator.
 */
function listCommand(args) {
  if (args.length === 0) {
    console.error('Usage: selfies-js list <file.selfies>')
    process.exit(1)
  }

  const [filePath] = args
  const { definitions } = loadFile(filePath)

  console.log(`Definitions in ${filePath}:`)
  for (const [name, { tokens }] of definitions) {
    console.log(` [${name}] = ${tokens.join('')}`)
  }
}
|
|
152
|
+
|
|
153
|
+
/**
 * Help command - prints the CLI usage text (commands, run options and
 * examples) to stdout.
 */
function helpCommand() {
  const usageText = `
selfies-js - CLI for SELFIES DSL

Usage:
selfies-js run <file.selfies> [options] Execute a .selfies file
selfies-js validate <file.selfies> Validate a .selfies file
selfies-js list <file.selfies> List definitions in a file
selfies-js help Show this help message

Run command options:
--format=smiles Output as SMILES instead of SELFIES
--no-validate Skip valence validation

Examples:
selfies-js run molecules.selfies
selfies-js run molecules.selfies --format=smiles
selfies-js validate molecules.selfies
selfies-js list molecules.selfies
`

  console.log(usageText)
}
|
|
177
|
+
|
|
178
|
+
/**
 * Parses command-line options for the run command
 *
 * @param {string[]} args - Raw arguments following the file path
 * @returns {{format: string, noValidate: boolean}} Parsed options;
 *   format defaults to 'selfies' and noValidate to false
 *
 * Recognized flags:
 *   --format=<value>  sets `format` to everything after the first '='
 *   --no-validate     sets `noValidate` to true
 * Unrecognized arguments are ignored.
 */
function parseOptions(args) {
  const FORMAT_PREFIX = '--format='
  const options = {
    format: 'selfies',
    noValidate: false
  }

  for (const arg of args) {
    if (arg.startsWith(FORMAT_PREFIX)) {
      // Slice instead of split('=')[1] so a value that itself contains '='
      // is kept whole rather than truncated.
      options.format = arg.slice(FORMAT_PREFIX.length)
    } else if (arg === '--no-validate') {
      options.noValidate = true
    }
  }

  return options
}
|
|
197
|
+
|
|
198
|
+
// Run CLI if executed directly
|
|
199
|
+
// Handle both Unix and Windows paths
|
|
200
|
+
const scriptPath = process.argv[1]?.replace(/\\/g, '/')
|
|
201
|
+
const metaPath = import.meta.url.replace('file:///', '').replace('file://', '')
|
|
202
|
+
if (scriptPath && (metaPath.endsWith(scriptPath) || scriptPath.endsWith(metaPath.split('/').pop()))) {
|
|
203
|
+
main()
|
|
204
|
+
}
|
|
205
|
+
|
|
206
|
+
export { main }
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Constraints - Semantic constraint management for SELFIES
|
|
3
|
+
*
|
|
4
|
+
* Manages bonding constraints that define what chemical structures are valid.
|
|
5
|
+
* Based on selfies-py's bond_constraints.py system.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
/**
 * Bonding capacities shared by every preset.
 * Keys are element symbols, optionally followed by a signed charge
 * (e.g. 'N+1'); values are the maximum number of bonds. '?' is the
 * fallback for atoms that have no entry of their own.
 *
 * NOTE(review): some charged-atom values (e.g. 'C+1', 'S+1', 'P+1', 'P-1')
 * may not match selfies-py's bond_constraints.py — confirm whether the
 * difference is intentional.
 */
const BASE_CAPACITIES = {
  'H': 1,
  'F': 1, 'Cl': 1, 'Br': 1, 'I': 1,
  'O': 2, 'O+1': 3, 'O-1': 1,
  'N': 3, 'N+1': 4, 'N-1': 2,
  'C': 4, 'C+1': 3, 'C-1': 3,
  'B': 3, 'B+1': 2, 'B-1': 4,
  'S': 6, 'S+1': 5, 'S-1': 5,
  'P': 5, 'P+1': 4, 'P-1': 6,
  '?': 8
}

/**
 * Preset constraint configurations, each expressed as the shared base plus
 * the entries that preset overrides.
 */
const PRESET_CONSTRAINTS = {
  // Default constraints: the base table unchanged
  default: { ...BASE_CAPACITIES },

  // Octet rule: lower capacities for sulfur and phosphorus
  octet_rule: {
    ...BASE_CAPACITIES,
    'S': 2, 'S+1': 3, 'S-1': 1,
    'P': 3, 'P+1': 2, 'P-1': 4
  },

  // Hypervalent: higher capacities for Cl/Br/I and nitrogen
  hypervalent: {
    ...BASE_CAPACITIES,
    'Cl': 7, 'Br': 7, 'I': 7,
    'N': 5, 'N+1': 6, 'N-1': 4
  }
}

/**
 * Currently active constraints (module-level state).
 * Starts as a copy of the 'default' preset.
 */
let _currentConstraints = { ...PRESET_CONSTRAINTS.default }
|
|
59
|
+
|
|
60
|
+
/**
 * Gets a preset constraint configuration by name
 * @param {string} name - Preset name: "default", "octet_rule", or "hypervalent"
 * @returns {Object} A fresh copy of the preset, mapping element → max bonds
 * @throws {Error} If preset name is unknown
 *
 * Example:
 *   const constraints = getPresetConstraints('default')
 *   // { 'C': 4, 'N': 3, 'O': 2, ... }
 */
export function getPresetConstraints(name) {
  // Own-property check: the `in` operator would also accept inherited keys
  // such as "toString" and return an empty object instead of throwing.
  const isKnownPreset = Object.prototype.hasOwnProperty.call(PRESET_CONSTRAINTS, name)
  if (!isKnownPreset) {
    const validNames = Object.keys(PRESET_CONSTRAINTS).join(', ')
    throw new Error(`Unknown preset: ${name}. Valid presets: ${validNames}`)
  }

  // Return a copy to prevent mutation
  return { ...PRESET_CONSTRAINTS[name] }
}
|
|
77
|
+
|
|
78
|
+
/**
 * Gets the current semantic constraints
 * @returns {Object} Shallow copy of the active constraint table, so callers
 *   cannot modify module state through the returned object
 *
 * Example:
 *   const constraints = getSemanticConstraints()
 *   console.log(constraints['C'])  // 4
 */
export function getSemanticConstraints() {
  const snapshot = { ..._currentConstraints }
  return snapshot
}
|
|
90
|
+
|
|
91
|
+
/**
 * Sets new semantic constraints
 * @param {Object} constraints - Constraint object mapping element → max bonds
 * @throws {Error} If validateConstraints rejects the object; the active
 *   constraints are left unchanged in that case
 *
 * The object is copied, so later changes to the argument do not affect the
 * active constraints.
 *
 * Example:
 *   setSemanticConstraints({
 *     'C': 4,
 *     'N': 3,
 *     'O': 2,
 *     '?': 8  // default for unknown
 *   })
 */
export function setSemanticConstraints(constraints) {
  // Throws before any state is touched when the object is malformed
  validateConstraints(constraints)

  const replacement = { ...constraints }
  _currentConstraints = replacement
}
|
|
111
|
+
|
|
112
|
+
/**
 * Gets bonding capacity for a specific element (optionally with charge)
 * @param {string} element - Element symbol (e.g., 'C', 'N', 'O')
 * @param {number} charge - Optional charge (default: 0)
 * @returns {number} Maximum number of bonds for this element; the '?' entry
 *   of the active constraints when the element/charge has no entry of its own
 *
 * The lookup key is the bare symbol for charge 0, otherwise the symbol
 * followed by the signed charge ('N+1', 'O-1').
 *
 * Example:
 *   getBondingCapacity('C')       // 4
 *   getBondingCapacity('N', 1)    // 4 (N+1)
 *   getBondingCapacity('O', -1)   // 1 (O-1)
 */
export function getBondingCapacity(element, charge = 0) {
  // Build key from element + charge (negative numbers carry their own sign)
  const key = charge === 0 ? element : `${element}${charge > 0 ? '+' : ''}${charge}`

  // Own-property check: `key in obj` would also match inherited members
  // (e.g. "constructor", "toString") and return a function, not a capacity.
  if (Object.prototype.hasOwnProperty.call(_currentConstraints, key)) {
    return _currentConstraints[key]
  }

  // Fall back to '?' default if not found
  return _currentConstraints['?']
}
|
|
135
|
+
|
|
136
|
+
/**
 * Validates that a constraint object is well-formed
 * @param {Object} constraints - Constraint object to validate
 * @returns {boolean} True if valid
 * @throws {Error} If constraints are invalid with explanation
 *
 * A well-formed object is non-null, contains a '?' fallback entry, and maps
 * every key to a non-negative integer.
 */
export function validateConstraints(constraints) {
  const isObject = constraints !== null && typeof constraints === 'object'
  if (!isObject) {
    throw new Error('Constraints must be an object')
  }

  const hasFallback = '?' in constraints
  if (!hasFallback) {
    throw new Error("Constraints must include '?' default for unknown elements")
  }

  // Per entry: the integer check runs first, then the sign check
  Object.keys(constraints).forEach((element) => {
    const capacity = constraints[element]
    if (!Number.isInteger(capacity)) {
      throw new Error(`Bonding capacity for ${element} must be an integer, got ${capacity}`)
    }
    if (capacity < 0) {
      throw new Error(`Bonding capacity for ${element} must be non-negative, got ${capacity}`)
    }
  })

  return true
}
|
|
165
|
+
|
|
166
|
+
/**
 * Checks if an atom with given bonds would violate constraints
 * @param {string} element - Element symbol
 * @param {number} charge - Atom charge
 * @param {number} usedBonds - Number of bonds already used
 * @param {number} newBondOrder - Order of new bond to add
 * @returns {boolean} True if the bonds already used plus the new bond order
 *   exceed the capacity reported by getBondingCapacity for this atom
 */
export function wouldViolateConstraints(element, charge, usedBonds, newBondOrder) {
  const limit = getBondingCapacity(element, charge)
  const bondsAfterAdding = usedBonds + newBondOrder
  return bondsAfterAdding > limit
}
|
|
180
|
+
|
|
181
|
+
/**
 * Resets the active constraints to a fresh copy of the 'default' preset
 */
export function resetConstraints() {
  const { default: defaults } = PRESET_CONSTRAINTS
  _currentConstraints = { ...defaults }
}
|