selfies-js 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +274 -0
- package/package.json +65 -0
- package/src/alphabet.js +150 -0
- package/src/alphabet.test.js +82 -0
- package/src/chemistryValidator.js +236 -0
- package/src/cli.js +206 -0
- package/src/constraints.js +186 -0
- package/src/constraints.test.js +126 -0
- package/src/decoder.js +636 -0
- package/src/decoder.test.js +560 -0
- package/src/dsl/analyzer.js +170 -0
- package/src/dsl/analyzer.test.js +139 -0
- package/src/dsl/dsl.test.js +146 -0
- package/src/dsl/importer.js +238 -0
- package/src/dsl/index.js +32 -0
- package/src/dsl/lexer.js +264 -0
- package/src/dsl/lexer.test.js +115 -0
- package/src/dsl/parser.js +201 -0
- package/src/dsl/parser.test.js +148 -0
- package/src/dsl/resolver.js +136 -0
- package/src/dsl/resolver.test.js +99 -0
- package/src/dsl/symbolTable.js +56 -0
- package/src/dsl/symbolTable.test.js +68 -0
- package/src/dsl/valenceValidator.js +147 -0
- package/src/encoder.js +467 -0
- package/src/encoder.test.js +61 -0
- package/src/errors.js +79 -0
- package/src/errors.test.js +91 -0
- package/src/grammar_rules.js +146 -0
- package/src/index.js +70 -0
- package/src/parser.js +96 -0
- package/src/parser.test.js +96 -0
- package/src/properties/atoms.js +69 -0
- package/src/properties/atoms.test.js +116 -0
- package/src/properties/formula.js +111 -0
- package/src/properties/formula.test.js +95 -0
- package/src/properties/molecularWeight.js +80 -0
- package/src/properties/molecularWeight.test.js +84 -0
- package/src/properties/properties.test.js +77 -0
- package/src/renderers/README.md +127 -0
- package/src/renderers/svg.js +113 -0
- package/src/renderers/svg.test.js +42 -0
- package/src/syntax.js +641 -0
- package/src/syntax.test.js +363 -0
- package/src/tokenizer.js +99 -0
- package/src/tokenizer.test.js +55 -0
- package/src/validator.js +70 -0
- package/src/validator.test.js +44 -0
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tests for DSL parser
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { describe, test, expect } from 'bun:test'
|
|
6
|
+
import { parse } from './parser.js'
|
|
7
|
+
|
|
8
|
+
describe('parse', () => {
  // ---- Basic parsing ----
  test('parses simple definition', () => {
    const result = parse('[methyl] = [C]')
    expect(result.definitions.has('methyl')).toBe(true)
    expect(result.errors).toEqual([])

    // The stored definition records its name, token list and source line.
    const methyl = result.definitions.get('methyl')
    expect(methyl).toMatchObject({ name: 'methyl', tokens: ['[C]'], line: 1 })
  })

  test('parses multiple definitions', () => {
    const result = parse('[methyl] = [C]\n[ethyl] = [C][C]')
    expect(result.definitions.size).toBe(2)
    expect(result.definitions.has('methyl')).toBe(true)
    expect(result.definitions.has('ethyl')).toBe(true)
  })

  test('ignores comments', () => {
    const result = parse('# Comment\n[methyl] = [C]')
    expect(result.definitions.size).toBe(1)
    expect(result.definitions.has('methyl')).toBe(true)
  })

  test('handles inline comments', () => {
    const result = parse('[methyl] = [C] # This is a carbon')
    expect(result.definitions.size).toBe(1)
    expect(result.errors).toEqual([])
  })

  test('handles empty lines', () => {
    const result = parse('[methyl] = [C]\n\n[ethyl] = [C][C]')
    expect(result.definitions.size).toBe(2)
  })

  // ---- Error detection ----
  test('detects duplicate definitions', () => {
    const result = parse('[methyl] = [C]\n[methyl] = [C][C]')
    expect(result.errors.length).toBeGreaterThan(0)
    expect(result.errors[0].message).toContain('Duplicate')
  })

  test('detects syntax errors - missing equals', () => {
    const result = parse('[methyl] [C]')
    expect(result.errors.length).toBeGreaterThan(0)
    expect(result.errors[0].message).toContain('=')
  })

  test('detects syntax errors - missing tokens', () => {
    const result = parse('[methyl] =')
    expect(result.errors.length).toBeGreaterThan(0)
    expect(result.errors[0].message).toContain('at least one token')
  })

  test('detects syntax errors - unexpected token type', () => {
    const result = parse('[methyl] = = [C]')
    expect(result.errors.length).toBeGreaterThan(0)
  })

  // ---- Definition structure ----
  test('definition includes correct range', () => {
    const { range } = parse('[methyl] = [C]').definitions.get('methyl')
    expect(range).toBeDefined()
    expect(Array.isArray(range)).toBe(true)
    expect(range.length).toBe(2)
  })

  test('definition includes line number', () => {
    // Two leading blank lines put the definition on line 3.
    const result = parse('\n\n[methyl] = [C]')
    const methyl = result.definitions.get('methyl')
    expect(methyl.line).toBe(3)
  })

  // ---- Complex definitions ----
  test('parses complex SELFIES tokens', () => {
    const result = parse('[alcohol] = [C][=O][Branch1][C][O]')
    const alcohol = result.definitions.get('alcohol')
    expect(alcohol.tokens).toEqual(['[C]', '[=O]', '[Branch1]', '[C]', '[O]'])
  })

  test('handles multiple tokens per definition', () => {
    const result = parse('[ethanol] = [C][C][O]')
    const ethanol = result.definitions.get('ethanol')
    expect(ethanol.tokens).toHaveLength(3)
  })

  // ---- Program structure ----
  test('returns program with definitions map', () => {
    const result = parse('[methyl] = [C]')
    expect(result).toHaveProperty('definitions')
    expect(result.definitions).toBeInstanceOf(Map)
  })

  test('returns program with errors array', () => {
    const result = parse('[methyl] = [C]')
    expect(result).toHaveProperty('errors')
    expect(Array.isArray(result.errors)).toBe(true)
  })

  test('returns program with warnings array', () => {
    const result = parse('[methyl] = [C]')
    expect(result).toHaveProperty('warnings')
    expect(Array.isArray(result.warnings)).toBe(true)
  })

  // ---- Diagnostic structure ----
  test('diagnostic includes line and column', () => {
    // The source is missing the '=' between name and tokens.
    const [firstDiagnostic] = [parse('[methyl] [C]').errors[0]]
    expect(firstDiagnostic).toMatchObject({
      severity: 'error',
      line: expect.any(Number),
      column: expect.any(Number)
    })
  })

  test('diagnostic includes range', () => {
    // The source is missing the '=' between name and tokens.
    const firstDiagnostic = parse('[methyl] [C]').errors[0]
    expect(firstDiagnostic.range).toBeDefined()
    expect(Array.isArray(firstDiagnostic.range)).toBe(true)
  })
})
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Resolver - Expands DSL definitions to primitive SELFIES
|
|
3
|
+
*
|
|
4
|
+
* Recursively resolves references to other definitions until only
|
|
5
|
+
* primitive SELFIES tokens remain.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { decode } from '../decoder.js'
|
|
9
|
+
import { validateValence } from './valenceValidator.js'
|
|
10
|
+
|
|
11
|
+
/**
 * Error raised when a DSL definition cannot be resolved.
 *
 * Besides the usual `message`, each instance carries `definitionName`,
 * the name of the definition the failure was reported for.
 */
export class ResolveError extends Error {
  /**
   * @param {string} message - Human-readable description of the failure
   * @param {string} name - Name of the definition being resolved
   */
  constructor(message, name) {
    super(message)
    // `name` here is the Error type label, not the definition name.
    Object.assign(this, { name: 'ResolveError', definitionName: name })
  }
}
|
|
21
|
+
|
|
22
|
+
/**
 * Expands one definition into its primitive SELFIES string.
 *
 * References to other definitions are expanded recursively; the resulting
 * tokens are concatenated without a separator.
 *
 * @param {Object} program - Program object from parser (must expose a `definitions` map)
 * @param {string} name - Name to resolve
 * @param {Object} options - Resolution options
 * @param {boolean} options.decode - If true, return SMILES instead of SELFIES
 * @param {boolean} options.validateValence - Valence validation runs unless this is exactly `false`
 * @returns {string} Resolved SELFIES (or the result of `decode` when `options.decode` is truthy)
 * @throws {ResolveError} If the name is not defined, a circular reference is
 *   detected, or valence validation reports at least one error (the first
 *   reported message is used)
 *
 * Example:
 *   const program = parse('[methyl] = [C]\n[ethanol] = [methyl][C][O]')
 *   resolve(program, 'ethanol') // => '[C][C][O]'
 *   resolve(program, 'ethanol', { decode: true }) // => 'CCO'
 */
export function resolve(program, name, options = {}) {
  const isKnown = program.definitions.has(name)
  if (!isKnown) {
    throw new ResolveError(`Undefined definition: ${name}`, name)
  }

  // A fresh "currently visiting" set per top-level call drives cycle detection.
  const selfies = resolveRecursive(program, name, new Set()).join('')

  // Validation is opt-out: only an explicit `false` disables it.
  const skipValidation = options.validateValence === false
  if (!skipValidation) {
    const problems = validateValence(selfies, name)
    if (problems.length > 0) {
      throw new ResolveError(problems[0].message, name)
    }
  }

  return options.decode ? decode(selfies) : selfies
}
|
|
65
|
+
|
|
66
|
+
/**
 * Resolves every definition of a program on a best-effort basis.
 *
 * Each name is passed to `resolve` with the given options. A name whose
 * resolution throws is left out of the result instead of aborting the
 * whole run; resolving that name individually surfaces the error.
 *
 * @param {Object} program - Program object from parser
 * @param {Object} options - Resolution options, forwarded to `resolve`
 * @returns {Map<string, string>} Map of name to resolved output, containing
 *   only the names that resolved without throwing
 */
export function resolveAll(program, options = {}) {
  const results = new Map()

  for (const [name] of program.definitions) {
    let output
    try {
      output = resolve(program, name, options)
    } catch {
      // Deliberately skipped (e.g. circular dependencies).
      continue
    }
    results.set(name, output)
  }

  return results
}
|
|
86
|
+
|
|
87
|
+
/**
 * Internal recursive resolution with cycle detection.
 *
 * Walks the tokens of the named definition. A token that refers to another
 * definition is replaced by that definition's resolved tokens; any other
 * token is kept as is.
 *
 * @param {Object} program - Program object
 * @param {string} name - Name to resolve
 * @param {Set<string>} visiting - Names on the current resolution path (for
 *   cycle detection). On exit the set no longer contains `name`, whether
 *   the call returned or threw.
 * @returns {string[]} Resolved primitive tokens
 * @throws {ResolveError} If `name` is already on the current resolution path
 */
function resolveRecursive(program, name, visiting = new Set()) {
  // Seeing a name that is already on the path means a circular dependency.
  if (visiting.has(name)) {
    throw new ResolveError(`Circular dependency detected involving '${name}'`, name)
  }

  visiting.add(name)
  try {
    const definition = program.definitions.get(name)
    const resolvedTokens = []

    for (const token of definition.tokens) {
      if (isReference(token, program)) {
        // Reference to another definition: strip the brackets and expand it.
        const refName = token.slice(1, -1)
        // Append one token at a time. `push(...tokens)` passes every token
        // as a call argument and can throw RangeError on large expansions.
        for (const refToken of resolveRecursive(program, refName, visiting)) {
          resolvedTokens.push(refToken)
        }
      } else {
        // Primitive token - keep it unchanged.
        resolvedTokens.push(token)
      }
    }

    return resolvedTokens
  } finally {
    // Always unmark, so a failed resolution does not leave stale entries in
    // a caller-supplied set.
    visiting.delete(name)
  }
}
|
|
125
|
+
|
|
126
|
+
/**
 * Tells whether a token names another definition of the program.
 *
 * The first and last characters of the token (its brackets) are dropped and
 * the remainder is looked up in `program.definitions`.
 *
 * @param {string} token - Token to check
 * @param {Object} program - Program object
 * @returns {boolean} True if the un-bracketed token is a defined name
 */
function isReference(token, program) {
  const bareName = token.slice(1, -1)
  const { definitions } = program
  return definitions.has(bareName)
}
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tests for resolver
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { describe, test, expect } from 'bun:test'
|
|
6
|
+
import { parse } from './parser.js'
|
|
7
|
+
import { resolve, resolveAll } from './resolver.js'
|
|
8
|
+
|
|
9
|
+
describe('resolve', () => {
  test('resolves simple definition', () => {
    const parsed = parse('[methyl] = [C]')
    const selfies = resolve(parsed, 'methyl')
    expect(selfies).toBe('[C]')
  })

  test('resolves nested definitions', () => {
    // ethanol -> ethyl -> methyl
    const parsed = parse('[methyl] = [C]\n[ethyl] = [methyl][C]\n[ethanol] = [ethyl][O]')
    const selfies = resolve(parsed, 'ethanol')
    expect(selfies).toBe('[C][C][O]')
  })

  test('resolves with decode option', () => {
    const parsed = parse('[ethanol] = [C][C][O]')
    const smiles = resolve(parsed, 'ethanol', { decode: true })
    expect(smiles).toBe('CCO')
  })

  test('handles multiple references in one definition', () => {
    const parsed = parse('[methyl] = [C]\n[ethyl] = [methyl][methyl]')
    const selfies = resolve(parsed, 'ethyl')
    expect(selfies).toBe('[C][C]')
  })

  test('handles deeply nested references', () => {
    // Each level doubles the previous one: 1 -> 2 -> 4 -> 8 carbons.
    const parsed = parse('[a] = [C]\n[b] = [a][a]\n[c] = [b][b]\n[d] = [c][c]')
    const selfies = resolve(parsed, 'd')
    expect(selfies).toBe('[C][C][C][C][C][C][C][C]')
  })

  test('throws on undefined name', () => {
    const parsed = parse('[methyl] = [C]')
    const attempt = () => resolve(parsed, 'undefined')
    expect(attempt).toThrow(/Undefined definition/)
  })

  test('detects circular dependencies', () => {
    const parsed = parse('[a] = [b]\n[b] = [a]')
    const attempt = () => resolve(parsed, 'a')
    expect(attempt).toThrow(/Circular dependency/)
  })

  test('detects self-referential definitions', () => {
    const parsed = parse('[a] = [a]')
    const attempt = () => resolve(parsed, 'a')
    expect(attempt).toThrow(/Circular dependency/)
  })

  test('detects indirect circular dependencies', () => {
    const parsed = parse('[a] = [b]\n[b] = [c]\n[c] = [a]')
    const attempt = () => resolve(parsed, 'a')
    expect(attempt).toThrow(/Circular dependency/)
  })

  test('resolves mixed primitive and reference tokens', () => {
    const parsed = parse('[methyl] = [C]\n[ethanol] = [methyl][C][O]')
    const selfies = resolve(parsed, 'ethanol')
    expect(selfies).toBe('[C][C][O]')
  })
})
|
|
67
|
+
|
|
68
|
+
describe('resolveAll', () => {
  test('resolves all definitions', () => {
    const byName = resolveAll(parse('[methyl] = [C]\n[ethyl] = [methyl][C]'))
    expect(byName.get('methyl')).toBe('[C]')
    expect(byName.get('ethyl')).toBe('[C][C]')
  })

  test('skips definitions with circular dependencies', () => {
    // [b] and [c] reference each other; [a] is independent.
    const byName = resolveAll(parse('[a] = [C]\n[b] = [c]\n[c] = [b]'))
    expect(byName.get('a')).toBe('[C]')
    expect(byName.has('b')).toBe(false)
    expect(byName.has('c')).toBe(false)
  })

  test('resolves all with decode option', () => {
    const parsed = parse('[methyl] = [C]\n[ethyl] = [methyl][C]')
    const byName = resolveAll(parsed, { decode: true })
    expect(byName.get('methyl')).toBe('C')
    expect(byName.get('ethyl')).toBe('CC')
  })

  test('returns empty map for empty program', () => {
    const byName = resolveAll(parse(''))
    expect(byName.size).toBe(0)
  })
})
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Symbol Table - Manages name-to-definition mappings for DSL
|
|
3
|
+
*
|
|
4
|
+
* Provides utilities for working with the symbol table in a Program object.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
/**
 * Builds a new symbol table with no entries.
 * @returns {Map<string, Object>} A fresh, empty Map
 */
export function createSymbolTable() {
  const table = new Map()
  return table
}
|
|
14
|
+
|
|
15
|
+
/**
 * Registers a definition under a name that is not yet taken.
 *
 * The table is modified in place; nothing is returned.
 *
 * @param {Map} symbolTable - Symbol table to update
 * @param {string} name - Definition name
 * @param {Object} definition - Definition object
 * @throws {Error} If name already exists (the existing entry is left untouched)
 */
export function addDefinition(symbolTable, name, definition) {
  const isFree = !symbolTable.has(name)
  if (isFree) {
    symbolTable.set(name, definition)
    return
  }
  throw new Error(`Definition '${name}' already exists`)
}
|
|
28
|
+
|
|
29
|
+
/**
 * Fetches the definition stored under a name.
 * @param {Map} symbolTable - Symbol table to search
 * @param {string} name - Name to look up
 * @returns {Object|undefined} The stored definition, or undefined when the name is absent
 */
export function lookup(symbolTable, name) {
  const definition = symbolTable.get(name)
  return definition
}
|
|
38
|
+
|
|
39
|
+
/**
 * Reports whether the symbol table contains an entry for a name.
 * @param {Map} symbolTable - Symbol table to check
 * @param {string} name - Name to check
 * @returns {boolean} True if name exists
 */
export function has(symbolTable, name) {
  const isDefined = symbolTable.has(name)
  return isDefined
}
|
|
48
|
+
|
|
49
|
+
/**
 * Lists every name currently stored in the symbol table.
 * @param {Map} symbolTable - Symbol table
 * @returns {string[]} New array of definition names, in the table's key order
 */
export function getNames(symbolTable) {
  return [...symbolTable.keys()]
}
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tests for symbol table
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { describe, test, expect } from 'bun:test'
|
|
6
|
+
import { createSymbolTable, addDefinition, lookup, has, getNames } from './symbolTable.js'
|
|
7
|
+
|
|
8
|
+
describe('symbolTable', () => {
  // Builds a fresh sample definition record for the tests below.
  const makeDefinition = () => ({ name: 'test', tokens: ['[C]'], line: 1, range: [0, 5] })

  test('createSymbolTable returns empty map', () => {
    const symbols = createSymbolTable()
    expect(symbols).toBeInstanceOf(Map)
    expect(symbols.size).toBe(0)
  })

  test('addDefinition adds entry', () => {
    const symbols = createSymbolTable()
    addDefinition(symbols, 'test', makeDefinition())
    expect(symbols.size).toBe(1)
    expect(symbols.has('test')).toBe(true)
  })

  test('addDefinition throws on duplicate', () => {
    const symbols = createSymbolTable()
    const entry = makeDefinition()
    addDefinition(symbols, 'test', entry)
    const addAgain = () => addDefinition(symbols, 'test', entry)
    expect(addAgain).toThrow(/already exists/)
  })

  test('lookup returns definition', () => {
    const symbols = createSymbolTable()
    const entry = makeDefinition()
    addDefinition(symbols, 'test', entry)
    // Identity check: the very same object must come back.
    expect(lookup(symbols, 'test')).toBe(entry)
  })

  test('lookup returns undefined for missing', () => {
    const symbols = createSymbolTable()
    expect(lookup(symbols, 'missing')).toBeUndefined()
  })

  test('has returns true for existing', () => {
    const symbols = createSymbolTable()
    addDefinition(symbols, 'test', makeDefinition())
    expect(has(symbols, 'test')).toBe(true)
  })

  test('has returns false for missing', () => {
    const symbols = createSymbolTable()
    expect(has(symbols, 'missing')).toBe(false)
  })

  test('getNames returns all names', () => {
    const symbols = createSymbolTable()
    addDefinition(symbols, 'a', { name: 'a', tokens: ['[C]'], line: 1, range: [0, 5] })
    addDefinition(symbols, 'b', { name: 'b', tokens: ['[N]'], line: 2, range: [6, 11] })

    const listed = getNames(symbols)
    expect(listed).toHaveLength(2)
    expect(listed).toContain('a')
    expect(listed).toContain('b')
  })

  test('getNames returns empty array for empty table', () => {
    const symbols = createSymbolTable()
    expect(getNames(symbols)).toEqual([])
  })
})
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Valence Validator - Validates chemical valence in DSL definitions
|
|
3
|
+
*
|
|
4
|
+
* Checks that SELFIES molecules defined in the DSL follow valence rules
|
|
5
|
+
* and returns compilation errors for invalid structures.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { getBondingCapacity } from '../constraints.js'
|
|
9
|
+
import { decode } from '../decoder.js'
|
|
10
|
+
|
|
11
|
+
/**
 * Checks a resolved SELFIES string against per-atom bonding capacities.
 *
 * The string is first passed to `decode`; only the fact that it does not
 * throw matters, the decoded value is not used. The tokens are then scanned
 * and every atom whose counted bonds exceed `getBondingCapacity` for its
 * element and charge produces one diagnostic. Any exception raised along
 * the way is reported as an additional "Invalid structure" diagnostic
 * instead of propagating.
 *
 * @param {string} selfies - Resolved SELFIES string
 * @param {string} defName - Definition name (for error messages)
 * @returns {Object[]} Array of valence error diagnostics
 *
 * Each diagnostic contains:
 * {
 *   message: string,
 *   severity: 'error',
 *   definitionName: string
 * }
 */
export function validateValence(selfies, defName) {
  const diagnostics = []
  const report = (message) => {
    diagnostics.push({ message, severity: 'error', definitionName: defName })
  }

  try {
    // Called for its side effect only: a throw means the structure is invalid.
    decode(selfies)

    const bondsByAtom = calculateBonds(tokenizeSelfies(selfies))

    Object.entries(bondsByAtom).forEach(([atomKey, bondCount]) => {
      const { element, charge } = parseAtom(atomKey)
      const capacity = getBondingCapacity(element, charge)

      if (bondCount > capacity) {
        report(`Valence error in '${defName}': ${element} has ${bondCount} bonds but max is ${capacity}`)
      }
    })
  } catch (failure) {
    report(`Invalid structure in '${defName}': ${failure.message}`)
  }

  return diagnostics
}
|
|
59
|
+
|
|
60
|
+
/**
 * Runs valence validation over every definition that has a resolved form.
 *
 * Names are visited in the iteration order of `program.definitions`;
 * names missing from `resolvedMap` are skipped.
 *
 * @param {Object} program - Program object with definitions
 * @param {Map} resolvedMap - Map of resolved SELFIES strings
 * @returns {Object[]} Array of all valence errors, concatenated per definition
 */
export function validateProgramValence(program, resolvedMap) {
  const collected = []

  for (const [name] of program.definitions) {
    if (!resolvedMap.has(name)) {
      continue
    }
    const diagnostics = validateValence(resolvedMap.get(name), name)
    collected.push(...diagnostics)
  }

  return collected
}
|
|
79
|
+
|
|
80
|
+
/**
 * Splits a SELFIES string into its bracketed tokens.
 *
 * A token is an opening bracket, one or more characters other than `]`,
 * and a closing bracket. Text outside such groups is ignored.
 *
 * @param {string} selfies - SELFIES string
 * @returns {string[]} Array of tokens (empty when nothing matches)
 */
function tokenizeSelfies(selfies) {
  return Array.from(selfies.matchAll(/\[[^\]]+\]/g), (found) => found[0])
}
|
|
89
|
+
|
|
90
|
+
/**
 * Calculates a bond count for each atom token.
 *
 * This is a linear scan, not a full SELFIES derivation: each atom is
 * credited only with the order of the bond written on (or carried to) it.
 *
 * Token handling:
 *  - Atom tokens with a bond prefix (`[=O]`, `[#N]`) count as an atom with
 *    bond order 2 or 3. They used to be treated as pure modifiers, which
 *    dropped the atom and charged its bond order to the following atom.
 *  - Tokens whose symbol contains `Branch` or `Ring` are not atoms. If they
 *    carry a bond prefix (`[=Branch1]`), that order is applied to the next
 *    un-prefixed atom. A bare prefix token (`[=]`) behaves the same way.
 *  - Every other token is an atom with bond order 1, unless an order was
 *    carried over from a preceding prefixed structural token.
 *
 * @param {string[]} tokens - SELFIES tokens (bracketed)
 * @returns {Object} Map of `"<atomIndex>:<symbol>"` to bond count, where
 *   `symbol` is the token content without brackets and without bond prefix
 */
function calculateBonds(tokens) {
  const atomBonds = {}
  let atomIndex = 0
  // Bond order handed over by a prefixed structural/bare token to the next atom.
  let carriedBondOrder = 1

  for (const token of tokens) {
    const content = token.slice(1, -1) // Remove brackets

    // Split an optional leading bond prefix from the symbol itself.
    let prefixOrder = 0
    if (content.startsWith('=')) {
      prefixOrder = 2
    } else if (content.startsWith('#')) {
      prefixOrder = 3
    }
    const symbol = prefixOrder > 0 ? content.slice(1) : content

    // Structural tokens (Branch, Ring) and bare prefixes are not atoms.
    if (symbol === '' || symbol.includes('Branch') || symbol.includes('Ring')) {
      if (prefixOrder > 0) {
        carriedBondOrder = prefixOrder
      }
      continue
    }

    // It's an atom: its own prefix wins over a carried bond order.
    atomBonds[`${atomIndex}:${symbol}`] = prefixOrder > 0 ? prefixOrder : carriedBondOrder

    // Reset bond order for next atom
    carriedBondOrder = 1
    atomIndex++
  }

  return atomBonds
}
|
|
128
|
+
|
|
129
|
+
/**
 * Parses an atom key to extract element and charge.
 *
 * The element is the first capital letter, optionally followed by one
 * lowercase letter, found in the atom content. The charge is a trailing
 * sign-plus-digits group at the end of the content, so it is also found
 * when other annotations sit between element and charge (e.g. `NH1+1`).
 *
 * @param {string} atomKey - Atom key like "0:C" or "1:N+1"
 * @returns {Object} {element, charge}; charge is 0 when none is written.
 *   If no element symbol can be found, the whole content is returned as
 *   the element with charge 0.
 */
function parseAtom(atomKey) {
  // The key is "<index>:<content>"; only the content part is needed.
  const content = atomKey.split(':')[1]

  const elementMatch = content.match(/[A-Z][a-z]?/)
  if (!elementMatch) {
    return { element: content, charge: 0 }
  }

  // The charge, when present, is the last thing in the atom content.
  const chargeMatch = content.match(/[+-]\d+$/)
  const charge = chargeMatch ? Number.parseInt(chargeMatch[0], 10) : 0

  return { element: elementMatch[0], charge }
}
|