selfies-js 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +274 -0
  3. package/package.json +65 -0
  4. package/src/alphabet.js +150 -0
  5. package/src/alphabet.test.js +82 -0
  6. package/src/chemistryValidator.js +236 -0
  7. package/src/cli.js +206 -0
  8. package/src/constraints.js +186 -0
  9. package/src/constraints.test.js +126 -0
  10. package/src/decoder.js +636 -0
  11. package/src/decoder.test.js +560 -0
  12. package/src/dsl/analyzer.js +170 -0
  13. package/src/dsl/analyzer.test.js +139 -0
  14. package/src/dsl/dsl.test.js +146 -0
  15. package/src/dsl/importer.js +238 -0
  16. package/src/dsl/index.js +32 -0
  17. package/src/dsl/lexer.js +264 -0
  18. package/src/dsl/lexer.test.js +115 -0
  19. package/src/dsl/parser.js +201 -0
  20. package/src/dsl/parser.test.js +148 -0
  21. package/src/dsl/resolver.js +136 -0
  22. package/src/dsl/resolver.test.js +99 -0
  23. package/src/dsl/symbolTable.js +56 -0
  24. package/src/dsl/symbolTable.test.js +68 -0
  25. package/src/dsl/valenceValidator.js +147 -0
  26. package/src/encoder.js +467 -0
  27. package/src/encoder.test.js +61 -0
  28. package/src/errors.js +79 -0
  29. package/src/errors.test.js +91 -0
  30. package/src/grammar_rules.js +146 -0
  31. package/src/index.js +70 -0
  32. package/src/parser.js +96 -0
  33. package/src/parser.test.js +96 -0
  34. package/src/properties/atoms.js +69 -0
  35. package/src/properties/atoms.test.js +116 -0
  36. package/src/properties/formula.js +111 -0
  37. package/src/properties/formula.test.js +95 -0
  38. package/src/properties/molecularWeight.js +80 -0
  39. package/src/properties/molecularWeight.test.js +84 -0
  40. package/src/properties/properties.test.js +77 -0
  41. package/src/renderers/README.md +127 -0
  42. package/src/renderers/svg.js +113 -0
  43. package/src/renderers/svg.test.js +42 -0
  44. package/src/syntax.js +641 -0
  45. package/src/syntax.test.js +363 -0
  46. package/src/tokenizer.js +99 -0
  47. package/src/tokenizer.test.js +55 -0
  48. package/src/validator.js +70 -0
  49. package/src/validator.test.js +44 -0
@@ -0,0 +1,148 @@
1
+ /**
2
+ * Tests for DSL parser
3
+ */
4
+
5
+ import { describe, test, expect } from 'bun:test'
6
+ import { parse } from './parser.js'
7
+
8
describe('parse', () => {
  // ---- Well-formed input ----
  test('parses simple definition', () => {
    const { definitions, errors } = parse('[methyl] = [C]')
    expect(definitions.has('methyl')).toBe(true)
    expect(errors).toEqual([])

    expect(definitions.get('methyl')).toMatchObject({
      name: 'methyl',
      tokens: ['[C]'],
      line: 1
    })
  })

  test('parses multiple definitions', () => {
    const { definitions } = parse('[methyl] = [C]\n[ethyl] = [C][C]')
    expect(definitions.size).toBe(2)
    expect(definitions.has('methyl')).toBe(true)
    expect(definitions.has('ethyl')).toBe(true)
  })

  test('ignores comments', () => {
    const { definitions } = parse('# Comment\n[methyl] = [C]')
    expect(definitions.size).toBe(1)
    expect(definitions.has('methyl')).toBe(true)
  })

  test('handles inline comments', () => {
    const result = parse('[methyl] = [C] # This is a carbon')
    expect(result.definitions.size).toBe(1)
    expect(result.errors).toEqual([])
  })

  test('handles empty lines', () => {
    const { definitions } = parse('[methyl] = [C]\n\n[ethyl] = [C][C]')
    expect(definitions.size).toBe(2)
  })

  // ---- Malformed input is reported through program.errors ----
  test('detects duplicate definitions', () => {
    const { errors } = parse('[methyl] = [C]\n[methyl] = [C][C]')
    expect(errors.length).toBeGreaterThan(0)
    expect(errors[0].message).toContain('Duplicate')
  })

  test('detects syntax errors - missing equals', () => {
    const { errors } = parse('[methyl] [C]')
    expect(errors.length).toBeGreaterThan(0)
    expect(errors[0].message).toContain('=')
  })

  test('detects syntax errors - missing tokens', () => {
    const { errors } = parse('[methyl] =')
    expect(errors.length).toBeGreaterThan(0)
    expect(errors[0].message).toContain('at least one token')
  })

  test('detects syntax errors - unexpected token type', () => {
    const { errors } = parse('[methyl] = = [C]')
    expect(errors.length).toBeGreaterThan(0)
  })

  // ---- Shape of a single definition record ----
  test('definition includes correct range', () => {
    const { range } = parse('[methyl] = [C]').definitions.get('methyl')
    expect(range).toBeDefined()
    expect(Array.isArray(range)).toBe(true)
    expect(range.length).toBe(2)
  })

  test('definition includes line number', () => {
    // Two leading blank lines push the definition onto line 3
    const methyl = parse('\n\n[methyl] = [C]').definitions.get('methyl')
    expect(methyl.line).toBe(3)
  })

  // ---- Multi-token right-hand sides ----
  test('parses complex SELFIES tokens', () => {
    const alcohol = parse('[alcohol] = [C][=O][Branch1][C][O]').definitions.get('alcohol')
    expect(alcohol.tokens).toEqual(['[C]', '[=O]', '[Branch1]', '[C]', '[O]'])
  })

  test('handles multiple tokens per definition', () => {
    const ethanol = parse('[ethanol] = [C][C][O]').definitions.get('ethanol')
    expect(ethanol.tokens).toHaveLength(3)
  })

  // ---- Shape of the returned program ----
  test('returns program with definitions map', () => {
    const result = parse('[methyl] = [C]')
    expect(result).toHaveProperty('definitions')
    expect(result.definitions).toBeInstanceOf(Map)
  })

  test('returns program with errors array', () => {
    const result = parse('[methyl] = [C]')
    expect(result).toHaveProperty('errors')
    expect(Array.isArray(result.errors)).toBe(true)
  })

  test('returns program with warnings array', () => {
    const result = parse('[methyl] = [C]')
    expect(result).toHaveProperty('warnings')
    expect(Array.isArray(result.warnings)).toBe(true)
  })

  // ---- Shape of a diagnostic record ----
  test('diagnostic includes line and column', () => {
    const firstDiagnostic = parse('[methyl] [C]').errors[0] // missing =
    expect(firstDiagnostic).toMatchObject({
      severity: 'error',
      line: expect.any(Number),
      column: expect.any(Number)
    })
  })

  test('diagnostic includes range', () => {
    const firstDiagnostic = parse('[methyl] [C]').errors[0] // missing =
    expect(firstDiagnostic.range).toBeDefined()
    expect(Array.isArray(firstDiagnostic.range)).toBe(true)
  })
})
@@ -0,0 +1,136 @@
1
+ /**
2
+ * Resolver - Expands DSL definitions to primitive SELFIES
3
+ *
4
+ * Recursively resolves references to other definitions until only
5
+ * primitive SELFIES tokens remain.
6
+ */
7
+
8
+ import { decode } from '../decoder.js'
9
+ import { validateValence } from './valenceValidator.js'
10
+
11
/**
 * Error raised when a DSL definition cannot be resolved.
 *
 * Carries the offending definition name in `definitionName` so callers can
 * tell which entry failed.
 */
export class ResolveError extends Error {
  /**
   * @param {string} message - Human-readable description of the failure
   * @param {string} name - Name of the definition that failed to resolve
   */
  constructor(message, name) {
    super(message)
    Object.assign(this, { name: 'ResolveError', definitionName: name })
  }
}
21
+
22
/**
 * Expands one named definition into a primitive SELFIES string.
 *
 * @param {Object} program - Program object from parser (reads `program.definitions`)
 * @param {string} name - Definition name to expand
 * @param {Object} options - Resolution options
 * @param {boolean} options.decode - When truthy, the result is passed through `decode` before returning
 * @param {boolean} options.validateValence - Valence checking runs unless this is exactly `false`
 * @returns {string} The joined resolved tokens, or the `decode` result when `options.decode` is set
 * @throws {ResolveError} When `name` is not defined, when a circular reference is found,
 *   or when valence validation reports at least one error (the first message is used)
 *
 * Example:
 *   const program = parse('[methyl] = [C]\n[ethanol] = [methyl][C][O]')
 *   resolve(program, 'ethanol') // => '[C][C][O]'
 *   resolve(program, 'ethanol', { decode: true }) // => 'CCO'
 */
export function resolve(program, name, options = {}) {
  const isKnown = program.definitions.has(name)
  if (!isKnown) {
    throw new ResolveError(`Undefined definition: ${name}`, name)
  }

  // A fresh visiting set per top-level call keeps cycle tracking isolated
  const selfies = resolveRecursive(program, name, new Set()).join('')

  // Only an explicit `false` switches validation off
  const skipValenceCheck = options.validateValence === false
  if (!skipValenceCheck) {
    const problems = validateValence(selfies, name)
    if (problems.length > 0) {
      throw new ResolveError(problems[0].message, name)
    }
  }

  return options.decode ? decode(selfies) : selfies
}
65
+
66
/**
 * Resolves every definition in a program, keeping only the ones that succeed.
 *
 * @param {Object} program - Program object from parser (reads `program.definitions`)
 * @param {Object} options - Resolution options, forwarded unchanged to `resolve`
 * @returns {Map<string, string>} Map of definition name to its resolved value;
 *   names whose resolution threw are absent
 */
export function resolveAll(program, options = {}) {
  const results = new Map()

  for (const name of program.definitions.keys()) {
    let value
    try {
      value = resolve(program, name, options)
    } catch {
      // Best-effort: leave out definitions that can't be resolved (e.g. circular
      // dependencies). Resolving that name individually surfaces the error.
      continue
    }
    results.set(name, value)
  }

  return results
}
86
+
87
/**
 * Depth-first expansion of a definition into primitive tokens.
 *
 * @param {Object} program - Program object (reads `program.definitions`)
 * @param {string} name - Definition name to expand
 * @param {Set<string>} visiting - Names on the current expansion path, used to detect cycles
 * @returns {string[]} Tokens with every reference replaced by its own expansion
 * @throws {ResolveError} When `name` is already on the current expansion path
 */
function resolveRecursive(program, name, visiting = new Set()) {
  if (visiting.has(name)) {
    throw new ResolveError(`Circular dependency detected involving '${name}'`, name)
  }

  // Entering this definition
  visiting.add(name)

  const { tokens } = program.definitions.get(name)
  const expanded = tokens.flatMap((token) =>
    isReference(token, program)
      ? resolveRecursive(program, token.slice(1, -1), visiting) // strip the brackets to get the name
      : [token]
  )

  // Leaving this definition, so sibling branches may reference it again
  visiting.delete(name)

  return expanded
}
125
+
126
/**
 * Tells whether a bracketed token names a definition in the program.
 *
 * @param {string} token - Token to check; its first and last characters are dropped before lookup
 * @param {Object} program - Program object (reads `program.definitions`)
 * @returns {boolean} True when the unbracketed token is a key of `program.definitions`
 */
function isReference(token, program) {
  return program.definitions.has(token.slice(1, -1))
}
@@ -0,0 +1,99 @@
1
+ /**
2
+ * Tests for resolver
3
+ */
4
+
5
+ import { describe, test, expect } from 'bun:test'
6
+ import { parse } from './parser.js'
7
+ import { resolve, resolveAll } from './resolver.js'
8
+
9
describe('resolve', () => {
  // Parses `source` and resolves `name` in one step for the happy-path cases
  const resolveFrom = (source, name, options) => resolve(parse(source), name, options)

  test('resolves simple definition', () => {
    expect(resolveFrom('[methyl] = [C]', 'methyl')).toBe('[C]')
  })

  test('resolves nested definitions', () => {
    const source = ['[methyl] = [C]', '[ethyl] = [methyl][C]', '[ethanol] = [ethyl][O]'].join('\n')
    expect(resolveFrom(source, 'ethanol')).toBe('[C][C][O]')
  })

  test('resolves with decode option', () => {
    expect(resolveFrom('[ethanol] = [C][C][O]', 'ethanol', { decode: true })).toBe('CCO')
  })

  test('handles multiple references in one definition', () => {
    expect(resolveFrom('[methyl] = [C]\n[ethyl] = [methyl][methyl]', 'ethyl')).toBe('[C][C]')
  })

  test('handles deeply nested references', () => {
    // Each level doubles the previous one: 1 -> 2 -> 4 -> 8 carbons
    const source = ['[a] = [C]', '[b] = [a][a]', '[c] = [b][b]', '[d] = [c][c]'].join('\n')
    expect(resolveFrom(source, 'd')).toBe('[C][C][C][C][C][C][C][C]')
  })

  test('throws on undefined name', () => {
    const parsed = parse('[methyl] = [C]')
    expect(() => resolve(parsed, 'undefined')).toThrow(/Undefined definition/)
  })

  test('detects circular dependencies', () => {
    const parsed = parse('[a] = [b]\n[b] = [a]')
    expect(() => resolve(parsed, 'a')).toThrow(/Circular dependency/)
  })

  test('detects self-referential definitions', () => {
    const parsed = parse('[a] = [a]')
    expect(() => resolve(parsed, 'a')).toThrow(/Circular dependency/)
  })

  test('detects indirect circular dependencies', () => {
    const parsed = parse('[a] = [b]\n[b] = [c]\n[c] = [a]')
    expect(() => resolve(parsed, 'a')).toThrow(/Circular dependency/)
  })

  test('resolves mixed primitive and reference tokens', () => {
    expect(resolveFrom('[methyl] = [C]\n[ethanol] = [methyl][C][O]', 'ethanol')).toBe('[C][C][O]')
  })
})
67
+
68
describe('resolveAll', () => {
  test('resolves all definitions', () => {
    const table = resolveAll(parse('[methyl] = [C]\n[ethyl] = [methyl][C]'))
    expect(table.get('methyl')).toBe('[C]')
    expect(table.get('ethyl')).toBe('[C][C]')
  })

  test('skips definitions with circular dependencies', () => {
    // [b] and [c] reference each other; [a] is independent and must survive
    const table = resolveAll(parse('[a] = [C]\n[b] = [c]\n[c] = [b]'))
    expect(table.get('a')).toBe('[C]')
    expect(table.has('b')).toBe(false)
    expect(table.has('c')).toBe(false)
  })

  test('resolves all with decode option', () => {
    const table = resolveAll(parse('[methyl] = [C]\n[ethyl] = [methyl][C]'), { decode: true })
    expect(table.get('methyl')).toBe('C')
    expect(table.get('ethyl')).toBe('CC')
  })

  test('returns empty map for empty program', () => {
    const table = resolveAll(parse(''))
    expect(table.size).toBe(0)
  })
})
@@ -0,0 +1,56 @@
1
+ /**
2
+ * Symbol Table - Manages name-to-definition mappings for DSL
3
+ *
4
+ * Provides utilities for working with the symbol table in a Program object.
5
+ */
6
+
7
/**
 * Builds a new symbol table with no entries.
 * @returns {Map<string, Object>} A fresh, empty Map
 */
export function createSymbolTable() {
  const table = new Map()
  return table
}
14
+
15
/**
 * Registers a definition under a name that is not yet in the table.
 * @param {Map} symbolTable - Symbol table to update (mutated in place)
 * @param {string} name - Definition name
 * @param {Object} definition - Definition object to store
 * @throws {Error} If `name` is already present; the table is left unchanged
 */
export function addDefinition(symbolTable, name, definition) {
  const isTaken = symbolTable.has(name)
  if (!isTaken) {
    symbolTable.set(name, definition)
    return
  }
  throw new Error(`Definition '${name}' already exists`)
}
28
+
29
/**
 * Fetches the definition stored under a name.
 * @param {Map} symbolTable - Symbol table to search
 * @param {string} name - Name to look up
 * @returns {Object|undefined} The stored definition, or undefined when the name is absent
 */
export function lookup(symbolTable, name) {
  const entry = symbolTable.get(name)
  return entry
}
38
+
39
/**
 * Reports whether the symbol table contains a name.
 * @param {Map} symbolTable - Symbol table to check
 * @param {string} name - Name to check
 * @returns {boolean} True when the name is a key of the table
 */
export function has(symbolTable, name) {
  const isPresent = symbolTable.has(name)
  return isPresent
}
48
+
49
/**
 * Lists every name registered in the symbol table.
 * @param {Map} symbolTable - Symbol table
 * @returns {string[]} New array of the table's keys, in the Map's iteration order
 */
export function getNames(symbolTable) {
  return [...symbolTable.keys()]
}
@@ -0,0 +1,68 @@
1
+ /**
2
+ * Tests for symbol table
3
+ */
4
+
5
+ import { describe, test, expect } from 'bun:test'
6
+ import { createSymbolTable, addDefinition, lookup, has, getNames } from './symbolTable.js'
7
+
8
describe('symbolTable', () => {
  // Fresh definition record for tests that only need "some" entry named 'test'
  const makeTestDefinition = () => ({ name: 'test', tokens: ['[C]'], line: 1, range: [0, 5] })

  test('createSymbolTable returns empty map', () => {
    const symbols = createSymbolTable()
    expect(symbols).toBeInstanceOf(Map)
    expect(symbols.size).toBe(0)
  })

  test('addDefinition adds entry', () => {
    const symbols = createSymbolTable()
    addDefinition(symbols, 'test', makeTestDefinition())
    expect(symbols.size).toBe(1)
    expect(symbols.has('test')).toBe(true)
  })

  test('addDefinition throws on duplicate', () => {
    const symbols = createSymbolTable()
    const entry = makeTestDefinition()
    addDefinition(symbols, 'test', entry)
    expect(() => addDefinition(symbols, 'test', entry)).toThrow(/already exists/)
  })

  test('lookup returns definition', () => {
    const symbols = createSymbolTable()
    const entry = makeTestDefinition()
    addDefinition(symbols, 'test', entry)
    // Identity check: lookup must hand back the very object that was stored
    expect(lookup(symbols, 'test')).toBe(entry)
  })

  test('lookup returns undefined for missing', () => {
    expect(lookup(createSymbolTable(), 'missing')).toBeUndefined()
  })

  test('has returns true for existing', () => {
    const symbols = createSymbolTable()
    addDefinition(symbols, 'test', makeTestDefinition())
    expect(has(symbols, 'test')).toBe(true)
  })

  test('has returns false for missing', () => {
    expect(has(createSymbolTable(), 'missing')).toBe(false)
  })

  test('getNames returns all names', () => {
    const symbols = createSymbolTable()
    addDefinition(symbols, 'a', { name: 'a', tokens: ['[C]'], line: 1, range: [0, 5] })
    addDefinition(symbols, 'b', { name: 'b', tokens: ['[N]'], line: 2, range: [6, 11] })

    const listed = getNames(symbols)
    expect(listed).toHaveLength(2)
    expect(listed).toContain('a')
    expect(listed).toContain('b')
  })

  test('getNames returns empty array for empty table', () => {
    expect(getNames(createSymbolTable())).toEqual([])
  })
})
@@ -0,0 +1,147 @@
1
+ /**
2
+ * Valence Validator - Validates chemical valence in DSL definitions
3
+ *
4
+ * Checks that SELFIES molecules defined in the DSL follow valence rules
5
+ * and returns compilation errors for invalid structures.
6
+ */
7
+
8
+ import { getBondingCapacity } from '../constraints.js'
9
+ import { decode } from '../decoder.js'
10
+
11
/**
 * Checks a resolved SELFIES string against per-atom bonding capacity.
 *
 * @param {string} selfies - Resolved SELFIES string
 * @param {string} defName - Definition name, embedded in messages and diagnostics
 * @returns {Object[]} Array of diagnostics (empty when nothing was found)
 *
 * Each diagnostic has the shape:
 *   {
 *     message: string,
 *     severity: 'error',
 *     definitionName: string
 *   }
 *
 * Anything thrown while decoding, tokenizing, counting bonds or looking up
 * capacities is caught and reported as one "Invalid structure" diagnostic;
 * valence diagnostics collected before the throw are kept.
 */
export function validateValence(selfies, defName) {
  const diagnostics = []
  const report = (message) => {
    diagnostics.push({ message, severity: 'error', definitionName: defName })
  }

  try {
    // The decoded value is not used; decoding runs so that a throw on a
    // structurally broken string lands in the catch below
    decode(selfies)

    const bondTotals = calculateBonds(tokenizeSelfies(selfies))

    for (const atomKey of Object.keys(bondTotals)) {
      const bondCount = bondTotals[atomKey]
      const { element, charge } = parseAtom(atomKey)
      const maxBonds = getBondingCapacity(element, charge)

      if (bondCount > maxBonds) {
        report(`Valence error in '${defName}': ${element} has ${bondCount} bonds but max is ${maxBonds}`)
      }
    }
  } catch (failure) {
    report(`Invalid structure in '${defName}': ${failure.message}`)
  }

  return diagnostics
}
59
+
60
/**
 * Runs valence validation over every definition that has a resolved form.
 *
 * @param {Object} program - Program object (reads `program.definitions`)
 * @param {Map} resolvedMap - Map of definition name to resolved SELFIES string
 * @returns {Object[]} All diagnostics, concatenated in definition order;
 *   definitions missing from `resolvedMap` are not checked
 */
export function validateProgramValence(program, resolvedMap) {
  const collected = []

  for (const name of program.definitions.keys()) {
    if (!resolvedMap.has(name)) {
      continue
    }
    collected.push(...validateValence(resolvedMap.get(name), name))
  }

  return collected
}
79
+
80
/**
 * Splits a SELFIES string into its bracketed tokens.
 *
 * @param {string} selfies - SELFIES string
 * @returns {string[]} Every non-empty `[...]` group in order of appearance;
 *   an empty array when there is none (text outside brackets is ignored)
 */
function tokenizeSelfies(selfies) {
  const found = selfies.match(/\[[^\]]+\]/g)
  if (!found) {
    return []
  }
  return found
}
89
+
90
/**
 * Records, for each atom token, the order of the bond written on that token.
 *
 * A leading `=` means bond order 2, a leading `#` means 3, and anything else
 * means 1. The prefix belongs to the token it appears on: `[=O]` is an oxygen
 * atom reached through a double bond. It is neither a standalone modifier nor
 * a property of the following token.
 *
 * Tokens whose symbol contains `Branch` or `Ring` are structural and are
 * skipped, with or without a bond prefix.
 *
 * NOTE(review): the tokens that follow a Branch/Ring token are still counted
 * as ordinary atoms here; if they are length/index symbols, that should be
 * confirmed against the decoder.
 *
 * @param {string[]} tokens - Bracketed SELFIES tokens
 * @returns {Object} Plain object keyed by `"<atomIndex>:<symbol>"` (symbol
 *   without brackets or bond prefix, index counted over atoms only) whose
 *   value is that atom's incoming bond order
 */
function calculateBonds(tokens) {
  const atomBonds = {}
  let atomIndex = 0

  for (const token of tokens) {
    const content = token.slice(1, -1) // Remove brackets

    // Split an optional bond prefix from the symbol it decorates
    let bondOrder = 1
    let symbol = content
    if (content.startsWith('=')) {
      bondOrder = 2
      symbol = content.slice(1)
    } else if (content.startsWith('#')) {
      bondOrder = 3
      symbol = content.slice(1)
    }

    // Structural tokens (and a bare prefix with no symbol) are not atoms
    if (symbol === '' || symbol.includes('Branch') || symbol.includes('Ring')) {
      continue
    }

    // The atom index makes every key unique, so plain assignment is enough
    atomBonds[`${atomIndex}:${symbol}`] = bondOrder
    atomIndex++
  }

  return atomBonds
}
128
+
129
/**
 * Extracts the element symbol and formal charge from an atom key.
 *
 * The element is the first capitalised symbol in the content (one uppercase
 * letter plus an optional lowercase letter). The charge is the first signed
 * integer found after the element, so it is recognised both directly after
 * the element (`N+1`) and after other annotations (`NH1+1`, `C@@H1-1`).
 *
 * @param {string} atomKey - Atom key like "0:C", "1:N+1" or "2:NH1+1"
 * @returns {Object} `{ element, charge }`; when no capitalised symbol is
 *   present, `element` is the whole content and `charge` is 0
 */
function parseAtom(atomKey) {
  // Everything after the first ':' is the atom symbol
  const content = atomKey.slice(atomKey.indexOf(':') + 1)

  const elementMatch = content.match(/[A-Z][a-z]?/)
  if (!elementMatch) {
    return { element: content, charge: 0 }
  }

  const element = elementMatch[0]

  // Search only the text after the element so annotations between the
  // element and the charge (such as an H count) don't hide the charge
  const afterElement = content.slice(elementMatch.index + element.length)
  const chargeMatch = afterElement.match(/[+-]\d+/)
  const charge = chargeMatch ? Number.parseInt(chargeMatch[0], 10) : 0

  return { element, charge }
}