selfies-js 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +274 -0
  3. package/package.json +65 -0
  4. package/src/alphabet.js +150 -0
  5. package/src/alphabet.test.js +82 -0
  6. package/src/chemistryValidator.js +236 -0
  7. package/src/cli.js +206 -0
  8. package/src/constraints.js +186 -0
  9. package/src/constraints.test.js +126 -0
  10. package/src/decoder.js +636 -0
  11. package/src/decoder.test.js +560 -0
  12. package/src/dsl/analyzer.js +170 -0
  13. package/src/dsl/analyzer.test.js +139 -0
  14. package/src/dsl/dsl.test.js +146 -0
  15. package/src/dsl/importer.js +238 -0
  16. package/src/dsl/index.js +32 -0
  17. package/src/dsl/lexer.js +264 -0
  18. package/src/dsl/lexer.test.js +115 -0
  19. package/src/dsl/parser.js +201 -0
  20. package/src/dsl/parser.test.js +148 -0
  21. package/src/dsl/resolver.js +136 -0
  22. package/src/dsl/resolver.test.js +99 -0
  23. package/src/dsl/symbolTable.js +56 -0
  24. package/src/dsl/symbolTable.test.js +68 -0
  25. package/src/dsl/valenceValidator.js +147 -0
  26. package/src/encoder.js +467 -0
  27. package/src/encoder.test.js +61 -0
  28. package/src/errors.js +79 -0
  29. package/src/errors.test.js +91 -0
  30. package/src/grammar_rules.js +146 -0
  31. package/src/index.js +70 -0
  32. package/src/parser.js +96 -0
  33. package/src/parser.test.js +96 -0
  34. package/src/properties/atoms.js +69 -0
  35. package/src/properties/atoms.test.js +116 -0
  36. package/src/properties/formula.js +111 -0
  37. package/src/properties/formula.test.js +95 -0
  38. package/src/properties/molecularWeight.js +80 -0
  39. package/src/properties/molecularWeight.test.js +84 -0
  40. package/src/properties/properties.test.js +77 -0
  41. package/src/renderers/README.md +127 -0
  42. package/src/renderers/svg.js +113 -0
  43. package/src/renderers/svg.test.js +42 -0
  44. package/src/syntax.js +641 -0
  45. package/src/syntax.test.js +363 -0
  46. package/src/tokenizer.js +99 -0
  47. package/src/tokenizer.test.js +55 -0
  48. package/src/validator.js +70 -0
  49. package/src/validator.test.js +44 -0
@@ -0,0 +1,146 @@
1
+ /**
2
+ * Grammar rules for SELFIES derivation
3
+ * Based on selfies-py/selfies/grammar_rules.py
4
+ */
5
+
// Symbols that can act as Q-value digits; a symbol's position in this
// list is the numeric value it stands for.
export const INDEX_ALPHABET = [
  '[C]',
  '[Ring1]',
  '[Ring2]',
  '[Branch1]',
  '[=Branch1]',
  '[#Branch1]',
  '[Branch2]',
  '[=Branch2]',
  '[#Branch2]',
  '[O]',
  '[N]',
  '[=N]',
  '[=C]',
  '[#C]',
  '[S]',
  '[P]'
]

// Reverse lookup: symbol -> its position in INDEX_ALPHABET
export const INDEX_CODE = {}
INDEX_ALPHABET.forEach((symbol, position) => {
  INDEX_CODE[symbol] = position
})
19
+
/**
 * Process branch symbol and extract (bond_order, L)
 * where L is the number of tokens to read for the Q-value
 *
 * @param {string} symbol - Candidate symbol, e.g. '[Branch1]', '[=Branch2]', '[#Branch3]'
 * @returns {{order: number, L: number}|null} Bond order (1-3) and L (1-3),
 *   or null when the input is not a branch symbol (including non-string input)
 */
export function processBranchSymbol(symbol) {
  // Anything that is not a string cannot be a branch symbol; report it the
  // same way as any other non-matching input instead of throwing.
  if (typeof symbol !== 'string') return null

  const match = /^\[(=|#)?Branch([1-3])\]$/.exec(symbol)
  if (!match) return null

  const [, bondChar, digit] = match
  const order = bondChar === '=' ? 2 : bondChar === '#' ? 3 : 1

  return { order, L: Number.parseInt(digit, 10) }
}
34
+
/**
 * Process ring symbol and extract (bond_order, L, stereo)
 * where L is the number of tokens to read for the Q-value
 *
 * @param {string} symbol - Candidate symbol, e.g. '[Ring1]', '[=Ring2]', '[-/Ring1]'
 * @returns {{order: number, L: number, stereo: (string[]|null)}|null}
 *   For basic rings `stereo` is null; for stereo rings `order` is 1 and
 *   `stereo` holds the two prefix characters. Returns null when the input
 *   is not a ring symbol (including non-string input).
 */
export function processRingSymbol(symbol) {
  // Non-string input is simply "not a ring symbol" rather than an error.
  if (typeof symbol !== 'string') return null

  // Basic rings: [Ring1], [=Ring1], [#Ring1], etc.
  const basic = /^\[(=|#)?Ring([1-3])\]$/.exec(symbol)
  if (basic) {
    const [, bondChar, digit] = basic
    const order = bondChar === '=' ? 2 : bondChar === '#' ? 3 : 1
    return { order, L: Number.parseInt(digit, 10), stereo: null }
  }

  // Stereo rings: two characters out of '-', '\', '/' before "Ring"
  const stereo = /^\[([-\\/])([-\\/])Ring([1-3])\]$/.exec(symbol)
  if (stereo) {
    const [, first, second, digit] = stereo
    return { order: 1, L: Number.parseInt(digit, 10), stereo: [first, second] }
  }

  return null
}
58
+
/**
 * Work out which bond an incoming atom actually gets and what state follows.
 *
 * In state 0 the atom is attached with bond order 0; otherwise the bond
 * order is the smallest of the requested order, the current state and the
 * atom's bonding capacity.
 *
 * @param {number} requestedBondOrder - Bond order asked for by the symbol
 * @param {number} bondingCapacity - Bonding capacity of the new atom
 * @param {number} state - Current derivation state
 * @returns {Array} [actualBondOrder, nextState]; nextState is null when the
 *   atom has no capacity left after the bond
 */
export function nextAtomState(requestedBondOrder, bondingCapacity, state) {
  const actualBondOrder = state === 0
    ? 0
    : Math.min(requestedBondOrder, state, bondingCapacity)

  const remaining = bondingCapacity - actualBondOrder

  return [actualBondOrder, remaining === 0 ? null : remaining]
}
77
+
/**
 * Split the current state between a new branch and the main chain.
 *
 * The branch starts with at most `branchType` bonds and always leaves at
 * least one for the chain that continues after it.
 *
 * @param {number} branchType - Bond order carried by the branch symbol
 * @param {number} state - Current derivation state
 * @returns {number[]} [branchInitState, nextState]
 * @throws {Error} When state is 1 or less
 */
export function nextBranchState(branchType, state) {
  if (state <= 1) {
    throw new Error('Branch requires state > 1')
  }

  const branchInitState = Math.min(branchType, state - 1)

  return [branchInitState, state - branchInitState]
}
92
+
/**
 * Work out the bond order of a ring closure and the state that follows it.
 *
 * The ring bond uses the smaller of the requested order and the current state.
 *
 * @param {number} ringType - Bond order carried by the ring symbol
 * @param {number} state - Current derivation state
 * @returns {Array} [bondOrder, nextState]; nextState is null when the ring
 *   bond uses up the whole state
 * @throws {Error} When state is exactly 0
 */
export function nextRingState(ringType, state) {
  if (state === 0) {
    throw new Error('Ring requires state > 0')
  }

  const bondOrder = Math.min(state, ringType)
  const remaining = state - bondOrder

  return [bondOrder, remaining === 0 ? null : remaining]
}
108
+
/**
 * Get index value from SELFIES symbols
 *
 * The symbols are read as the digits of a number in base
 * INDEX_ALPHABET.length, most significant digit first. A symbol that is
 * not a key of INDEX_CODE counts as digit 0.
 *
 * @param {string[]} symbols - Bracketed SELFIES symbols exactly as they
 *   appear in INDEX_ALPHABET, e.g. ['[Ring1]', '[O]']
 * @returns {number} The decoded index (0 for an empty array)
 */
export function getIndexFromSelfies(symbols) {
  const base = INDEX_ALPHABET.length
  let index = 0

  for (let i = 0; i < symbols.length; i++) {
    const code = INDEX_CODE[symbols[i]]
    // Only real numeric codes count: an unknown symbol, or a name that
    // resolves to an inherited object member such as 'constructor',
    // contributes 0 instead of poisoning the result with NaN.
    const digit = typeof code === 'number' ? code : 0
    index = index * base + digit
  }

  return index
}
125
+
/**
 * Encode an index as a list of INDEX_ALPHABET symbols.
 *
 * The index is written in base INDEX_ALPHABET.length, most significant
 * digit first, with each digit replaced by the symbol at that position.
 *
 * @param {number} index - Value to encode
 * @returns {string[]} Symbols spelling out the index; index 0 yields the
 *   single symbol INDEX_ALPHABET[0]
 * @throws {Error} When index is negative
 */
export function getSelfiesFromIndex(index) {
  if (index < 0) {
    throw new Error('Index must be non-negative')
  }
  if (index === 0) {
    return [INDEX_ALPHABET[0]]
  }

  const base = INDEX_ALPHABET.length
  const symbols = []

  // Peel off the least significant digit each round and put it in front,
  // so the finished list reads most significant digit first.
  for (let rest = index; rest > 0; rest = Math.floor(rest / base)) {
    symbols.unshift(INDEX_ALPHABET[rest % base])
  }

  return symbols
}
package/src/index.js ADDED
@@ -0,0 +1,70 @@
1
+ /**
2
+ * selfies-js - Pure JavaScript SELFIES encoder/decoder
3
+ *
4
+ * Public API exports
5
+ */
6
+
7
+ // Core functionality
8
+ export { decode, decodeToAST, dumpAST } from './decoder.js'
9
+ export { encode } from './encoder.js'
10
+ export { isValid } from './validator.js'
11
+
12
+ // Tokenization
13
+ export { tokenize, join, lenSelfies } from './tokenizer.js'
14
+
15
+ // Properties
16
+ export { getMolecularWeight } from './properties/molecularWeight.js'
17
+ export { getFormula } from './properties/formula.js'
18
+
19
+ // Alphabet
20
+ export { getAlphabet, getSemanticAlphabet, getAlphabetFromSelfies } from './alphabet.js'
21
+
22
+ // Constraints
23
+ export {
24
+ getPresetConstraints,
25
+ getSemanticConstraints,
26
+ setSemanticConstraints,
27
+ getBondingCapacity,
28
+ resetConstraints
29
+ } from './constraints.js'
30
+
31
+ // DSL
32
+ export { parse } from './dsl/parser.js'
33
+ export { resolve, resolveAll } from './dsl/resolver.js'
34
+ export { getDependencies, getDependents } from './dsl/analyzer.js'
35
+
36
+ // Errors
37
+ export {
38
+ SelfiesError,
39
+ DecodeError,
40
+ EncodeError,
41
+ ResolveError,
42
+ ValidationError,
43
+ ParseError,
44
+ } from './errors.js'
45
+
46
+ // Chemistry Validation (RDKit-based)
47
+ export {
48
+ isChemicallyValid,
49
+ getCanonicalSmiles,
50
+ validateRoundtrip,
51
+ getValidationDetails,
52
+ batchValidate
53
+ } from './chemistryValidator.js'
54
+
55
+ // Renderers (RDKit-based)
56
+ export { renderSelfies, initRDKit } from './renderers/svg.js'
57
+
58
+ // Syntax Highlighting API
59
+ export {
60
+ tokenizeSelfies,
61
+ tokenizeDSL,
62
+ SyntaxTokenType,
63
+ TokenModifier,
64
+ getColorScheme,
65
+ getTextMateScopes,
66
+ getMonacoTokenTypes,
67
+ createMonacoLanguage,
68
+ validateTokenization,
69
+ highlightToHtml
70
+ } from './syntax.js'
package/src/parser.js ADDED
@@ -0,0 +1,96 @@
1
+ /**
2
+ * Parser - Converts SELFIES tokens into an internal molecule representation (IR)
3
+ *
4
+ * The IR is a graph structure with atoms and bonds that can be used for
5
+ * decoding to SMILES, validation, and property calculation.
6
+ */
7
+
8
+ /**
9
+ * Parses SELFIES tokens into a molecule IR
10
+ * @param {string[]} tokens - Array of SELFIES tokens
11
+ * @returns {Object} Molecule IR with atoms and bonds
12
+ *
13
+ * IR Structure:
14
+ * {
15
+ * atoms: [
16
+ * { element: 'C', index: 0, valence: 4, usedValence: 0 },
17
+ * { element: 'O', index: 1, valence: 2, usedValence: 0 },
18
+ * ...
19
+ * ],
20
+ * bonds: [
21
+ * { from: 0, to: 1, order: 1 },
22
+ * ...
23
+ * ]
24
+ * }
25
+ */
26
+ import { getValence } from './properties/atoms.js'
27
+
/**
 * Builds a linear molecule IR from SELFIES tokens.
 *
 * Every token that yields an element becomes an atom bonded to the atom
 * created immediately before it, using the bond order of the token's
 * prefix. Branch and ring tokens are dropped; the tokens that follow them
 * get no special treatment, so no branching or ring closure is produced.
 *
 * @param {string[]} tokens - Array of SELFIES tokens
 * @returns {{atoms: Object[], bonds: Object[]}} Molecule IR
 */
export function parse(tokens) {
  const atoms = []
  const bonds = []

  for (const token of tokens) {
    const { element, bondOrder } = parseToken(token)

    // Skip branch/ring tokens for now (simplified)
    if (isBranchToken(token) || isRingToken(token)) continue
    if (!element) continue

    const index = atoms.length
    atoms.push({
      element,
      index,
      valence: getValence(element),
      usedValence: 0
    })

    // Every atom except the first is chained to its predecessor
    if (index > 0) {
      bonds.push({ from: index - 1, to: index, order: bondOrder })
    }
  }

  return { atoms, bonds }
}
62
+
/**
 * Splits a bracketed token into its element part and bond order.
 *
 * The first and last characters (the brackets) are removed. A leading '='
 * means bond order 2 and a leading '#' means bond order 3; in both cases
 * the prefix is stripped from the element. Anything else is bond order 1.
 *
 * @param {string} token - SELFIES token like '[C]', '[=O]', '[#N]'
 * @returns {{element: string, bondOrder: number}} Parsed token info
 */
function parseToken(token) {
  const inner = token.slice(1, -1)

  let bondOrder = 1
  if (inner.startsWith('=')) {
    bondOrder = 2
  } else if (inner.startsWith('#')) {
    bondOrder = 3
  }

  const element = bondOrder === 1 ? inner : inner.slice(1)
  return { element, bondOrder }
}
79
+
/**
 * Determines if a token is a branch token
 *
 * At most one bond prefix is allowed, so a doubled prefix such as
 * '[=#Branch1]' is not a branch token.
 *
 * @param {string} token - SELFIES token
 * @returns {boolean} True if token is [Branch1], [Branch2], or [Branch3],
 *   optionally prefixed with a single '=' or '#'
 */
function isBranchToken(token) {
  return /^\[[=#]?Branch[1-3]\]$/.test(token)
}
88
+
/**
 * Determines if a token is a ring token
 *
 * Accepts the basic forms with at most one bond prefix ('=' or '#') and
 * the stereo forms that carry two characters out of '-', '/', '\' before
 * "Ring" (e.g. '[-/Ring1]'). A doubled bond prefix such as '[=#Ring1]' is
 * not a ring token.
 *
 * @param {string} token - SELFIES token
 * @returns {boolean} True if token is [Ring1], [Ring2], or [Ring3] in one
 *   of the forms above
 */
function isRingToken(token) {
  return /^\[(?:[=#]|[-\\/]{2})?Ring[1-3]\]$/.test(token)
}
@@ -0,0 +1,96 @@
1
+ /**
2
+ * Tests for SELFIES parser (tokens → IR)
3
+ */
4
+
5
+ import { describe, test, expect } from 'bun:test'
6
+ import { parse } from './parser.js'
7
+
describe('parse', () => {
  // Linear chains: each element token adds one atom and one bond to the
  // previous atom.
  test('parses simple molecule', () => {
    const ir = parse(['[C]', '[C]', '[O]'])
    expect(ir.atoms).toHaveLength(3)
    expect(ir.bonds).toHaveLength(2)
  })

  test('parses with bond modifiers', () => {
    const ir = parse(['[C]', '[=C]'])
    expect(ir.bonds[0].order).toBe(2) // double bond
  })

  test('parses triple bonds', () => {
    const ir = parse(['[C]', '[#N]'])
    expect(ir.bonds[0].order).toBe(3) // triple bond
  })

  // Branch parsing: parse() only skips branch tokens for now, so the
  // intended expectations below are tracked as todo instead of passing
  // vacuously.
  // Intended: ['[C]', '[C]', '[Branch1]', '[C]', '[C]', '[C]'] -> 4 atoms, 3 bonds
  test.todo('parses simple branch')

  // Intended: ['[C]', '[C]', '[Branch1]', '[C]', '[C]', '[Branch1]', '[C]', '[C]', '[C]']
  // -> verify branch structure
  test.todo('parses nested branches')

  // Ring parsing: parse() only skips ring tokens for now.
  // Intended: ['[C]', '[C]', '[C]', '[Ring1]', '[C]'] -> ring closure bond exists
  test.todo('parses simple ring')

  // Intended: ['[C]', '[=C]', '[C]', '[=C]', '[C]', '[=C]', '[Ring1]', '[=Branch1]']
  // -> 6 atoms, alternating double bonds, ring closed
  test.todo('parses benzene ring')

  // IR structure validation
  test('returns valid IR structure', () => {
    const ir = parse(['[C]', '[C]'])
    expect(ir).toHaveProperty('atoms')
    expect(ir).toHaveProperty('bonds')
    expect(Array.isArray(ir.atoms)).toBe(true)
    expect(Array.isArray(ir.bonds)).toBe(true)
  })

  test('atoms have required properties', () => {
    const ir = parse(['[C]'])
    const atom = ir.atoms[0]
    expect(atom).toHaveProperty('element')
    expect(atom).toHaveProperty('index')
    expect(atom).toHaveProperty('valence')
    expect(atom).toHaveProperty('usedValence')
  })

  test('bonds have required properties', () => {
    const ir = parse(['[C]', '[C]'])
    const bond = ir.bonds[0]
    expect(bond).toHaveProperty('from')
    expect(bond).toHaveProperty('to')
    expect(bond).toHaveProperty('order')
  })

  // Error cases
  test('throws on invalid tokens', () => {
    expect(() => parse(['[Xyz]'])).toThrow()
  })

  // parse() currently skips a leading branch/ring token without complaint,
  // so these stay todo until it validates token positions.
  // Intended: parse(['[Branch1]']) throws (branch at start)
  test.todo('throws on malformed branch')

  // Intended: parse(['[Ring1]']) throws (ring at start)
  test.todo('throws on malformed ring')
})
@@ -0,0 +1,69 @@
1
+ /**
2
+ * Atoms - Atomic data for SELFIES elements
3
+ *
4
+ * Contains atomic masses, valences, and other properties for
5
+ * supported elements.
6
+ */
7
+
/**
 * Atomic data for supported elements
 * Maps element symbol to properties (mass, valence, name)
 */
export const ATOMIC_DATA = {
  'C': { mass: 12.011, valence: 4, name: 'Carbon' },
  'N': { mass: 14.007, valence: 3, name: 'Nitrogen' },
  'O': { mass: 15.999, valence: 2, name: 'Oxygen' },
  'S': { mass: 32.06, valence: 2, name: 'Sulfur' },
  'P': { mass: 30.974, valence: 3, name: 'Phosphorus' },
  'F': { mass: 18.998, valence: 1, name: 'Fluorine' },
  'Cl': { mass: 35.45, valence: 1, name: 'Chlorine' },
  'Br': { mass: 79.904, valence: 1, name: 'Bromine' },
  'I': { mass: 126.904, valence: 1, name: 'Iodine' },
  'B': { mass: 10.81, valence: 3, name: 'Boron' },
  'H': { mass: 1.008, valence: 1, name: 'Hydrogen' }
}

/**
 * Looks up an element among ATOMIC_DATA's own entries only.
 *
 * ATOMIC_DATA is a plain object, so names such as 'toString' or
 * 'constructor' would otherwise resolve to inherited members and be
 * mistaken for elements.
 *
 * @param {string} element - Element symbol
 * @returns {Object|undefined} The element's data record, or undefined
 */
function lookupElement(element) {
  return Object.prototype.hasOwnProperty.call(ATOMIC_DATA, element)
    ? ATOMIC_DATA[element]
    : undefined
}

/**
 * Gets atomic mass for an element
 * @param {string} element - Element symbol (e.g., 'C', 'N', 'O')
 * @returns {number} The `mass` recorded for the element in ATOMIC_DATA
 * @throws {Error} If element is not supported
 */
export function getAtomicMass(element) {
  const data = lookupElement(element)
  if (!data) {
    throw new Error(`Unsupported element: ${element}`)
  }
  return data.mass
}

/**
 * Gets standard valence for an element
 * @param {string} element - Element symbol
 * @returns {number} The `valence` recorded for the element in ATOMIC_DATA
 * @throws {Error} If element is not supported
 */
export function getValence(element) {
  const data = lookupElement(element)
  if (!data) {
    throw new Error(`Unsupported element: ${element}`)
  }
  return data.valence
}

/**
 * Checks if an element is supported
 * @param {string} element - Element symbol to check
 * @returns {boolean} True if ATOMIC_DATA has its own entry for the element
 */
export function isSupported(element) {
  return lookupElement(element) !== undefined
}

/**
 * Gets list of all supported element symbols
 * @returns {string[]} Array of element symbols, in ATOMIC_DATA key order
 */
export function getSupportedElements() {
  return Object.keys(ATOMIC_DATA)
}
@@ -0,0 +1,116 @@
1
+ /**
2
+ * Tests for atomic data
3
+ */
4
+
5
+ import { describe, test, expect } from 'bun:test'
6
+ import {
7
+ ATOMIC_DATA,
8
+ getAtomicMass,
9
+ getValence,
10
+ isSupported,
11
+ getSupportedElements
12
+ } from './atoms.js'
13
+
describe('ATOMIC_DATA', () => {
  test('contains required elements', () => {
    const required = ['C', 'N', 'O', 'S', 'P', 'F', 'Cl', 'Br', 'I', 'B']
    required.forEach(symbol => {
      expect(ATOMIC_DATA).toHaveProperty(symbol)
    })
  })

  test('each element has mass and valence', () => {
    // Every record must carry all three fields
    for (const data of Object.values(ATOMIC_DATA)) {
      for (const field of ['mass', 'valence', 'name']) {
        expect(data).toHaveProperty(field)
      }
    }
  })
})

describe('getAtomicMass', () => {
  // [label used in the test name, element symbol, expected mass]
  const expectedMasses = [
    ['carbon', 'C', 12.011],
    ['oxygen', 'O', 15.999],
    ['nitrogen', 'N', 14.007]
  ]

  for (const [label, symbol, mass] of expectedMasses) {
    test(`returns correct mass for ${label}`, () => {
      expect(getAtomicMass(symbol)).toBeCloseTo(mass, 2)
    })
  }

  test('throws on unsupported element', () => {
    expect(() => getAtomicMass('Xx')).toThrow()
  })
})

describe('getValence', () => {
  // [label used in the test name, element symbol, expected valence]
  const expectedValences = [
    ['carbon', 'C', 4],
    ['nitrogen', 'N', 3],
    ['oxygen', 'O', 2]
  ]

  for (const [label, symbol, valence] of expectedValences) {
    test(`returns correct valence for ${label}`, () => {
      expect(getValence(symbol)).toBe(valence)
    })
  }

  test('returns correct valence for halogens', () => {
    for (const halogen of ['F', 'Cl', 'Br', 'I']) {
      expect(getValence(halogen)).toBe(1)
    }
  })

  test('throws on unsupported element', () => {
    expect(() => getValence('Xx')).toThrow()
  })
})

describe('isSupported', () => {
  test('returns true for supported elements', () => {
    for (const symbol of ['C', 'N', 'O']) {
      expect(isSupported(symbol)).toBe(true)
    }
  })

  test('returns false for unsupported elements', () => {
    for (const symbol of ['Xx', 'He']) {
      expect(isSupported(symbol)).toBe(false)
    }
  })
})

describe('getSupportedElements', () => {
  test('returns array of elements', () => {
    const elements = getSupportedElements()
    expect(Array.isArray(elements)).toBe(true)
    expect(elements.length).toBeGreaterThan(0)
  })

  test('includes all required elements', () => {
    const elements = getSupportedElements()
    for (const symbol of ['C', 'N', 'O', 'S', 'P']) {
      expect(elements).toContain(symbol)
    }
  })

  test('includes halogens', () => {
    const elements = getSupportedElements()
    for (const halogen of ['F', 'Cl', 'Br', 'I']) {
      expect(elements).toContain(halogen)
    }
  })
})
@@ -0,0 +1,111 @@
1
+ /**
2
+ * Formula - Generates molecular formula from SELFIES
3
+ *
4
+ * Computes the molecular formula in Hill notation:
5
+ * C first, then H, then other elements alphabetically.
6
+ */
7
+
8
+ import { tokenize } from '../tokenizer.js'
9
+ import { parse } from '../parser.js'
10
+
/**
 * Computes the molecular formula string for a SELFIES string.
 *
 * The input is tokenized, parsed into the molecule IR, its atoms
 * (including implicit hydrogens) are counted, and the counts are
 * formatted as a formula.
 *
 * @param {string} selfies - SELFIES string
 * @returns {string} Molecular formula
 *
 * Examples:
 *   getFormula('[C]') // => 'CH4'
 *   getFormula('[C][C][O]') // => 'C2H6O'
 *   getFormula('[N][C][C][=O]') // => 'C2H5NO'
 */
export function getFormula(selfies) {
  return formatHill(countAtoms(parse(tokenize(selfies))))
}
33
+
/**
 * Counts atoms in molecule IR (including implicit hydrogens)
 *
 * Implicit hydrogens fill whatever valence an atom has left after its
 * bonds; they are added on top of any hydrogen atoms that appear
 * explicitly in the IR.
 *
 * @param {Object} ir - Molecule internal representation with `atoms`
 *   (each having `element` and `valence`) and `bonds` (each having
 *   `from`, `to`, `order`)
 * @returns {Object} Map of element to count
 */
function countAtoms(ir) {
  const counts = {}

  // Count explicit atoms
  for (const atom of ir.atoms) {
    counts[atom.element] = (counts[atom.element] || 0) + 1
  }

  // Calculate used valence from bonds; each bond consumes valence on both ends
  const usedValence = new Array(ir.atoms.length).fill(0)
  for (const bond of ir.bonds) {
    usedValence[bond.from] += bond.order
    usedValence[bond.to] += bond.order
  }

  // Add implicit hydrogens; an over-bonded atom contributes none rather
  // than a negative amount
  let implicitH = 0
  ir.atoms.forEach((atom, i) => {
    implicitH += Math.max(0, atom.valence - usedValence[i])
  })

  if (implicitH > 0) {
    // Accumulate instead of assigning so explicit H atoms counted above
    // are not lost
    counts['H'] = (counts['H'] || 0) + implicitH
  }

  return counts
}
68
+
/**
 * Formats atom counts as Hill notation formula
 *
 * Hill order: when carbon is present, C comes first, H second, and the
 * remaining elements follow alphabetically. Without carbon, every
 * element, hydrogen included, is listed alphabetically.
 *
 * @param {Object} counts - Map of element to count
 * @returns {string} Formatted formula
 *
 * Example:
 *   formatHill({ C: 2, H: 6, O: 1 }) // => 'C2H6O'
 *   formatHill({ H: 2, O: 1 }) // => 'H2O'
 *   formatHill({ H: 1, Cl: 1 }) // => 'ClH'
 */
function formatHill(counts) {
  const hasCarbon = Boolean(counts['C'])
  const hasHydrogen = Boolean(counts['H'])

  // C and H lead the formula only for carbon-containing molecules
  const leading = []
  if (hasCarbon) {
    leading.push('C')
    if (hasHydrogen) {
      leading.push('H')
    }
  }

  const rest = Object.keys(counts).filter(el => el !== 'C' && el !== 'H')
  if (!hasCarbon && hasHydrogen) {
    // No carbon: hydrogen takes its alphabetical place among the others
    rest.push('H')
  }
  rest.sort()

  return [...leading, ...rest]
    .map(element => formatElement(element, counts[element]))
    .join('')
}

/**
 * Formats a single element count
 * @param {string} element - Element symbol
 * @param {number} count - Number of atoms
 * @returns {string} Formatted string (e.g., 'C2', 'O', 'H3'); the count
 *   is omitted when it is exactly 1
 */
function formatElement(element, count) {
  return count === 1 ? element : element + count
}