selfies-js 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. package/LICENSE +21 -0
  2. package/README.md +274 -0
  3. package/package.json +65 -0
  4. package/src/alphabet.js +150 -0
  5. package/src/alphabet.test.js +82 -0
  6. package/src/chemistryValidator.js +236 -0
  7. package/src/cli.js +206 -0
  8. package/src/constraints.js +186 -0
  9. package/src/constraints.test.js +126 -0
  10. package/src/decoder.js +636 -0
  11. package/src/decoder.test.js +560 -0
  12. package/src/dsl/analyzer.js +170 -0
  13. package/src/dsl/analyzer.test.js +139 -0
  14. package/src/dsl/dsl.test.js +146 -0
  15. package/src/dsl/importer.js +238 -0
  16. package/src/dsl/index.js +32 -0
  17. package/src/dsl/lexer.js +264 -0
  18. package/src/dsl/lexer.test.js +115 -0
  19. package/src/dsl/parser.js +201 -0
  20. package/src/dsl/parser.test.js +148 -0
  21. package/src/dsl/resolver.js +136 -0
  22. package/src/dsl/resolver.test.js +99 -0
  23. package/src/dsl/symbolTable.js +56 -0
  24. package/src/dsl/symbolTable.test.js +68 -0
  25. package/src/dsl/valenceValidator.js +147 -0
  26. package/src/encoder.js +467 -0
  27. package/src/encoder.test.js +61 -0
  28. package/src/errors.js +79 -0
  29. package/src/errors.test.js +91 -0
  30. package/src/grammar_rules.js +146 -0
  31. package/src/index.js +70 -0
  32. package/src/parser.js +96 -0
  33. package/src/parser.test.js +96 -0
  34. package/src/properties/atoms.js +69 -0
  35. package/src/properties/atoms.test.js +116 -0
  36. package/src/properties/formula.js +111 -0
  37. package/src/properties/formula.test.js +95 -0
  38. package/src/properties/molecularWeight.js +80 -0
  39. package/src/properties/molecularWeight.test.js +84 -0
  40. package/src/properties/properties.test.js +77 -0
  41. package/src/renderers/README.md +127 -0
  42. package/src/renderers/svg.js +113 -0
  43. package/src/renderers/svg.test.js +42 -0
  44. package/src/syntax.js +641 -0
  45. package/src/syntax.test.js +363 -0
  46. package/src/tokenizer.js +99 -0
  47. package/src/tokenizer.test.js +55 -0
  48. package/src/validator.js +70 -0
  49. package/src/validator.test.js +44 -0
package/src/dsl/index.js
@@ -0,0 +1,32 @@
+ /**
+  * DSL Module - Public API exports
+  *
+  * This module provides the complete DSL functionality for defining
+  * named SELFIES molecules.
+  */
+
+ export { lex, TokenType } from './lexer.js'
+ export { parse } from './parser.js'
+ export { resolve, resolveAll, ResolveError } from './resolver.js'
+ export {
+   getDependencies,
+   getDependents,
+   detectCycles,
+   findUnused
+ } from './analyzer.js'
+ export {
+   createSymbolTable,
+   addDefinition,
+   lookup,
+   has,
+   getNames
+ } from './symbolTable.js'
+ export {
+   validateValence,
+   validateProgramValence
+ } from './valenceValidator.js'
+ export {
+   parseImports,
+   loadWithImports,
+   loadFile
+ } from './importer.js'
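
For orientation, here is a minimal usage sketch of this module's re-exports. The relative import path is an assumption; the package's actual export map lives in package.json, which is not expanded in this section.

```js
// Sketch only: the import path is an assumption, adjust to the package's exports.
import { lex, parse } from './src/dsl/index.js'

const source = '[methyl] = [C]\n[ethanol] = [methyl][C][O]'
const program = parse(source)                 // definitions + diagnostics
console.log([...program.definitions.keys()])  // ['methyl', 'ethanol']
console.log(lex(source).length)               // token count, incl. NEWLINE and EOF
```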
package/src/dsl/lexer.js
@@ -0,0 +1,264 @@
+ /**
+  * DSL Lexer - Tokenizes .selfies DSL source code
+  *
+  * The DSL allows defining named SELFIES molecules:
+  *   [methyl] = [C]
+  *   [ethanol] = [methyl][C][O]
+  */
+
+ /**
+  * Token types for DSL
+  */
+ export const TokenType = {
+   NAME: 'NAME',                   // bare identifier (used in import lists)
+   EQUALS: 'EQUALS',               // =
+   SELFIES_TOKEN: 'SELFIES_TOKEN', // [C], [=O], etc.
+   COMMENT: 'COMMENT',             // # comment
+   NEWLINE: 'NEWLINE',             // \n
+   EOF: 'EOF',                     // end of file
+
+   // Import-related tokens
+   IMPORT: 'IMPORT',               // import keyword
+   FROM: 'FROM',                   // from keyword
+   STRING: 'STRING',               // "path/to/file.selfies"
+   STAR: 'STAR',                   // * (wildcard import)
+   COMMA: 'COMMA',                 // , (separator in selective imports)
+   LBRACKET: 'LBRACKET',           // [ (for selective import list)
+   RBRACKET: 'RBRACKET',           // ] (for selective import list)
+ }
+
+ /**
+  * Lexes DSL source code into tokens
+  * @param {string} source - DSL source code
+  * @returns {Object[]} Array of tokens with type, value, line, column
+  *
+  * Token structure:
+  *   {
+  *     type: TokenType,
+  *     value: string,
+  *     line: number,
+  *     column: number,
+  *     range: [number, number] // character offsets
+  *   }
+  */
+ export function lex(source) {
+   const tokens = []
+   let line = 1
+   let column = 1
+   let i = 0
+
+   while (i < source.length) {
+     const char = source[i]
+     const startColumn = column
+
+     // Skip whitespace (except newlines)
+     if (char === ' ' || char === '\t' || char === '\r') {
+       i++
+       column++
+       continue
+     }
+
+     // Newline
+     if (char === '\n') {
+       tokens.push({
+         type: TokenType.NEWLINE,
+         value: '\n',
+         line,
+         column,
+         range: [i, i + 1]
+       })
+       i++
+       line++
+       column = 1
+       continue
+     }
+
+     // Comment: runs from '#' to end of line
+     if (char === '#') {
+       const commentStart = i
+       let commentValue = ''
+       while (i < source.length && source[i] !== '\n') {
+         commentValue += source[i]
+         i++
+       }
+       tokens.push({
+         type: TokenType.COMMENT,
+         value: commentValue,
+         line,
+         column: startColumn,
+         range: [commentStart, i]
+       })
+       column += commentValue.length
+       continue
+     }
+
+     // Equals
+     if (char === '=') {
+       tokens.push({
+         type: TokenType.EQUALS,
+         value: '=',
+         line,
+         column,
+         range: [i, i + 1]
+       })
+       i++
+       column++
+       continue
+     }
+
+     // Star (for wildcard imports)
+     if (char === '*') {
+       tokens.push({
+         type: TokenType.STAR,
+         value: '*',
+         line,
+         column,
+         range: [i, i + 1]
+       })
+       i++
+       column++
+       continue
+     }
+
+     // Comma (for selective imports)
+     if (char === ',') {
+       tokens.push({
+         type: TokenType.COMMA,
+         value: ',',
+         line,
+         column,
+         range: [i, i + 1]
+       })
+       i++
+       column++
+       continue
+     }
+
+     // String literal (for import paths)
+     if (char === '"') {
+       const stringStart = i
+       let stringValue = '"'
+       i++
+       column++
+
+       while (i < source.length && source[i] !== '"' && source[i] !== '\n') {
+         stringValue += source[i]
+         i++
+         column++
+       }
+
+       if (i >= source.length || source[i] === '\n') {
+         throw new Error(`Unclosed string at line ${line}, column ${startColumn}`)
+       }
+
+       stringValue += '"'
+       i++
+       column++
+
+       tokens.push({
+         type: TokenType.STRING,
+         value: stringValue,
+         line,
+         column: startColumn,
+         range: [stringStart, i]
+       })
+       continue
+     }
+
+     // Keywords and bare identifiers (import, from, names in import lists)
+     if (isAlpha(char)) {
+       const wordStart = i
+       let wordValue = ''
+
+       while (i < source.length && isAlphaNumeric(source[i])) {
+         wordValue += source[i]
+         i++
+         column++
+       }
+
+       let type = TokenType.NAME
+       if (wordValue === 'import') {
+         type = TokenType.IMPORT
+       } else if (wordValue === 'from') {
+         type = TokenType.FROM
+       }
+
+       tokens.push({
+         type,
+         value: wordValue,
+         line,
+         column: startColumn,
+         range: [wordStart, i]
+       })
+       continue
+     }
+
+     // Bracketed token: always emitted as SELFIES_TOKEN; the parser
+     // decides from context whether one is being used as a definition name
+     if (char === '[') {
+       const tokenStart = i
+       let tokenValue = '['
+       i++
+       column++
+
+       // Read until closing bracket
+       while (i < source.length && source[i] !== ']') {
+         tokenValue += source[i]
+         i++
+         column++
+       }
+
+       if (i >= source.length) {
+         throw new Error(`Unclosed bracket at line ${line}, column ${startColumn}`)
+       }
+
+       tokenValue += ']'
+       i++
+       column++
+
+       tokens.push({
+         type: TokenType.SELFIES_TOKEN,
+         value: tokenValue,
+         line,
+         column: startColumn,
+         range: [tokenStart, i]
+       })
+       continue
+     }
+
+     // Unknown character
+     throw new Error(`Unexpected character '${char}' at line ${line}, column ${column}`)
+   }
+
+   // Add EOF token
+   tokens.push({
+     type: TokenType.EOF,
+     value: '',
+     line,
+     column,
+     range: [i, i]
+   })
+
+   return tokens
+ }
+
+ /**
+  * Checks if a character is alphabetic
+  * @param {string} char - Single character
+  * @returns {boolean}
+  */
+ function isAlpha(char) {
+   return (char >= 'a' && char <= 'z') || (char >= 'A' && char <= 'Z')
+ }
+
+ /**
+  * Checks if a character is alphanumeric (letters, digits, or underscore)
+  * @param {string} char - Single character
+  * @returns {boolean}
+  */
+ function isAlphaNumeric(char) {
+   return isAlpha(char) || (char >= '0' && char <= '9') || char === '_'
+ }
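
As a quick illustration of the token stream this lexer produces (behavior mirrors the tests in the next file):

```js
// Illustrative only: reflects lexer.js above and lexer.test.js below.
import { lex } from './lexer.js'

const tokens = lex('[methyl] = [C] # carbon atom')
// -> SELFIES_TOKEN '[methyl]', EQUALS '=', SELFIES_TOKEN '[C]',
//    COMMENT '# carbon atom', EOF
for (const t of tokens) {
  console.log(t.type, JSON.stringify(t.value), `${t.line}:${t.column}`, t.range)
}
```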
package/src/dsl/lexer.test.js
@@ -0,0 +1,115 @@
+ /**
+  * Tests for DSL lexer
+  */
+
+ import { describe, test, expect } from 'bun:test'
+ import { lex, TokenType } from './lexer.js'
+
+ describe('lex', () => {
+   // Basic lexing
+   test('lexes simple definition', () => {
+     const tokens = lex('[methyl] = [C]')
+     expect(tokens).toHaveLength(4) // SELFIES_TOKEN EQUALS SELFIES_TOKEN EOF
+     expect(tokens[0].type).toBe(TokenType.SELFIES_TOKEN)
+     expect(tokens[0].value).toBe('[methyl]')
+     expect(tokens[1].type).toBe(TokenType.EQUALS)
+     expect(tokens[2].type).toBe(TokenType.SELFIES_TOKEN)
+     expect(tokens[2].value).toBe('[C]')
+     expect(tokens[3].type).toBe(TokenType.EOF)
+   })
+
+   test('lexes multiple tokens', () => {
+     const tokens = lex('[ethanol] = [C][C][O]')
+     const selfiesTokens = tokens.filter(t => t.type === TokenType.SELFIES_TOKEN)
+     expect(selfiesTokens).toHaveLength(4) // [ethanol], [C], [C], [O]
+     expect(selfiesTokens[1].value).toBe('[C]')
+     expect(selfiesTokens[2].value).toBe('[C]')
+     expect(selfiesTokens[3].value).toBe('[O]')
+   })
+
+   test('handles comments', () => {
+     const tokens = lex('# This is a comment\n[methyl] = [C]')
+     const comment = tokens.find(t => t.type === TokenType.COMMENT)
+     expect(comment).toBeDefined()
+     expect(comment.value).toBe('# This is a comment')
+   })
+
+   test('handles newlines', () => {
+     const tokens = lex('[a] = [C]\n[b] = [N]')
+     const newlines = tokens.filter(t => t.type === TokenType.NEWLINE)
+     expect(newlines.length).toBe(1) // only the explicit \n, none at EOF
+   })
+
+   // Token properties
+   test('includes line and column info', () => {
+     const tokens = lex('[methyl] = [C]')
+     expect(tokens[0]).toHaveProperty('line')
+     expect(tokens[0]).toHaveProperty('column')
+     expect(tokens[0].line).toBe(1)
+     expect(tokens[0].column).toBe(1)
+   })
+
+   test('includes character range', () => {
+     const tokens = lex('[methyl] = [C]')
+     expect(tokens[0]).toHaveProperty('range')
+     expect(Array.isArray(tokens[0].range)).toBe(true)
+     expect(tokens[0].range).toEqual([0, 8]) // '[methyl]'
+   })
+
+   // Edge cases
+   test('handles empty string', () => {
+     const tokens = lex('')
+     expect(tokens).toHaveLength(1) // just EOF
+     expect(tokens[0].type).toBe(TokenType.EOF)
+   })
+
+   test('handles whitespace', () => {
+     const tokens = lex('[a]  =  [C]') // extra spaces around '='
+     const nonEof = tokens.filter(t => t.type !== TokenType.EOF)
+     expect(nonEof).toHaveLength(3) // [a], =, [C]
+     expect(nonEof[0].value).toBe('[a]')
+     expect(nonEof[1].value).toBe('=')
+     expect(nonEof[2].value).toBe('[C]')
+   })
+
+   test('tracks line numbers correctly', () => {
+     const tokens = lex('[a] = [C]\n[b] = [N]\n[c] = [O]')
+     const selfiesTokens = tokens.filter(t => t.type === TokenType.SELFIES_TOKEN)
+     expect(selfiesTokens[0].line).toBe(1) // [a]
+     expect(selfiesTokens[2].line).toBe(2) // [b]
+     expect(selfiesTokens[4].line).toBe(3) // [c]
+   })
+
+   test('throws on unclosed bracket', () => {
+     expect(() => lex('[methyl')).toThrow(/Unclosed bracket/)
+   })
+
+   test('throws on unexpected character', () => {
+     expect(() => lex('[a] = [C] @')).toThrow(/Unexpected character/)
+   })
+
+   test('handles bond modifiers in tokens', () => {
+     const tokens = lex('[alcohol] = [C][=O]')
+     const selfiesTokens = tokens.filter(t => t.type === TokenType.SELFIES_TOKEN)
+     expect(selfiesTokens[1].value).toBe('[C]')
+     expect(selfiesTokens[2].value).toBe('[=O]')
+   })
+
+   test('handles inline comments', () => {
+     const tokens = lex('[methyl] = [C] # carbon atom')
+     const comment = tokens.find(t => t.type === TokenType.COMMENT)
+     expect(comment).toBeDefined()
+     expect(comment.value).toContain('carbon atom')
+   })
+ })
+
+ describe('TokenType', () => {
+   test('exports all token types', () => {
+     expect(TokenType.NAME).toBeDefined()
+     expect(TokenType.EQUALS).toBeDefined()
+     expect(TokenType.SELFIES_TOKEN).toBeDefined()
+     expect(TokenType.COMMENT).toBeDefined()
+     expect(TokenType.NEWLINE).toBeDefined()
+     expect(TokenType.EOF).toBeDefined()
+   })
+ })
package/src/dsl/parser.js
@@ -0,0 +1,201 @@
+ /**
+  * DSL Parser - Parses DSL tokens into AST and symbol table
+  *
+  * Converts lexer tokens into a structured Program object with
+  * definitions, errors, and warnings.
+  */
+
+ import { lex, TokenType } from './lexer.js'
+
+ /**
+  * Parses DSL source code into a Program object
+  * @param {string} source - DSL source code
+  * @returns {Object} Program object
+  *
+  * Program structure:
+  *   {
+  *     definitions: Map<string, Definition>,
+  *     errors: Diagnostic[],
+  *     warnings: Diagnostic[]
+  *   }
+  *
+  * Definition structure:
+  *   {
+  *     name: string,
+  *     tokens: string[], // SELFIES token strings in the definition body
+  *     line: number,
+  *     range: [number, number]
+  *   }
+  *
+  * Diagnostic structure:
+  *   {
+  *     message: string,
+  *     severity: 'error' | 'warning',
+  *     line: number,
+  *     column: number,
+  *     range: [number, number]
+  *   }
+  */
+ export function parse(source) {
+   const tokens = lex(source)
+   const program = {
+     definitions: new Map(),
+     errors: [],
+     warnings: []
+   }
+
+   let i = 0
+
+   while (i < tokens.length && tokens[i].type !== TokenType.EOF) {
+     const token = tokens[i]
+
+     // Skip comments and newlines
+     if (token.type === TokenType.COMMENT || token.type === TokenType.NEWLINE) {
+       i++
+       continue
+     }
+
+     // Parse one definition line
+     const { definition, errors, nextIndex } = parseDefinition(tokens, i)
+
+     if (definition) {
+       // Check for duplicate definitions; the first definition wins
+       if (program.definitions.has(definition.name)) {
+         program.errors.push({
+           message: `Duplicate definition of '${definition.name}'`,
+           severity: 'error',
+           line: definition.line,
+           column: 1,
+           range: definition.range
+         })
+       } else {
+         program.definitions.set(definition.name, definition)
+       }
+     }
+
+     if (errors && errors.length > 0) {
+       program.errors.push(...errors)
+     }
+
+     i = nextIndex
+   }
+
+   return program
+ }
+
+ /**
+  * Parses a single definition line
+  * @param {Object[]} tokens - Full token array
+  * @param {number} startIndex - Index to start parsing at
+  * @returns {{definition: Object|null, errors: Object[], nextIndex: number}} Parsed definition and any errors
+  */
+ function parseDefinition(tokens, startIndex) {
+   const errors = []
+   let i = startIndex
+   const lineStart = tokens[i].line
+
+   // Expected pattern: [name] = [token] [token] ... NEWLINE|EOF
+
+   // 1. Expect the definition name (a SELFIES_TOKEN used as a name)
+   if (tokens[i].type !== TokenType.SELFIES_TOKEN) {
+     errors.push(createDiagnostic(
+       `Expected definition name, got ${tokens[i].type}`,
+       'error',
+       tokens[i]
+     ))
+     // Skip to next line
+     while (i < tokens.length && tokens[i].type !== TokenType.NEWLINE && tokens[i].type !== TokenType.EOF) {
+       i++
+     }
+     if (i < tokens.length && tokens[i].type === TokenType.NEWLINE) i++
+     return { definition: null, errors, nextIndex: i }
+   }
+
+   const nameToken = tokens[i]
+   const name = nameToken.value.slice(1, -1) // remove brackets
+   i++
+
+   // 2. Expect EQUALS
+   if (i >= tokens.length || tokens[i].type !== TokenType.EQUALS) {
+     errors.push(createDiagnostic(
+       `Expected '=' after definition name`,
+       'error',
+       tokens[i] || nameToken
+     ))
+     // Skip to next line
+     while (i < tokens.length && tokens[i].type !== TokenType.NEWLINE && tokens[i].type !== TokenType.EOF) {
+       i++
+     }
+     if (i < tokens.length && tokens[i].type === TokenType.NEWLINE) i++
+     return { definition: null, errors, nextIndex: i }
+   }
+   i++
+
+   // 3. Collect SELFIES_TOKEN values until NEWLINE, EOF, or COMMENT
+   const definitionTokens = []
+   const tokenStart = nameToken.range[0]
+   let tokenEnd = tokens[i - 1].range[1]
+
+   while (i < tokens.length &&
+          tokens[i].type !== TokenType.NEWLINE &&
+          tokens[i].type !== TokenType.EOF &&
+          tokens[i].type !== TokenType.COMMENT) {
+     if (tokens[i].type === TokenType.SELFIES_TOKEN) {
+       definitionTokens.push(tokens[i].value)
+       tokenEnd = tokens[i].range[1]
+       i++
+     } else {
+       errors.push(createDiagnostic(
+         `Unexpected token in definition body: ${tokens[i].type}`,
+         'error',
+         tokens[i]
+       ))
+       i++
+     }
+   }
+
+   // 4. Require at least one token in the body
+   if (definitionTokens.length === 0) {
+     errors.push(createDiagnostic(
+       `Definition must have at least one token`,
+       'error',
+       nameToken
+     ))
+   }
+
+   // Skip trailing comment if present
+   if (i < tokens.length && tokens[i].type === TokenType.COMMENT) {
+     i++
+   }
+
+   // Skip newline
+   if (i < tokens.length && tokens[i].type === TokenType.NEWLINE) {
+     i++
+   }
+
+   const definition = {
+     name,
+     tokens: definitionTokens,
+     line: lineStart,
+     range: [tokenStart, tokenEnd]
+   }
+
+   return { definition, errors, nextIndex: i }
+ }
+
+ /**
+  * Creates a diagnostic object
+  * @param {string} message - Error/warning message
+  * @param {string} severity - 'error' or 'warning'
+  * @param {Object} token - Token where the diagnostic occurred
+  * @returns {Object} Diagnostic object
+  */
+ function createDiagnostic(message, severity, token) {
+   return {
+     message,
+     severity,
+     line: token.line,
+     column: token.column,
+     range: token.range
+   }
+ }
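
To round out the picture, a minimal sketch of the parser's output shape, matching the Program documentation above (note that on a duplicate name the first definition wins and an error diagnostic is recorded):

```js
// Illustrative only: exercises parse() as documented above.
import { parse } from './parser.js'

const program = parse('[methyl] = [C]\n[methyl] = [N] # duplicate')
console.log(program.definitions.get('methyl').tokens) // ['[C]'] (first wins)
console.log(program.errors[0].message) // "Duplicate definition of 'methyl'"
console.log(program.errors[0].line)    // 2
```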