selfies-js 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +274 -0
- package/package.json +65 -0
- package/src/alphabet.js +150 -0
- package/src/alphabet.test.js +82 -0
- package/src/chemistryValidator.js +236 -0
- package/src/cli.js +206 -0
- package/src/constraints.js +186 -0
- package/src/constraints.test.js +126 -0
- package/src/decoder.js +636 -0
- package/src/decoder.test.js +560 -0
- package/src/dsl/analyzer.js +170 -0
- package/src/dsl/analyzer.test.js +139 -0
- package/src/dsl/dsl.test.js +146 -0
- package/src/dsl/importer.js +238 -0
- package/src/dsl/index.js +32 -0
- package/src/dsl/lexer.js +264 -0
- package/src/dsl/lexer.test.js +115 -0
- package/src/dsl/parser.js +201 -0
- package/src/dsl/parser.test.js +148 -0
- package/src/dsl/resolver.js +136 -0
- package/src/dsl/resolver.test.js +99 -0
- package/src/dsl/symbolTable.js +56 -0
- package/src/dsl/symbolTable.test.js +68 -0
- package/src/dsl/valenceValidator.js +147 -0
- package/src/encoder.js +467 -0
- package/src/encoder.test.js +61 -0
- package/src/errors.js +79 -0
- package/src/errors.test.js +91 -0
- package/src/grammar_rules.js +146 -0
- package/src/index.js +70 -0
- package/src/parser.js +96 -0
- package/src/parser.test.js +96 -0
- package/src/properties/atoms.js +69 -0
- package/src/properties/atoms.test.js +116 -0
- package/src/properties/formula.js +111 -0
- package/src/properties/formula.test.js +95 -0
- package/src/properties/molecularWeight.js +80 -0
- package/src/properties/molecularWeight.test.js +84 -0
- package/src/properties/properties.test.js +77 -0
- package/src/renderers/README.md +127 -0
- package/src/renderers/svg.js +113 -0
- package/src/renderers/svg.test.js +42 -0
- package/src/syntax.js +641 -0
- package/src/syntax.test.js +363 -0
- package/src/tokenizer.js +99 -0
- package/src/tokenizer.test.js +55 -0
- package/src/validator.js +70 -0
- package/src/validator.test.js +44 -0
package/src/dsl/index.js
ADDED
@@ -0,0 +1,32 @@
+/**
+ * DSL Module - Public API exports
+ *
+ * This module provides the complete DSL functionality for defining
+ * named SELFIES molecules.
+ */
+
+export { lex, TokenType } from './lexer.js'
+export { parse } from './parser.js'
+export { resolve, resolveAll, ResolveError } from './resolver.js'
+export {
+  getDependencies,
+  getDependents,
+  detectCycles,
+  findUnused
+} from './analyzer.js'
+export {
+  createSymbolTable,
+  addDefinition,
+  lookup,
+  has,
+  getNames
+} from './symbolTable.js'
+export {
+  validateValence,
+  validateProgramValence
+} from './valenceValidator.js'
+export {
+  parseImports,
+  loadWithImports,
+  loadFile
+} from './importer.js'
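
For orientation, a minimal sketch of how this barrel module might be consumed (hypothetical usage with an assumed relative path; the expected values are traced by hand from parser.js further down, not captured from a run):

    import { parse } from './src/dsl/index.js'

    const program = parse('[methyl] = [C]\n[ethanol] = [methyl][C][O]')
    program.definitions.get('ethanol').tokens // ['[methyl]', '[C]', '[O]']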
package/src/dsl/lexer.js
ADDED
@@ -0,0 +1,264 @@
+/**
+ * DSL Lexer - Tokenizes .selfies DSL source code
+ *
+ * The DSL allows defining named SELFIES molecules:
+ *   [methyl] = [C]
+ *   [ethanol] = [methyl][C][O]
+ */
+
+/**
+ * Token types for DSL
+ */
+export const TokenType = {
+  NAME: 'NAME',                   // [identifier]
+  EQUALS: 'EQUALS',               // =
+  SELFIES_TOKEN: 'SELFIES_TOKEN', // [C], [=O], etc.
+  COMMENT: 'COMMENT',             // # comment
+  NEWLINE: 'NEWLINE',             // \n
+  EOF: 'EOF',                     // end of file
+
+  // Import-related tokens
+  IMPORT: 'IMPORT',     // import keyword
+  FROM: 'FROM',         // from keyword
+  STRING: 'STRING',     // "path/to/file.selfies"
+  STAR: 'STAR',         // * (wildcard import)
+  COMMA: 'COMMA',       // , (separator in selective imports)
+  LBRACKET: 'LBRACKET', // [ (for selective import list)
+  RBRACKET: 'RBRACKET', // ] (for selective import list)
+}
+
+/**
+ * Lexes DSL source code into tokens
+ * @param {string} source - DSL source code
+ * @returns {Object[]} Array of tokens with type, value, line, column
+ *
+ * Token structure:
+ * {
+ *   type: TokenType,
+ *   value: string,
+ *   line: number,
+ *   column: number,
+ *   range: [number, number] // character offsets
+ * }
+ */
+export function lex(source) {
+  const tokens = []
+  let line = 1
+  let column = 1
+  let i = 0
+
+  while (i < source.length) {
+    const char = source[i]
+    const startColumn = column
+    const startOffset = i
+
+    // Skip whitespace (except newlines)
+    if (char === ' ' || char === '\t' || char === '\r') {
+      i++
+      column++
+      continue
+    }
+
+    // Newline
+    if (char === '\n') {
+      tokens.push({
+        type: TokenType.NEWLINE,
+        value: '\n',
+        line,
+        column,
+        range: [i, i + 1]
+      })
+      i++
+      line++
+      column = 1
+      continue
+    }
+
+    // Comment
+    if (char === '#') {
+      const commentStart = i
+      let commentValue = ''
+      while (i < source.length && source[i] !== '\n') {
+        commentValue += source[i]
+        i++
+      }
+      tokens.push({
+        type: TokenType.COMMENT,
+        value: commentValue,
+        line,
+        column: startColumn,
+        range: [commentStart, i]
+      })
+      column += commentValue.length
+      continue
+    }
+
+    // Equals
+    if (char === '=') {
+      tokens.push({
+        type: TokenType.EQUALS,
+        value: '=',
+        line,
+        column,
+        range: [i, i + 1]
+      })
+      i++
+      column++
+      continue
+    }
+
+    // Star (for wildcard imports)
+    if (char === '*') {
+      tokens.push({
+        type: TokenType.STAR,
+        value: '*',
+        line,
+        column,
+        range: [i, i + 1]
+      })
+      i++
+      column++
+      continue
+    }
+
+    // Comma (for selective imports)
+    if (char === ',') {
+      tokens.push({
+        type: TokenType.COMMA,
+        value: ',',
+        line,
+        column,
+        range: [i, i + 1]
+      })
+      i++
+      column++
+      continue
+    }
+
+    // String literal (for import paths)
+    if (char === '"') {
+      const stringStart = i
+      let stringValue = '"'
+      i++
+      column++
+
+      while (i < source.length && source[i] !== '"' && source[i] !== '\n') {
+        stringValue += source[i]
+        i++
+        column++
+      }
+
+      if (i >= source.length || source[i] === '\n') {
+        throw new Error(`Unclosed string at line ${line}, column ${startColumn}`)
+      }
+
+      stringValue += '"'
+      i++
+      column++
+
+      tokens.push({
+        type: TokenType.STRING,
+        value: stringValue,
+        line,
+        column: startColumn,
+        range: [stringStart, i]
+      })
+      continue
+    }
+
+    // Keywords and identifiers (import, from)
+    if (isAlpha(char)) {
+      const wordStart = i
+      let wordValue = ''
+
+      while (i < source.length && isAlphaNumeric(source[i])) {
+        wordValue += source[i]
+        i++
+        column++
+      }
+
+      let type = TokenType.NAME
+      if (wordValue === 'import') {
+        type = TokenType.IMPORT
+      } else if (wordValue === 'from') {
+        type = TokenType.FROM
+      }
+
+      tokens.push({
+        type,
+        value: wordValue,
+        line,
+        column: startColumn,
+        range: [wordStart, i]
+      })
+      continue
+    }
+
+    // Bracketed token (could be NAME or SELFIES_TOKEN)
+    if (char === '[') {
+      const tokenStart = i
+      let tokenValue = '['
+      i++
+      column++
+
+      // Read until closing bracket
+      while (i < source.length && source[i] !== ']') {
+        tokenValue += source[i]
+        i++
+        column++
+      }
+
+      if (i >= source.length) {
+        throw new Error(`Unclosed bracket at line ${line}, column ${startColumn}`)
+      }
+
+      tokenValue += ']'
+      i++
+      column++
+
+      // Determine if this is a NAME or SELFIES_TOKEN
+      // We'll initially mark all as SELFIES_TOKEN
+      // The parser will determine context
+      tokens.push({
+        type: TokenType.SELFIES_TOKEN,
+        value: tokenValue,
+        line,
+        column: startColumn,
+        range: [tokenStart, i]
+      })
+      continue
+    }
+
+    // Unknown character
+    throw new Error(`Unexpected character '${char}' at line ${line}, column ${column}`)
+  }
+
+  // Add EOF token
+  tokens.push({
+    type: TokenType.EOF,
+    value: '',
+    line,
+    column,
+    range: [i, i]
+  })

+  return tokens
+}
+
+/**
+ * Checks if character is alphabetic
+ * @param {string} char - Single character
+ * @returns {boolean}
+ */
+function isAlpha(char) {
+  return (char >= 'a' && char <= 'z') || (char >= 'A' && char <= 'Z')
+}
+
+/**
+ * Checks if character is alphanumeric
+ * @param {string} char - Single character
+ * @returns {boolean}
+ */
+function isAlphaNumeric(char) {
+  return isAlpha(char) || (char >= '0' && char <= '9') || char === '_'
+}
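
To make the token shape concrete, here is the stream that lex produces for the one-line input used throughout the tests below. The values are traced by hand from the code above, not captured from a run:

    lex('[methyl] = [C]')
    // [
    //   { type: 'SELFIES_TOKEN', value: '[methyl]', line: 1, column: 1,  range: [0, 8]   },
    //   { type: 'EQUALS',        value: '=',        line: 1, column: 10, range: [9, 10]  },
    //   { type: 'SELFIES_TOKEN', value: '[C]',      line: 1, column: 12, range: [11, 14] },
    //   { type: 'EOF',           value: '',         line: 1, column: 15, range: [14, 14] }
    // ]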
package/src/dsl/lexer.test.js
ADDED
@@ -0,0 +1,115 @@
+/**
+ * Tests for DSL lexer
+ */
+
+import { describe, test, expect } from 'bun:test'
+import { lex, TokenType } from './lexer.js'
+
+describe('lex', () => {
+  // Basic lexing
+  test('lexes simple definition', () => {
+    const tokens = lex('[methyl] = [C]')
+    expect(tokens).toHaveLength(4) // SELFIES_TOKEN EQUALS SELFIES_TOKEN EOF
+    expect(tokens[0].type).toBe(TokenType.SELFIES_TOKEN)
+    expect(tokens[0].value).toBe('[methyl]')
+    expect(tokens[1].type).toBe(TokenType.EQUALS)
+    expect(tokens[2].type).toBe(TokenType.SELFIES_TOKEN)
+    expect(tokens[2].value).toBe('[C]')
+    expect(tokens[3].type).toBe(TokenType.EOF)
+  })
+
+  test('lexes multiple tokens', () => {
+    const tokens = lex('[ethanol] = [C][C][O]')
+    const selfiesTokens = tokens.filter(t => t.type === TokenType.SELFIES_TOKEN)
+    expect(selfiesTokens).toHaveLength(4) // [ethanol], [C], [C], [O]
+    expect(selfiesTokens[1].value).toBe('[C]')
+    expect(selfiesTokens[2].value).toBe('[C]')
+    expect(selfiesTokens[3].value).toBe('[O]')
+  })
+
+  test('handles comments', () => {
+    const tokens = lex('# This is a comment\n[methyl] = [C]')
+    const comment = tokens.find(t => t.type === TokenType.COMMENT)
+    expect(comment).toBeDefined()
+    expect(comment.value).toBe('# This is a comment')
+  })
+
+  test('handles newlines', () => {
+    const tokens = lex('[a] = [C]\n[b] = [N]')
+    const newlines = tokens.filter(t => t.type === TokenType.NEWLINE)
+    expect(newlines.length).toBe(1) // Only the explicit \n, not at EOF
+  })
+
+  // Token properties
+  test('includes line and column info', () => {
+    const tokens = lex('[methyl] = [C]')
+    expect(tokens[0]).toHaveProperty('line')
+    expect(tokens[0]).toHaveProperty('column')
+    expect(tokens[0].line).toBe(1)
+    expect(tokens[0].column).toBe(1)
+  })
+
+  test('includes character range', () => {
+    const tokens = lex('[methyl] = [C]')
+    expect(tokens[0]).toHaveProperty('range')
+    expect(Array.isArray(tokens[0].range)).toBe(true)
+    expect(tokens[0].range).toEqual([0, 8]) // '[methyl]'
+  })
+
+  // Edge cases
+  test('handles empty string', () => {
+    const tokens = lex('')
+    expect(tokens).toHaveLength(1) // just EOF
+    expect(tokens[0].type).toBe(TokenType.EOF)
+  })
+
+  test('handles whitespace', () => {
+    const tokens = lex('[a]  =  [C]') // extra spaces
+    const nonEof = tokens.filter(t => t.type !== TokenType.EOF)
+    expect(nonEof).toHaveLength(3) // [a], =, [C]
+    expect(nonEof[0].value).toBe('[a]')
+    expect(nonEof[1].value).toBe('=')
+    expect(nonEof[2].value).toBe('[C]')
+  })
+
+  test('tracks line numbers correctly', () => {
+    const tokens = lex('[a] = [C]\n[b] = [N]\n[c] = [O]')
+    const selfiesTokens = tokens.filter(t => t.type === TokenType.SELFIES_TOKEN)
+    expect(selfiesTokens[0].line).toBe(1) // [a]
+    expect(selfiesTokens[2].line).toBe(2) // [b]
+    expect(selfiesTokens[4].line).toBe(3) // [c]
+  })
+
+  test('throws on unclosed bracket', () => {
+    expect(() => lex('[methyl')).toThrow(/Unclosed bracket/)
+  })
+
+  test('throws on unexpected character', () => {
+    expect(() => lex('[a] = [C] @')).toThrow(/Unexpected character/)
+  })
+
+  test('handles bond modifiers in tokens', () => {
+    const tokens = lex('[alcohol] = [C][=O]')
+    const selfiesTokens = tokens.filter(t => t.type === TokenType.SELFIES_TOKEN)
+    expect(selfiesTokens[1].value).toBe('[C]')
+    expect(selfiesTokens[2].value).toBe('[=O]')
+  })
+
+  test('handles inline comments', () => {
+    const tokens = lex('[methyl] = [C] # carbon atom')
+    const comment = tokens.find(t => t.type === TokenType.COMMENT)
+    expect(comment).toBeDefined()
+    expect(comment.value).toContain('carbon atom')
+  })
+})
+
+describe('TokenType', () => {
+  test('exports all token types', () => {
+    expect(TokenType.NAME).toBeDefined()
+    expect(TokenType.EQUALS).toBeDefined()
+    expect(TokenType.SELFIES_TOKEN).toBeDefined()
+    expect(TokenType.COMMENT).toBeDefined()
+    expect(TokenType.NEWLINE).toBeDefined()
+    expect(TokenType.EOF).toBeDefined()
+  })
+})
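
One gap worth noting: the tests above never exercise the import-related token types. Tracing the lexer by hand with the example path from its own doc comment, an import line would tokenize as follows (a sketch, not a shipped test):

    lex('import * from "path/to/file.selfies"')
    // token types: IMPORT, STAR, FROM, STRING, EOF
    // the STRING token keeps its quotes: value is '"path/to/file.selfies"'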
package/src/dsl/parser.js
ADDED
@@ -0,0 +1,201 @@
+/**
+ * DSL Parser - Parses DSL tokens into AST and symbol table
+ *
+ * Converts lexer tokens into a structured Program object with
+ * definitions, errors, and warnings.
+ */
+
+import { lex, TokenType } from './lexer.js'
+
+/**
+ * Parses DSL source code into a Program object
+ * @param {string} source - DSL source code
+ * @returns {Object} Program object
+ *
+ * Program structure:
+ * {
+ *   definitions: Map<string, Definition>,
+ *   errors: Diagnostic[],
+ *   warnings: Diagnostic[]
+ * }
+ *
+ * Definition structure:
+ * {
+ *   name: string,
+ *   tokens: Token[], // SELFIES tokens in the definition
+ *   line: number,
+ *   range: [number, number]
+ * }
+ *
+ * Diagnostic structure:
+ * {
+ *   message: string,
+ *   severity: 'error' | 'warning',
+ *   line: number,
+ *   column: number,
+ *   range: [number, number]
+ * }
+ */
+export function parse(source) {
+  const tokens = lex(source)
+  const program = {
+    definitions: new Map(),
+    errors: [],
+    warnings: []
+  }
+
+  let i = 0
+
+  while (i < tokens.length && tokens[i].type !== TokenType.EOF) {
+    const token = tokens[i]
+
+    // Skip comments and newlines
+    if (token.type === TokenType.COMMENT || token.type === TokenType.NEWLINE) {
+      i++
+      continue
+    }
+
+    // Parse definition line
+    const { definition, errors, nextIndex } = parseDefinition(tokens, i)
+
+    if (definition) {
+      // Check for duplicate definitions
+      if (program.definitions.has(definition.name)) {
+        program.errors.push({
+          message: `Duplicate definition of '${definition.name}'`,
+          severity: 'error',
+          line: definition.line,
+          column: 1,
+          range: definition.range
+        })
+      } else {
+        program.definitions.set(definition.name, definition)
+      }
+    }
+
+    if (errors && errors.length > 0) {
+      program.errors.push(...errors)
+    }
+
+    i = nextIndex
+  }
+
+  return program
+}
+
+/**
+ * Parses a single definition line
+ * @param {Object[]} tokens - Tokens for the line
+ * @param {number} startIndex - Index to start parsing
+ * @returns {{definition: Object, errors: Object[], nextIndex: number}} Parsed definition and any errors
+ */
+function parseDefinition(tokens, startIndex) {
+  const errors = []
+  let i = startIndex
+  const lineStart = tokens[i].line
+
+  // Expected pattern: [name] = [token] [token] ... NEWLINE|EOF
+
+  // 1. Expect NAME (SELFIES_TOKEN that acts as name)
+  if (tokens[i].type !== TokenType.SELFIES_TOKEN) {
+    errors.push(createDiagnostic(
+      `Expected definition name, got ${tokens[i].type}`,
+      'error',
+      tokens[i]
+    ))
+    // Skip to next line
+    while (i < tokens.length && tokens[i].type !== TokenType.NEWLINE && tokens[i].type !== TokenType.EOF) {
+      i++
+    }
+    if (tokens[i].type === TokenType.NEWLINE) i++
+    return { definition: null, errors, nextIndex: i }
+  }
+
+  const nameToken = tokens[i]
+  const name = nameToken.value.slice(1, -1) // Remove brackets
+  i++
+
+  // 2. Expect EQUALS
+  if (i >= tokens.length || tokens[i].type !== TokenType.EQUALS) {
+    errors.push(createDiagnostic(
+      `Expected '=' after definition name`,
+      'error',
+      tokens[i] || nameToken
+    ))
+    // Skip to next line
+    while (i < tokens.length && tokens[i].type !== TokenType.NEWLINE && tokens[i].type !== TokenType.EOF) {
+      i++
+    }
+    if (i < tokens.length && tokens[i].type === TokenType.NEWLINE) i++
+    return { definition: null, errors, nextIndex: i }
+  }
+  i++
+
+  // 3. Collect SELFIES_TOKENs until NEWLINE or EOF
+  const definitionTokens = []
+  const tokenStart = nameToken.range[0]
+  let tokenEnd = tokens[i - 1].range[1]
+
+  while (i < tokens.length &&
+         tokens[i].type !== TokenType.NEWLINE &&
+         tokens[i].type !== TokenType.EOF &&
+         tokens[i].type !== TokenType.COMMENT) {
+    if (tokens[i].type === TokenType.SELFIES_TOKEN) {
+      definitionTokens.push(tokens[i].value)
+      tokenEnd = tokens[i].range[1]
+      i++
+    } else {
+      errors.push(createDiagnostic(
+        `Unexpected token in definition body: ${tokens[i].type}`,
+        'error',
+        tokens[i]
+      ))
+      i++
+    }
+  }
+
+  // 4. Check if we have at least one token
+  if (definitionTokens.length === 0) {
+    errors.push(createDiagnostic(
+      `Definition must have at least one token`,
+      'error',
+      nameToken
+    ))
+  }
+
+  // Skip trailing comment if present
+  if (i < tokens.length && tokens[i].type === TokenType.COMMENT) {
+    i++
+  }
+
+  // Skip newline
+  if (i < tokens.length && tokens[i].type === TokenType.NEWLINE) {
+    i++
+  }
+
+  const definition = {
+    name,
+    tokens: definitionTokens,
+    line: lineStart,
+    range: [tokenStart, tokenEnd]
+  }
+
+  return { definition, errors, nextIndex: i }
+}
+
+/**
+ * Creates a diagnostic object
+ * @param {string} message - Error/warning message
+ * @param {string} severity - 'error' or 'warning'
+ * @param {Object} token - Token where diagnostic occurred
+ * @returns {Object} Diagnostic object
+ */
+function createDiagnostic(message, severity, token) {
+  return {
+    message,
+    severity,
+    line: token.line,
+    column: token.column,
+    range: token.range
+  }
+}
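
Finally, a short sketch of the parser's duplicate handling and recovery, traced by hand from parse and parseDefinition above rather than from a test run:

    const program = parse('[methyl] = [C]\n[methyl] = [N]')
    program.definitions.size                 // 1 (the first definition wins)
    program.definitions.get('methyl').tokens // ['[C]']
    program.errors[0].message                // "Duplicate definition of 'methyl'"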