selfies-js 0.3.3 → 0.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -112,6 +112,11 @@ selfies list molecules.selfies
112
112
  # Reference other fragments
113
113
  [ethanol] = [ethyl][hydroxyl]
114
114
 
115
+ # Use repeat() macro for patterns
116
+ [benzene] = repeat([C][=C], 3)[Ring1][=Branch1]
117
+ [carbon_chain] = repeat([C], 10)
118
+ [polymer] = repeat([monomer], 5)
119
+
115
120
  # Import from other files
116
121
  import "./other-file.selfies" # import all
117
122
  import [methyl, ethyl] from "./fragments.selfies" # import specific
@@ -156,6 +161,33 @@ await initRDKit()
156
161
  const svg = await renderSelfies('[C][C][O]', { width: 300, height: 300 })
157
162
  ```
158
163
 
164
+ ## Repeat Macro
165
+
166
+ The `repeat()` macro allows you to repeat molecular patterns, perfect for polymers and long chains:
167
+
168
+ ```selfies
169
+ # Benzene ring
170
+ [benzene] = repeat([C][=C], 3)[Ring1][=Branch1]
171
+
172
+ # Carbon chains
173
+ [decane] = repeat([C], 10)
174
+ [pentadecane] = repeat([C], 15)
175
+
176
+ # Polymer repeat units
177
+ [PE_unit] = [C][C]
178
+ [polyethylene_trimer] = repeat([PE_unit], 3)
179
+
180
+ # References work too
181
+ [monomer] = [C][Branch1][C][Cl][C]
182
+ [pvc_hexamer] = repeat([monomer], 6)
183
+ ```
184
+
185
+ The pattern can be:
186
+ - **Primitive tokens**: `repeat([C], 10)` for a 10-carbon chain
187
+ - **Complex patterns**: `repeat([C][=C], 3)` for alternating double bonds
188
+ - **Named references**: `repeat([unit], 5)` to repeat a defined fragment
189
+ - **Combined**: `[molecule] = [N]repeat([C], 3)[O]` with tokens before/after
190
+
159
191
  ## VS Code Extension
160
192
 
161
193
  Get live visualization as you author `.selfies` files. See the molecular structure update line-by-line as you navigate your code.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "selfies-js",
3
- "version": "0.3.3",
3
+ "version": "0.3.5",
4
4
  "description": "Pure JavaScript SELFIES encoder/decoder with DSL for molecular composition",
5
5
  "type": "module",
6
6
  "main": "src/index.js",
package/src/decoder.js CHANGED
@@ -415,10 +415,81 @@ export function deriveBranch(tokens, startIndex, maxDerive, initState, rootAtom,
415
415
  const token = tokens[startIndex + consumed]
416
416
  const content = token.slice(1, -1)
417
417
 
418
- // Skip structural tokens in branch
419
- if (content.includes('Branch') || content.includes('Ring') ||
420
- content.includes('ch') || content.includes('ng')) {
418
+ // Handle Ring tokens inside branch
419
+ if (content.includes('Ring') || content.includes('ng')) {
420
+ const ringInfo = processRingSymbol(token)
421
+ if (!ringInfo) {
422
+ throw new Error(`Invalid ring token in branch: ${token}`)
423
+ }
424
+ if (state === 0) {
425
+ throw new Error(`Ring ${token} at invalid state 0 inside branch`)
426
+ }
427
+
428
+ const { order: requestedOrder, L } = ringInfo
429
+ const [bondOrder, nextState] = nextRingState(requestedOrder, state)
430
+
431
+ consumed++
432
+ derived++ // Ring counts toward derive limit
433
+
434
+ if (consumed >= tokens.length - startIndex) {
435
+ state = nextState
436
+ break
437
+ }
438
+
439
+ const Q = readIndexFromTokens(tokens, startIndex + consumed, L)
440
+ consumed += Q.consumed
441
+ derived += Q.consumed // Q index tokens also count
442
+
443
+ const targetIndex = Math.max(0, prevAtomIndex - (Q.value + 1))
444
+ if (targetIndex !== prevAtomIndex) {
445
+ handleRingClosure(targetIndex, prevAtomIndex, bondOrder, bonds, rings)
446
+ }
447
+
448
+ state = nextState
449
+ continue
450
+ }
451
+
452
+ // Handle nested Branch tokens - recursively process them
453
+ if (content.includes('Branch') || content.includes('ch')) {
454
+ const branchInfo = processBranchSymbol(token)
455
+ if (!branchInfo) {
456
+ throw new Error(`Invalid branch token in branch: ${token}`)
457
+ }
458
+ if (state <= 1) {
459
+ throw new Error(`Branch ${token} at invalid state ${state} inside branch`)
460
+ }
461
+
462
+ const { order: branchOrder, L } = branchInfo
463
+ const [branchInitState, nextState] = nextBranchState(branchOrder, state)
464
+
421
465
  consumed++
466
+ derived++ // Branch counts toward derive limit
467
+
468
+ if (consumed >= tokens.length - startIndex) {
469
+ state = nextState
470
+ break
471
+ }
472
+
473
+ // Read Q index
474
+ const Q = readIndexFromTokens(tokens, startIndex + consumed, L)
475
+ consumed += Q.consumed
476
+ derived += Q.consumed // Q index tokens count
477
+
478
+ // Recursively derive nested branch
479
+ const nestedResult = deriveBranch(
480
+ tokens,
481
+ startIndex + consumed,
482
+ Q.value + 1,
483
+ branchInitState,
484
+ prevAtomIndex,
485
+ atoms,
486
+ bonds,
487
+ rings
488
+ )
489
+ consumed += nestedResult.consumed
490
+ derived += nestedResult.derived
491
+
492
+ state = nextState
422
493
  continue
423
494
  }
424
495
 
@@ -25,10 +25,23 @@ export function getDependencies(program, name) {
25
25
 
26
26
  const dependencies = []
27
27
  for (const token of definition.tokens) {
28
- const tokenName = token.slice(1, -1) // Remove brackets
29
- if (program.definitions.has(tokenName)) {
30
- if (!dependencies.includes(tokenName)) {
31
- dependencies.push(tokenName)
28
+ if (typeof token === 'object' && token.type === 'REPEAT_CALL') {
29
+ // Extract dependencies from repeat pattern
30
+ const patternTokens = tokenizePattern(token.pattern)
31
+ for (const patternToken of patternTokens) {
32
+ const tokenName = patternToken.slice(1, -1) // Remove brackets
33
+ if (program.definitions.has(tokenName)) {
34
+ if (!dependencies.includes(tokenName)) {
35
+ dependencies.push(tokenName)
36
+ }
37
+ }
38
+ }
39
+ } else if (typeof token === 'string') {
40
+ const tokenName = token.slice(1, -1) // Remove brackets
41
+ if (program.definitions.has(tokenName)) {
42
+ if (!dependencies.includes(tokenName)) {
43
+ dependencies.push(tokenName)
44
+ }
32
45
  }
33
46
  }
34
47
  }
@@ -36,6 +49,36 @@ export function getDependencies(program, name) {
36
49
  return dependencies
37
50
  }
38
51
 
52
/**
 * Tokenizes a pattern string into SELFIES tokens.
 *
 * A token is the text from a '[' up to and including the next ']'.
 * Characters outside of brackets are ignored, and a '[' that is never
 * closed by a ']' produces no token.
 *
 * @param {string} pattern - Pattern string like '[C][=C]'
 * @returns {string[]} Array of tokens
 */
function tokenizePattern(pattern) {
  const tokens = []
  let open = pattern.indexOf('[')

  while (open !== -1) {
    const close = pattern.indexOf(']', open + 1)
    if (close === -1) {
      // No ']' remains, so no later '[' can be closed either. Stop here
      // instead of rescanning the tail once for every unmatched '['.
      break
    }

    tokens.push(pattern.slice(open, close + 1))
    open = pattern.indexOf('[', close + 1)
  }

  return tokens
}
81
+
39
82
  /**
40
83
  * Gets the names that depend on a definition
41
84
  * @param {Object} program - Program object
package/src/dsl/parser.js CHANGED
@@ -222,18 +222,37 @@ function parseRepeatCall(tokens, startIndex) {
222
222
  }
223
223
  i++
224
224
 
225
- // Expect STRING (pattern)
226
- if (i >= tokens.length || tokens[i].type !== TokenType.STRING) {
227
- // Skip to closing paren or end of line on error
225
+ // Collect SELFIES_TOKENs as pattern until we hit COMMA
226
+ const patternTokens = []
227
+ const patternStart = i
228
+
229
+ while (i < tokens.length &&
230
+ tokens[i].type !== TokenType.COMMA &&
231
+ tokens[i].type !== TokenType.RPAREN &&
232
+ tokens[i].type !== TokenType.NEWLINE &&
233
+ tokens[i].type !== TokenType.EOF) {
234
+ if (tokens[i].type === TokenType.SELFIES_TOKEN) {
235
+ patternTokens.push(tokens[i].value)
236
+ i++
237
+ } else {
238
+ const skipToEnd = skipToRParenOrEOL(tokens, i)
239
+ return {
240
+ error: createDiagnostic('Expected SELFIES tokens or name references in pattern', 'error', tokens[i]),
241
+ nextIndex: skipToEnd
242
+ }
243
+ }
244
+ }
245
+
246
+ if (patternTokens.length === 0) {
228
247
  const skipToEnd = skipToRParenOrEOL(tokens, i)
229
248
  return {
230
- error: createDiagnostic('Expected string pattern as first argument', 'error', tokens[i] || repeatToken),
249
+ error: createDiagnostic('Pattern cannot be empty', 'error', tokens[patternStart] || repeatToken),
231
250
  nextIndex: skipToEnd
232
251
  }
233
252
  }
234
- const patternToken = tokens[i]
235
- const pattern = patternToken.value.slice(1, -1) // Remove quotes
236
- i++
253
+
254
+ // Join pattern tokens into a single string
255
+ const pattern = patternTokens.join('')
237
256
 
238
257
  // Expect COMMA
239
258
  if (i >= tokens.length || tokens[i].type !== TokenType.COMMA) {
@@ -100,81 +100,82 @@ describe('resolveAll', () => {
100
100
 
101
101
  describe('repeat macro', () => {
102
102
  test('repeats a simple token sequence', () => {
103
- const program = parse('[triple_carbon] = repeat(\'[C]\', 3)')
103
+ const program = parse('[triple_carbon] = repeat([C], 3)')
104
104
  expect(resolve(program, 'triple_carbon')).toBe('[C][C][C]')
105
105
  })
106
106
 
107
107
  test('repeats a complex token sequence', () => {
108
- const program = parse('[benzene] = repeat(\'[C][=C]\', 3)[Ring1][=Branch1]')
108
+ const program = parse('[benzene] = repeat([C][=C], 3)[Ring1][=Branch1]')
109
109
  expect(resolve(program, 'benzene')).toBe('[C][=C][C][=C][C][=C][Ring1][=Branch1]')
110
110
  })
111
111
 
112
112
  test('repeats with count of 1', () => {
113
- const program = parse('[single] = repeat(\'[C][O]\', 1)')
113
+ const program = parse('[single] = repeat([C][O], 1)')
114
114
  expect(resolve(program, 'single')).toBe('[C][O]')
115
115
  })
116
116
 
117
117
  test('repeats with count of 0 produces empty sequence', () => {
118
- const program = parse('[empty] = [C]repeat(\'[O]\', 0)[C]')
118
+ const program = parse('[empty] = [C]repeat([O], 0)[C]')
119
119
  expect(resolve(program, 'empty')).toBe('[C][C]')
120
120
  })
121
121
 
122
122
  test('repeat with reference to other definition', () => {
123
- const source = '[unit] = [C][=C]\n[triple] = repeat(\'[unit]\', 3)'
123
+ const source = '[unit] = [C][=C]\n[triple] = repeat([unit], 3)'
124
124
  const program = parse(source)
125
125
  expect(resolve(program, 'triple')).toBe('[C][=C][C][=C][C][=C]')
126
126
  })
127
127
 
128
128
  test('multiple repeat calls in one definition', () => {
129
- const program = parse('[chain] = repeat(\'[C]\', 2)repeat(\'[O]\', 2)')
129
+ const program = parse('[chain] = repeat([C], 2)repeat([O], 2)')
130
130
  expect(resolve(program, 'chain')).toBe('[C][C][O][O]')
131
131
  })
132
132
 
133
133
  test('repeat combined with regular tokens', () => {
134
- const program = parse('[molecule] = [N]repeat(\'[C]\', 3)[O]')
134
+ const program = parse('[molecule] = [N]repeat([C], 3)[O]')
135
135
  expect(resolve(program, 'molecule')).toBe('[N][C][C][C][O]')
136
136
  })
137
137
 
138
138
  test('repeat with nested brackets in pattern', () => {
139
- const program = parse('[branched] = repeat(\'[C][Branch1][C][O]\', 2)')
139
+ const program = parse('[branched] = repeat([C][Branch1][C][O], 2)')
140
140
  expect(resolve(program, 'branched')).toBe('[C][Branch1][C][O][C][Branch1][C][O]')
141
141
  })
142
142
 
143
143
  test('throws error on invalid repeat count', () => {
144
- const program = parse('[bad] = repeat(\'[C]\', -1)')
144
+ const program = parse('[bad] = repeat([C], -1)')
145
145
  expect(() => resolve(program, 'bad')).toThrow(/count must be/)
146
146
  })
147
147
 
148
148
  test('throws error on non-numeric count', () => {
149
- const program = parse('[bad] = repeat(\'[C]\', abc)')
149
+ const program = parse('[bad] = repeat([C], abc)')
150
150
  expect(() => resolve(program, 'bad')).toThrow()
151
151
  })
152
152
 
153
153
  test('throws error on missing arguments', () => {
154
- const program = parse('[bad] = repeat(\'[C]\')')
154
+ const program = parse('[bad] = repeat([C])')
155
155
  expect(() => resolve(program, 'bad')).toThrow()
156
156
  })
157
157
 
158
- test('throws error on malformed repeat syntax', () => {
159
- const program = parse('[bad] = repeat([C], 3)')
160
- expect(() => resolve(program, 'bad')).toThrow()
158
+ test('throws error on empty pattern', () => {
159
+ const program = parse('[bad] = repeat(, 3)')
160
+ // Parse error results in empty definition
161
+ expect(() => resolve(program, 'bad')).toThrow(/no tokens/)
161
162
  })
162
163
 
163
164
  test('simple polymer-like chain', () => {
164
- const source = '[ch2] = [C]\n[polymer_chain] = repeat(\'[ch2]\', 5)'
165
+ const source = '[ch2] = [C]\n[polymer_chain] = repeat([ch2], 5)'
165
166
  const program = parse(source)
166
167
  expect(resolve(program, 'polymer_chain')).toBe('[C][C][C][C][C]')
167
168
  })
168
169
 
169
170
  test('polymer chain with decode', () => {
170
- const source = '[ch2] = [C]\n[polymer_chain] = repeat(\'[ch2]\', 5)'
171
+ const source = '[ch2] = [C]\n[polymer_chain] = repeat([ch2], 5)'
171
172
  const program = parse(source)
172
173
  expect(resolve(program, 'polymer_chain', { decode: true })).toBe('CCCCC')
173
174
  })
174
175
 
175
176
  test('vinyl chloride monomer units', () => {
176
177
  // Each monomer as a branch structure for proper chemistry
177
- const source = '[monomer] = [C][Branch1][C][Cl][C]\n[polymer] = repeat(\'[monomer]\', 3)'
178
+ const source = '[monomer] = [C][Branch1][C][Cl][C]\n[polymer] = repeat([monomer], 3)'
178
179
  const program = parse(source)
179
180
  // This creates a branched structure: C(Cl)CC(Cl)CC(Cl)C
180
181
  expect(resolve(program, 'polymer')).toBe('[C][Branch1][C][Cl][C][C][Branch1][C][Cl][C][C][Branch1][C][Cl][C]')
@@ -25,26 +25,10 @@ export function validateValence(selfies, defName) {
25
25
  const errors = []
26
26
 
27
27
  try {
28
- // Try to decode to SMILES - this will catch many structural issues
28
+ // Try to decode to SMILES - this validates the structure
29
+ // The decoder's state machine properly enforces valence rules
29
30
  const smiles = decode(selfies)
30
-
31
- // Parse the SELFIES to extract atoms and bonds
32
- const tokens = tokenizeSelfies(selfies)
33
- const atomBonds = calculateBonds(tokens)
34
-
35
- // Check each atom's valence
36
- for (const [atom, bondCount] of Object.entries(atomBonds)) {
37
- const { element, charge } = parseAtom(atom)
38
- const maxBonds = getBondingCapacity(element, charge)
39
-
40
- if (bondCount > maxBonds) {
41
- errors.push({
42
- message: `Valence error in '${defName}': ${element} has ${bondCount} bonds but max is ${maxBonds}`,
43
- severity: 'error',
44
- definitionName: defName
45
- })
46
- }
47
- }
31
+ // If decoding succeeds, the SELFIES is valid
48
32
  } catch (error) {
49
33
  // If decoding fails, it's a structural error
50
34
  errors.push({