npm - selfies-js - Versions diffs - 0.3.2 → 0.3.3 - Mend

selfies-js 0.3.2 → 0.3.3

This diff compares publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "selfies-js",
-  "version": "0.3.2",
+  "version": "0.3.3",
   "description": "Pure JavaScript SELFIES encoder/decoder with DSL for molecular composition",
   "type": "module",
   "main": "src/index.js",

package/src/dsl/lexer.js CHANGED Viewed

@@ -20,11 +20,17 @@ export const TokenType = {
   // Import-related tokens
   IMPORT: 'IMPORT',       // import keyword
   FROM: 'FROM',           // from keyword
-  STRING: 'STRING',       // "path/to/file.selfies"
+  STRING: 'STRING',       // "path/to/file.selfies" or 'pattern'
   STAR: 'STAR',           // * (wildcard import)
   COMMA: 'COMMA',         // , (separator in selective imports)
   LBRACKET: 'LBRACKET',   // [ (for selective import list)
   RBRACKET: 'RBRACKET',   // ] (for selective import list)
+  // Repeat macro tokens
+  REPEAT: 'REPEAT',       // repeat keyword
+  LPAREN: 'LPAREN',       // (
+  RPAREN: 'RPAREN',       // )
+  NUMBER: 'NUMBER',       // numeric literal
 }
 /**
@@ -135,14 +141,43 @@ export function lex(source) {
       continue
     }
-    // String literal (for import paths)
-    if (char === '"') {
+    // Left parenthesis
+    if (char === '(') {
+      tokens.push({
+        type: TokenType.LPAREN,
+        value: '(',
+        line,
+        column,
+        range: [i, i + 1]
+      })
+      i++
+      column++
+      continue
+    }
+    // Right parenthesis
+    if (char === ')') {
+      tokens.push({
+        type: TokenType.RPAREN,
+        value: ')',
+        line,
+        column,
+        range: [i, i + 1]
+      })
+      i++
+      column++
+      continue
+    }
+    // String literal (for import paths and repeat patterns)
+    if (char === '"' || char === "'") {
       const stringStart = i
-      let stringValue = '"'
+      const quote = char
+      let stringValue = quote
       i++
       column++
-      while (i < source.length && source[i] !== '"' && source[i] !== '\n') {
+      while (i < source.length && source[i] !== quote && source[i] !== '\n') {
         stringValue += source[i]
         i++
         column++
@@ -152,7 +187,7 @@ export function lex(source) {
         throw new Error(`Unclosed string at line ${line}, column ${startColumn}`)
       }
-      stringValue += '"'
+      stringValue += quote
       i++
       column++
@@ -166,7 +201,7 @@ export function lex(source) {
       continue
     }
-    // Keywords and identifiers (import, from)
+    // Keywords and identifiers (import, from, repeat)
     if (isAlpha(char)) {
       const wordStart = i
       let wordValue = ''
@@ -182,6 +217,8 @@ export function lex(source) {
         type = TokenType.IMPORT
       } else if (wordValue === 'from') {
         type = TokenType.FROM
+      } else if (wordValue === 'repeat') {
+        type = TokenType.REPEAT
       }
       tokens.push({
@@ -194,6 +231,34 @@ export function lex(source) {
       continue
     }
+    // Numbers (including negative)
+    if (char >= '0' && char <= '9' || (char === '-' && i + 1 < source.length && source[i + 1] >= '0' && source[i + 1] <= '9')) {
+      const numberStart = i
+      let numberValue = ''
+      // Handle negative sign
+      if (char === '-') {
+        numberValue += char
+        i++
+        column++
+      }
+      while (i < source.length && source[i] >= '0' && source[i] <= '9') {
+        numberValue += source[i]
+        i++
+        column++
+      }
+      tokens.push({
+        type: TokenType.NUMBER,
+        value: numberValue,
+        line,
+        column: startColumn,
+        range: [numberStart, i]
+      })
+      continue
+    }
     // Bracketed token (could be NAME or SELFIES_TOKEN)
     if (char === '[') {
       const tokenStart = i

package/src/dsl/parser.js CHANGED Viewed

@@ -131,7 +131,7 @@ function parseDefinition(tokens, startIndex) {
   }
   i++
-  // 3. Collect SELFIES_TOKENs until NEWLINE or EOF
+  // 3. Collect SELFIES_TOKENs and repeat calls until NEWLINE or EOF
   const definitionTokens = []
   const tokenStart = nameToken.range[0]
   let tokenEnd = tokens[i - 1].range[1]
@@ -144,6 +144,17 @@ function parseDefinition(tokens, startIndex) {
       definitionTokens.push(tokens[i].value)
       tokenEnd = tokens[i].range[1]
       i++
+    } else if (tokens[i].type === TokenType.REPEAT) {
+      // Parse repeat call: repeat(pattern, count)
+      const repeatResult = parseRepeatCall(tokens, i)
+      if (repeatResult.error) {
+        errors.push(repeatResult.error)
+        i = repeatResult.nextIndex
+      } else {
+        definitionTokens.push(repeatResult.repeatToken)
+        tokenEnd = repeatResult.range[1]
+        i = repeatResult.nextIndex
+      }
     } else {
       errors.push(createDiagnostic(
         `Unexpected token in definition body: ${tokens[i].type}`,
@@ -183,6 +194,114 @@ function parseDefinition(tokens, startIndex) {
   return { definition, errors, nextIndex: i }
 }
+/**
+ * Parses a repeat call: repeat(pattern, count)
+ * @param {Object[]} tokens - Token array
+ * @param {number} startIndex - Index of REPEAT token
+ * @returns {Object} Result with repeatToken or error
+ */
+function parseRepeatCall(tokens, startIndex) {
+  let i = startIndex
+  const repeatToken = tokens[i]
+  // Expect REPEAT
+  if (tokens[i].type !== TokenType.REPEAT) {
+    return {
+      error: createDiagnostic('Expected repeat keyword', 'error', tokens[i]),
+      nextIndex: i + 1
+    }
+  }
+  i++
+  // Expect LPAREN
+  if (i >= tokens.length || tokens[i].type !== TokenType.LPAREN) {
+    return {
+      error: createDiagnostic('Expected \'(\' after repeat', 'error', tokens[i] || repeatToken),
+      nextIndex: i
+    }
+  }
+  i++
+  // Expect STRING (pattern)
+  if (i >= tokens.length || tokens[i].type !== TokenType.STRING) {
+    // Skip to closing paren or end of line on error
+    const skipToEnd = skipToRParenOrEOL(tokens, i)
+    return {
+      error: createDiagnostic('Expected string pattern as first argument', 'error', tokens[i] || repeatToken),
+      nextIndex: skipToEnd
+    }
+  }
+  const patternToken = tokens[i]
+  const pattern = patternToken.value.slice(1, -1) // Remove quotes
+  i++
+  // Expect COMMA
+  if (i >= tokens.length || tokens[i].type !== TokenType.COMMA) {
+    const skipToEnd = skipToRParenOrEOL(tokens, i)
+    return {
+      error: createDiagnostic('Expected \',\' after pattern', 'error', tokens[i] || patternToken),
+      nextIndex: skipToEnd
+    }
+  }
+  i++
+  // Expect NUMBER (count)
+  if (i >= tokens.length || tokens[i].type !== TokenType.NUMBER) {
+    const skipToEnd = skipToRParenOrEOL(tokens, i)
+    return {
+      error: createDiagnostic('Expected number as second argument', 'error', tokens[i] || patternToken),
+      nextIndex: skipToEnd
+    }
+  }
+  const countToken = tokens[i]
+  const count = parseInt(countToken.value, 10)
+  i++
+  // Expect RPAREN
+  if (i >= tokens.length || tokens[i].type !== TokenType.RPAREN) {
+    const skipToEnd = skipToRParenOrEOL(tokens, i)
+    return {
+      error: createDiagnostic('Expected \')\' after count', 'error', tokens[i] || countToken),
+      nextIndex: skipToEnd
+    }
+  }
+  const rparenToken = tokens[i]
+  i++
+  // Create a special repeat token
+  return {
+    repeatToken: {
+      type: 'REPEAT_CALL',
+      pattern,
+      count,
+      range: [repeatToken.range[0], rparenToken.range[1]]
+    },
+    range: [repeatToken.range[0], rparenToken.range[1]],
+    nextIndex: i
+  }
+}
+/**
+ * Skips tokens until we find RPAREN or reach end of line
+ * @param {Object[]} tokens - Token array
+ * @param {number} startIndex - Index to start skipping from
+ * @returns {number} Index after RPAREN or at NEWLINE/EOF
+ */
+function skipToRParenOrEOL(tokens, startIndex) {
+  let i = startIndex
+  while (i < tokens.length &&
+         tokens[i].type !== TokenType.RPAREN &&
+         tokens[i].type !== TokenType.NEWLINE &&
+         tokens[i].type !== TokenType.EOF) {
+    i++
+  }
+  // If we found RPAREN, move past it
+  if (i < tokens.length && tokens[i].type === TokenType.RPAREN) {
+    i++
+  }
+  return i
+}
 /**
  * Creates a diagnostic object
  * @param {string} message - Error/warning message

package/src/dsl/resolver.js CHANGED Viewed

@@ -102,11 +102,21 @@ function resolveRecursive(program, name, visiting = new Set()) {
   // Get definition
   const definition = program.definitions.get(name)
+  // Check if definition has tokens (parse errors can result in empty definitions)
+  if (!definition.tokens || definition.tokens.length === 0) {
+    throw new ResolveError(`Definition '${name}' has no tokens (possibly due to parse errors)`, name)
+  }
   const resolvedTokens = []
   // Resolve each token
   for (const token of definition.tokens) {
-    if (isReference(token, program)) {
+    if (typeof token === 'object' && token.type === 'REPEAT_CALL') {
+      // It's a repeat call - expand it
+      const expandedTokens = expandRepeat(token, program, visiting)
+      resolvedTokens.push(...expandedTokens)
+    } else if (isReference(token, program)) {
       // It's a reference to another definition - resolve it recursively
       const refName = token.slice(1, -1) // Remove brackets
       const refResolved = resolveRecursive(program, refName, visiting)
@@ -123,6 +133,82 @@ function resolveRecursive(program, name, visiting = new Set()) {
   return resolvedTokens
 }
+/**
+ * Expands a repeat call by repeating the pattern
+ * @param {Object} repeatToken - Repeat token object with pattern and count
+ * @param {Object} program - Program object
+ * @param {Set<string>} visiting - Set of currently visiting definitions
+ * @returns {string[]} Expanded tokens
+ */
+function expandRepeat(repeatToken, program, visiting) {
+  const { pattern, count } = repeatToken
+  // Validate count
+  if (count < 0) {
+    throw new ResolveError(`Repeat count must be non-negative, got ${count}`)
+  }
+  if (!Number.isInteger(count)) {
+    throw new ResolveError(`Repeat count must be an integer, got ${count}`)
+  }
+  // Tokenize the pattern string to extract individual SELFIES tokens
+  const patternTokens = tokenizePattern(pattern)
+  // Resolve each token in the pattern (they might be references)
+  const resolvedPatternTokens = []
+  for (const token of patternTokens) {
+    if (isReference(token, program)) {
+      // Recursively resolve the reference
+      const refName = token.slice(1, -1)
+      const refResolved = resolveRecursive(program, refName, visiting)
+      resolvedPatternTokens.push(...refResolved)
+    } else {
+      resolvedPatternTokens.push(token)
+    }
+  }
+  // Repeat the resolved pattern
+  const result = []
+  for (let i = 0; i < count; i++) {
+    result.push(...resolvedPatternTokens)
+  }
+  return result
+}
+/**
+ * Tokenizes a pattern string into SELFIES tokens
+ * @param {string} pattern - Pattern string like '[C][=C]'
+ * @returns {string[]} Array of tokens
+ */
+function tokenizePattern(pattern) {
+  const tokens = []
+  let i = 0
+  while (i < pattern.length) {
+    if (pattern[i] === '[') {
+      // Find the closing bracket
+      let j = i + 1
+      while (j < pattern.length && pattern[j] !== ']') {
+        j++
+      }
+      if (j >= pattern.length) {
+        throw new ResolveError(`Unclosed bracket in pattern: ${pattern}`)
+      }
+      tokens.push(pattern.slice(i, j + 1))
+      i = j + 1
+    } else if (pattern[i] === ' ' || pattern[i] === '\t') {
+      // Skip whitespace
+      i++
+    } else {
+      throw new ResolveError(`Invalid character in pattern: ${pattern[i]}`)
+    }
+  }
+  return tokens
+}
 /**
  * Checks if a token is a reference to another definition
  * @param {string} token - Token to check

package/src/dsl/resolver.test.js CHANGED Viewed

@@ -97,3 +97,86 @@ describe('resolveAll', () => {
     expect(resolved.size).toBe(0)
   })
 })
+describe('repeat macro', () => {
+  test('repeats a simple token sequence', () => {
+    const program = parse('[triple_carbon] = repeat(\'[C]\', 3)')
+    expect(resolve(program, 'triple_carbon')).toBe('[C][C][C]')
+  })
+  test('repeats a complex token sequence', () => {
+    const program = parse('[benzene] = repeat(\'[C][=C]\', 3)[Ring1][=Branch1]')
+    expect(resolve(program, 'benzene')).toBe('[C][=C][C][=C][C][=C][Ring1][=Branch1]')
+  })
+  test('repeats with count of 1', () => {
+    const program = parse('[single] = repeat(\'[C][O]\', 1)')
+    expect(resolve(program, 'single')).toBe('[C][O]')
+  })
+  test('repeats with count of 0 produces empty sequence', () => {
+    const program = parse('[empty] = [C]repeat(\'[O]\', 0)[C]')
+    expect(resolve(program, 'empty')).toBe('[C][C]')
+  })
+  test('repeat with reference to other definition', () => {
+    const source = '[unit] = [C][=C]\n[triple] = repeat(\'[unit]\', 3)'
+    const program = parse(source)
+    expect(resolve(program, 'triple')).toBe('[C][=C][C][=C][C][=C]')
+  })
+  test('multiple repeat calls in one definition', () => {
+    const program = parse('[chain] = repeat(\'[C]\', 2)repeat(\'[O]\', 2)')
+    expect(resolve(program, 'chain')).toBe('[C][C][O][O]')
+  })
+  test('repeat combined with regular tokens', () => {
+    const program = parse('[molecule] = [N]repeat(\'[C]\', 3)[O]')
+    expect(resolve(program, 'molecule')).toBe('[N][C][C][C][O]')
+  })
+  test('repeat with nested brackets in pattern', () => {
+    const program = parse('[branched] = repeat(\'[C][Branch1][C][O]\', 2)')
+    expect(resolve(program, 'branched')).toBe('[C][Branch1][C][O][C][Branch1][C][O]')
+  })
+  test('throws error on invalid repeat count', () => {
+    const program = parse('[bad] = repeat(\'[C]\', -1)')
+    expect(() => resolve(program, 'bad')).toThrow(/count must be/)
+  })
+  test('throws error on non-numeric count', () => {
+    const program = parse('[bad] = repeat(\'[C]\', abc)')
+    expect(() => resolve(program, 'bad')).toThrow()
+  })
+  test('throws error on missing arguments', () => {
+    const program = parse('[bad] = repeat(\'[C]\')')
+    expect(() => resolve(program, 'bad')).toThrow()
+  })
+  test('throws error on malformed repeat syntax', () => {
+    const program = parse('[bad] = repeat([C], 3)')
+    expect(() => resolve(program, 'bad')).toThrow()
+  })
+  test('simple polymer-like chain', () => {
+    const source = '[ch2] = [C]\n[polymer_chain] = repeat(\'[ch2]\', 5)'
+    const program = parse(source)
+    expect(resolve(program, 'polymer_chain')).toBe('[C][C][C][C][C]')
+  })
+  test('polymer chain with decode', () => {
+    const source = '[ch2] = [C]\n[polymer_chain] = repeat(\'[ch2]\', 5)'
+    const program = parse(source)
+    expect(resolve(program, 'polymer_chain', { decode: true })).toBe('CCCCC')
+  })
+  test('vinyl chloride monomer units', () => {
+    // Each monomer as a branch structure for proper chemistry
+    const source = '[monomer] = [C][Branch1][C][Cl][C]\n[polymer] = repeat(\'[monomer]\', 3)'
+    const program = parse(source)
+    // This creates a branched structure: C(Cl)CC(Cl)CC(Cl)C
+    expect(resolve(program, 'polymer')).toBe('[C][Branch1][C][Cl][C][C][Branch1][C][Cl][C][C][Branch1][C][Cl][C]')
+  })
+})