RubyGems - rley - Versions diffs - 0.8.09 → 0.8.10 - Mend

rley 0.8.09 → 0.8.10

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.

Files changed (4)

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 9734bc9875c8931a5cd5c5497df1a77a3c938076c86d2748557b8ec901d09de6
-  data.tar.gz: 20a2a6a9b88752645cf731f6790b7f89729fb108f13701048a6cc8a2c65521f6
+  metadata.gz: 92dd793350853b0466c7d541d8e19bd5d03b661f6bc207836155968b8580584b
+  data.tar.gz: c1583a4668d945c55ab7b687748eec224adb16721a4fd108813abb61d4f3356a
 SHA512:
-  metadata.gz: e203c3d6cf1b4f8b32a16af06ffe2a4548f8a53c37a2677fd13e8b3695a1e482f7d46177b3861d0b5c5b6139827133d09d1213821a7ec91b8dd805fcf3eac4dd
-  data.tar.gz: 43be3973376040fed3a9db55207d68171a0a6084ad9a102abcd26f47d60600c8aa9165a134c67b0f443e73e4e0cdd2d5a65e5271f63081a21f83aea0550b4474
+  metadata.gz: 948a5292ff798277c50e9a2b1829e9c0afb47886d7b1e4887409a98bcb08e490d493d3c672563b965f074756ce553f3a00c4729270a983f25b9b4a0389ab3505
+  data.tar.gz: d372ccc4cac0643c535759db3f63e2f5fa604dbc80fd72a85a9fb3f867b26a0a31d4a7e70925da0f2845f6e89d764efc886f5895ef1592798a13c76923839466

data/lib/rley/constants.rb CHANGED Viewed

@@ -5,7 +5,7 @@
 module Rley # Module used as a namespace
   # The version number of the gem.
-  Version = '0.8.09'
+  Version = '0.8.10'
   # Brief description of the gem.
   Description = "Ruby implementation of the Earley's parsing algorithm"

data/lib/rley/rgn/tokenizer.rb CHANGED Viewed

@@ -14,6 +14,13 @@ module Rley
     # Delimiters: e.g. parentheses '(',  ')'
     # Separators: e.g. comma
     class Tokenizer
+      PATT_KEY = /[a-zA-Z_][a-zA-Z_0-9]*:/.freeze
+      PATT_INTEGER = /\d+/.freeze
+      PATT_NEWLINE = /(?:\r\n)|\r|\n/.freeze
+      PATT_STRING_START = /"|'/.freeze
+      PATT_SYMBOL = /[^?*+,:(){}\s]+/.freeze
+      PATT_WHITESPACE = /[ \t\f]+/.freeze
       # @return [StringScanner] Low-level input scanner
       attr_reader(:scanner)
@@ -24,7 +31,7 @@ module Rley
       attr_reader(:line_start)
       # One or two special character tokens.
-      @@lexeme2name = {
+      Lexeme2name = {
         '(' => 'LEFT_PAREN',
         ')' => 'RIGHT_PAREN',
         '{' => 'LEFT_BRACE',
@@ -44,16 +51,16 @@ module Rley
       # Constructor. Initialize a tokenizer for RGN input.
       # @param source [String] RGN text to tokenize.
       def initialize(source = nil)
-        @scanner = StringScanner.new('')
-        start_with(source) if source
+        reset
+        input = source || ''
+        @scanner = StringScanner.new(input)
       end
       # Reset the tokenizer and make the given text, the current input.
       # @param source [String] RGN text to tokenize.
       def start_with(source)
+        reset
         @scanner.string = source
-        @lineno = 1
-        @line_start = 0
       end
       # Scan the source and return an array of tokens.
@@ -65,47 +72,67 @@ module Rley
           tok_sequence << token unless token.nil?
         end
-        return tok_sequence
+        tok_sequence
       end
       private
-      def _next_token
-        pos_before = scanner.pos
-        skip_intertoken_spaces
-        ws_found = true if scanner.pos > pos_before
-        curr_ch = scanner.peek(1)
-        return nil if curr_ch.nil? || curr_ch.empty?
+      def reset
+        @lineno = 1
+        @line_start = 0
+      end
+      def _next_token
         token = nil
+        ws_found = false
-        if '(){},'.include? curr_ch
-          # Single delimiter, separator or character
-          token = build_token(@@lexeme2name[curr_ch], scanner.getch)
-        elsif '?*+,'.include? curr_ch # modifier character
-          # modifiers without prefix text are symbols
-          symb = ws_found ? 'SYMBOL' : @@lexeme2name[curr_ch]
-          token = build_token(symb, scanner.getch)
-        elsif (lexeme = scanner.scan(/\.\./))
-          # One or two special character tokens
-          token = build_token(@@lexeme2name[lexeme], lexeme)
-        elsif scanner.check(/"|'/) # Start of string detected...
-          token = build_string_token
-        elsif (lexeme = scanner.scan(/\d+/))
-          token = build_token('INT_LIT', lexeme)
-        elsif (lexeme = scanner.scan(/[a-zA-Z_][a-zA-Z_0-9]*:/))
-          keyw = @@keywords[lexeme.chop!]
-          token = build_token('KEY', lexeme) if keyw
-          # ... error case
-        elsif (lexeme = scanner.scan(/[^?*+,:(){}\s]+/))
-           token = build_token('SYMBOL', lexeme)
-        else # Unknown token
-          col = scanner.pos - @line_start + 1
-          _erroneous = curr_ch.nil? ? '' : scanner.scan(/./)
-          raise ScanError, "Error: [line #{lineno}:#{col}]: Unexpected character."
-        end
+        # Loop until end of input reached or token found
+        until token || scanner.eos?
-        return token
+          nl_found = scanner.skip(PATT_NEWLINE)
+          if nl_found
+            next_line_scanned
+            next
+          end
+          if scanner.skip(PATT_WHITESPACE) # Skip whitespaces
+            ws_found = true
+            next
+          end
+          curr_ch = scanner.peek(1)
+          if '(){},'.include? curr_ch
+            # Single delimiter, separator or character
+            token = build_token(Lexeme2name[curr_ch], scanner.getch)
+          elsif '?*+,'.include? curr_ch # modifier character
+            # modifiers without prefix text are symbols
+            symb = (ws_found || nl_found) ? 'SYMBOL' : Lexeme2name[curr_ch]
+            token = build_token(symb, scanner.getch)
+          elsif (lexeme = scanner.scan(/\.\./))
+            # One or two special character tokens
+            token = build_token(Lexeme2name[lexeme], lexeme)
+          elsif scanner.check(PATT_STRING_START) # Start of string detected...
+            token = build_string_token
+          elsif (lexeme = scanner.scan(PATT_INTEGER))
+            token = build_token('INT_LIT', lexeme)
+          elsif (lexeme = scanner.scan(PATT_KEY))
+            keyw = @@keywords[lexeme.chop!]
+            token = build_token('KEY', lexeme) if keyw
+            # ... error case
+          elsif (lexeme = scanner.scan(PATT_SYMBOL))
+             token = build_token('SYMBOL', lexeme)
+          else # Unknown token
+            col = scanner.pos - @line_start + 1
+            _erroneous = curr_ch.nil? ? '' : scanner.scan(/./)
+            raise ScanError, "Error: [line #{lineno}:#{col}]: Unexpected character."
+          end
+          ws_found = false
+        end # until
+        # unterminated(@string_start.line, @string_start.column) if state == :multiline
+        token
+        # return token
       end
       def build_token(aSymbolName, aLexeme)
@@ -154,24 +181,8 @@ module Rley
         Rley::Lexical::Token.new(literal, 'STR_LIT', pos)
       end
-      # Skip non-significant whitespaces and comments.
-      # Advance the scanner until something significant is found.
-      def skip_intertoken_spaces
-        loop do
-          ws_found = scanner.skip(/[ \t\f]+/) ? true : false
-          nl_found = scanner.skip(/(?:\r\n)|\r|\n/)
-          if nl_found
-            ws_found = true
-            next_line
-          end
-          break unless ws_found
-        end
-        scanner.pos
-      end
-      def next_line
+      # Event: next line detected.
+      def next_line_scanned
         @lineno += 1
         @line_start = scanner.pos
       end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: rley
 version: !ruby/object:Gem::Version
-  version: 0.8.09
+  version: 0.8.10
 platform: ruby
 authors:
 - Dimitri Geshef
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2022-01-28 00:00:00.000000000 Z
+date: 2022-04-08 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake