RubyGems - rley - Versions diffs - 0.8.09 → 0.8.10 - Mend

rley 0.8.09 → 0.8.10

Files changed (4) hide show

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 9734bc9875c8931a5cd5c5497df1a77a3c938076c86d2748557b8ec901d09de6
-  data.tar.gz: 20a2a6a9b88752645cf731f6790b7f89729fb108f13701048a6cc8a2c65521f6
+  metadata.gz: 92dd793350853b0466c7d541d8e19bd5d03b661f6bc207836155968b8580584b
+  data.tar.gz: c1583a4668d945c55ab7b687748eec224adb16721a4fd108813abb61d4f3356a
 SHA512:
-  metadata.gz: e203c3d6cf1b4f8b32a16af06ffe2a4548f8a53c37a2677fd13e8b3695a1e482f7d46177b3861d0b5c5b6139827133d09d1213821a7ec91b8dd805fcf3eac4dd
-  data.tar.gz: 43be3973376040fed3a9db55207d68171a0a6084ad9a102abcd26f47d60600c8aa9165a134c67b0f443e73e4e0cdd2d5a65e5271f63081a21f83aea0550b4474
+  metadata.gz: 948a5292ff798277c50e9a2b1829e9c0afb47886d7b1e4887409a98bcb08e490d493d3c672563b965f074756ce553f3a00c4729270a983f25b9b4a0389ab3505
+  data.tar.gz: d372ccc4cac0643c535759db3f63e2f5fa604dbc80fd72a85a9fb3f867b26a0a31d4a7e70925da0f2845f6e89d764efc886f5895ef1592798a13c76923839466

data/lib/rley/constants.rb CHANGED Viewed

@@ -5,7 +5,7 @@
 module Rley # Module used as a namespace
   # The version number of the gem.
-  Version = '0.8.09'
+  Version = '0.8.10'
   # Brief description of the gem.
   Description = "Ruby implementation of the Earley's parsing algorithm"

data/lib/rley/rgn/tokenizer.rb CHANGED Viewed

@@ -14,6 +14,13 @@ module Rley
     # Delimiters: e.g. parentheses '(',  ')'
     # Separators: e.g. comma
     class Tokenizer
+      PATT_KEY = /[a-zA-Z_][a-zA-Z_0-9]*:/.freeze
+      PATT_INTEGER = /\d+/.freeze
+      PATT_NEWLINE = /(?:\r\n)|\r|\n/.freeze
+      PATT_STRING_START = /"|'/.freeze
+      PATT_SYMBOL = /[^?*+,:(){}\s]+/.freeze
+      PATT_WHITESPACE = /[ \t\f]+/.freeze
       # @return [StringScanner] Low-level input scanner
       attr_reader(:scanner)
@@ -24,7 +31,7 @@ module Rley
       attr_reader(:line_start)
       # One or two special character tokens.
-      @@lexeme2name = {
+      Lexeme2name = {
         '(' => 'LEFT_PAREN',
         ')' => 'RIGHT_PAREN',
         '{' => 'LEFT_BRACE',
@@ -44,16 +51,16 @@ module Rley
       # Constructor. Initialize a tokenizer for RGN input.
       # @param source [String] RGN text to tokenize.
       def initialize(source = nil)
-        @scanner = StringScanner.new('')
-        start_with(source) if source
+        reset
+        input = source || ''
+        @scanner = StringScanner.new(input)
       end
       # Reset the tokenizer and make the given text, the current input.
       # @param source [String] RGN text to tokenize.
       def start_with(source)
+        reset
         @scanner.string = source
-        @lineno = 1
-        @line_start = 0
       end
       # Scan the source and return an array of tokens.
@@ -65,47 +72,67 @@ module Rley
           tok_sequence << token unless token.nil?
         end
-        return tok_sequence
+        tok_sequence
       end
       private
-      def _next_token
-        pos_before = scanner.pos
-        skip_intertoken_spaces
-        ws_found = true if scanner.pos > pos_before
-        curr_ch = scanner.peek(1)
-        return nil if curr_ch.nil? || curr_ch.empty?
+      def reset
+        @lineno = 1
+        @line_start = 0
+      end
+      def _next_token
         token = nil
+        ws_found = false
-        if '(){},'.include? curr_ch
-          # Single delimiter, separator or character
-          token = build_token(@@lexeme2name[curr_ch], scanner.getch)
-        elsif '?*+,'.include? curr_ch # modifier character
-          # modifiers without prefix text are symbols
-          symb = ws_found ? 'SYMBOL' : @@lexeme2name[curr_ch]
-          token = build_token(symb, scanner.getch)
-        elsif (lexeme = scanner.scan(/\.\./))
-          # One or two special character tokens
-          token = build_token(@@lexeme2name[lexeme], lexeme)
-        elsif scanner.check(/"|'/) # Start of string detected...
-          token = build_string_token
-        elsif (lexeme = scanner.scan(/\d+/))
-          token = build_token('INT_LIT', lexeme)
-        elsif (lexeme = scanner.scan(/[a-zA-Z_][a-zA-Z_0-9]*:/))
-          keyw = @@keywords[lexeme.chop!]
-          token = build_token('KEY', lexeme) if keyw
-          # ... error case
-        elsif (lexeme = scanner.scan(/[^?*+,:(){}\s]+/))
-           token = build_token('SYMBOL', lexeme)
-        else # Unknown token
-          col = scanner.pos - @line_start + 1
-          _erroneous = curr_ch.nil? ? '' : scanner.scan(/./)
-          raise ScanError, "Error: [line #{lineno}:#{col}]: Unexpected character."
-        end
+        # Loop until end of input reached or token found
+        until token || scanner.eos?
-        return token
+          nl_found = scanner.skip(PATT_NEWLINE)
+          if nl_found
+            next_line_scanned
+            next
+          end
+          if scanner.skip(PATT_WHITESPACE) # Skip whitespaces
+            ws_found = true
+            next
+          end
+          curr_ch = scanner.peek(1)
+          if '(){},'.include? curr_ch
+            # Single delimiter, separator or character
+            token = build_token(Lexeme2name[curr_ch], scanner.getch)
+          elsif '?*+,'.include? curr_ch # modifier character
+            # modifiers without prefix text are symbols
+            symb = (ws_found || nl_found) ? 'SYMBOL' : Lexeme2name[curr_ch]
+            token = build_token(symb, scanner.getch)
+          elsif (lexeme = scanner.scan(/\.\./))
+            # One or two special character tokens
+            token = build_token(Lexeme2name[lexeme], lexeme)
+          elsif scanner.check(PATT_STRING_START) # Start of string detected...
+            token = build_string_token
+          elsif (lexeme = scanner.scan(PATT_INTEGER))
+            token = build_token('INT_LIT', lexeme)
+          elsif (lexeme = scanner.scan(PATT_KEY))
+            keyw = @@keywords[lexeme.chop!]
+            token = build_token('KEY', lexeme) if keyw
+            # ... error case
+          elsif (lexeme = scanner.scan(PATT_SYMBOL))
+             token = build_token('SYMBOL', lexeme)
+          else # Unknown token
+            col = scanner.pos - @line_start + 1
+            _erroneous = curr_ch.nil? ? '' : scanner.scan(/./)
+            raise ScanError, "Error: [line #{lineno}:#{col}]: Unexpected character."
+          end
+          ws_found = false
+        end # until
+        # unterminated(@string_start.line, @string_start.column) if state == :multiline
+        token
+        # return token
       end
       def build_token(aSymbolName, aLexeme)
@@ -154,24 +181,8 @@ module Rley
         Rley::Lexical::Token.new(literal, 'STR_LIT', pos)
       end
-      # Skip non-significant whitespaces and comments.
-      # Advance the scanner until something significant is found.
-      def skip_intertoken_spaces
-        loop do
-          ws_found = scanner.skip(/[ \t\f]+/) ? true : false
-          nl_found = scanner.skip(/(?:\r\n)|\r|\n/)
-          if nl_found
-            ws_found = true
-            next_line
-          end
-          break unless ws_found
-        end
-        scanner.pos
-      end
-      def next_line
+      # Event: next line detected.
+      def next_line_scanned
         @lineno += 1
         @line_start = scanner.pos
       end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: rley
 version: !ruby/object:Gem::Version
-  version: 0.8.09
+  version: 0.8.10
 platform: ruby
 authors:
 - Dimitri Geshef
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2022-01-28 00:00:00.000000000 Z
+date: 2022-04-08 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake