RubyGems - src_lexer - Versions diffs - 0.0.1 → 0.0.2 - Mend

src_lexer 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

checksums.yaml CHANGED Viewed

@@ -1,15 +1,15 @@
 ---
 !binary "U0hBMQ==":
   metadata.gz: !binary |-
-    YWNmNzQ2N2JjN2UyNTVhM2UwZTE3ZmVkMWVlZjE4OTJiZGRkNGUwNg==
+    N2I2ZmY3ZjlhNjU3MTBiYzE1ZmMzMTYwMjk0MzEwNTU3MmE3NWVlYw==
   data.tar.gz: !binary |-
-    ZWFmYTU0NmVjYTkxZWI5NjNkNTQ3Zjc4ZjNlMTkzNjQwMTFkOWI2Mg==
+    ZmQ2YjRkOTVkODhmODEyZDA4OWY2Y2JhN2M5Yzc3YjkxNzJjYmMzNQ==
 SHA512:
   metadata.gz: !binary |-
-    MGY0M2JlNWZkNjY3ZTEzNjk5M2E4OTZlMjBhYmU4Mzg5OGI3ZGZjNWQyNTZm
-    NzliYjU2OGFmNDk2OWJkZDY5ZGJlMmYzMTViMjhkYmMwMDk2NmUyYzBhYjdl
-    ZDFmNmExOGZkNTA2YTY2NDVjOGI3YzVkZjlhYzI4ZDU1YzViMzI=
+    MTc4NmI5ZTM4MmZkNThlM2FiY2VkMDRiNTRjOWIzMzEzZjZlNzI2NGQ2NzU5
+    MTM5ZGQ3MTY5NmY5MzFiYjQ3NzNkNzhiMDY3NmM0MmY4ZDVlNDlmM2RiZmQy
+    NDdhYzk2YjI5MGU2MGQ3Yzc1YjQyYzVhYzE4MTJiMzAyYTI0Mzc=
   data.tar.gz: !binary |-
-    ZmE5YWQ3YTViY2FmZWE0YTRlMWQxODhlZThkMDA0ZWExMDNiODQwNGI0ZTQ1
-    ZDRkYzAyZTczZDhiNGJiZjVjYWNhMDI5M2Y5NDBlNmI3ZjBkZTUxODlkMzM4
-    MDNiNGI3ZjBiZjFlM2ZmMmY4MWYxZTVjM2ZhMjEzNDIwNGM1ZGM=
+    NTcyOWE4NjlmODE2ODMxMWEwMzlmZTJmNmZkNWVhYTcwNDEyODIzZjkwYTE2
+    MDYyNjk5MzkxZGVkZTEzZDkzMmRhOTNlZjE2MjA3Y2UzNTkzNTc1N2JiZmRm
+    NTgxNjQ1MGNmOWQxZmE4ODRkYjAxODVlNTk3ZTQ4ZjNjNTJjODg=

data/lib/src_lexer/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module SrcLexer
-  VERSION = "0.0.1"
+  VERSION = "0.0.2"
 end

data/lib/src_lexer.rb CHANGED Viewed

@@ -18,13 +18,16 @@ module SrcLexer
   class Lexer
     END_TOKEN = [false, nil]
-    attr_reader :keywords, :symbols, :line_comment_marker, :comment_markers, :tokens, :str
-    def initialize(keywords, symbols, line_comment_marker, comment_marker)
-      @keywords = ((keywords.nil?) ? [] : keywords.uniq.compact)
-      @symbols = ((symbols.nil?) ? [] : symbols.uniq.compact)
-      @line_comment_marker = ((line_comment_marker.nil?) ? '' : line_comment_marker)
-      @comment_markers = ((comment_marker.nil?) ? ['', ''] : comment_marker)
+    NUMBER_REGEX = /^[\d]+[\.]?[\d]*\z/
+    STRING_REGEX = /^\"(.*)\"\z/m
+    attr_reader :keywords, :symbols, :string_literal_marker, :line_comment_marker, :comment_markers, :tokens, :str
+    def initialize(keywords, symbols, string_literal_marker, line_comment_marker, comment_markers)
+      @keywords = (keywords ? keywords.uniq.compact : [])
+      @symbols = (symbols ? symbols.uniq.compact : [])
+      @string_literal_marker = string_literal_marker
+      @line_comment_marker = line_comment_marker
+      @comment_markers = comment_markers
     end
     def analyze(str)
@@ -34,39 +37,51 @@ module SrcLexer
     def pop_token
       token = @tokens.shift
-      if token.nil? then
-        return END_TOKEN
-      end
+      return END_TOKEN if token.nil?
       case token[0]
-      when /^[\d]+[\.]?[\d]*\z/
+      when NUMBER_REGEX
         [:NUMBER, Token.new(token[0], token[1], token[2])]
-      when /^\"(.*)\"\z/m
+      when STRING_REGEX
         [:STRING, Token.new(token[0], token[1], token[2])]
       else
-        id = is_reserved?(token[0]) ? token[0] : :IDENT
-        [id, Token.new(token[0], token[1], token[2])]
+        [is_reserved?(token[0]) ? token[0] : :IDENT, Token.new(token[0], token[1], token[2])]
       end
     end
     private
-    class StringIterator
-      attr_reader :index
+    class PosInfo
+      attr_accessor :index, :line_no, :char_no
+      def initialize
+        @index = 0
+        @line_no = 1
+        @char_no = 1
+      end
+    end
+    class StringIterator
       def initialize(str)
         @str = str
-        @index = 0
-        @marked_pos = -1
+        @current_pos = PosInfo.new
+        @marked_pos = PosInfo.new
+        mark_clear()
+      end
+      def mark_clear
+        @marked_pos.index = -1
+        @marked_pos.line_no = 0
+        @marked_pos.char_no = 0
       end
       def mark_set
-        @marked_pos = @index
+        @marked_pos = @current_pos.clone
       end
       def is(target_string)
         return false if target_string.length.zero?
-        end_pos = (@index + target_string.length - 1)
-        @str[@index..end_pos] == target_string
+        end_pos = (@current_pos.index + target_string.length - 1)
+        @str[@current_pos.index..end_pos] == target_string
       end
       def is_in(target_list)
@@ -74,57 +89,51 @@ module SrcLexer
       end
       def move_next
-        @index += 1
+        if /\n/.match @str[@current_pos.index]
+          @current_pos.line_no += 1
+          @current_pos.char_no = 1
+        else
+          @current_pos.char_no += 1
+        end
+        @current_pos.index += 1
       end
       def move_to_the_end_of_the_line
-        @index += (@str[@index..-1] =~ /$/) - 1
+        char_count_to_the_end_of_the_line = (@str[@current_pos.index..-1] =~ /$/) - 1
+        @current_pos.index += char_count_to_the_end_of_the_line
+        @current_pos.char_no += char_count_to_the_end_of_the_line
       end
       def move_to(target)
-        esceped_target = Regexp.escape(target)
-        @index += (@str[@index..-1] =~ /#{esceped_target}/m) + target.length - 1
-      end
-      def [](range)
-        @str[range]
-      end
-      def <(pos)
-        @index < pos
+        char_count_to_target = (@str[@current_pos.index..-1] =~ /#{Regexp.escape(target)}/m) + target.length - 1
+        chopped_string = @str[@current_pos.index..@current_pos.index + char_count_to_target]
+        @current_pos.index += char_count_to_target
+        match = /.*\n(.*)$/m.match(chopped_string)
+        p match[1].length if match
+        if match
+          @current_pos.char_no = match[1].length
+        else
+          @current_pos.char_no += char_count_to_target
+        end
+        @current_pos.line_no += chopped_string.each_char.select{|char| /\n/.match char}.length
       end
-      def char
-        @str[@index]
+      def <(index)
+        @current_pos.index < index
       end
       def is_white_space
-        /[\s]/.match(char)
-      end
-      def info(pos)
-        [0, 0] if pos == 0
-        line_no, char_no = 1, 0
-        @str[0..pos].each_char do |char|
-          if /\n/.match(char)
-            line_no += 1
-            char_no = 0
-          else
-            char_no += 1
-          end
-        end
-        [line_no, char_no]
+        /\s/.match(@str[@current_pos.index])
       end
       def marked?
-        @marked_pos != -1
+        @marked_pos.index != -1
       end
       def shift
-        result = @str[@marked_pos..(@index - 1)]
-        line_no_and_char_no = info(@marked_pos)
-        @marked_pos = -1
-        return result, *line_no_and_char_no
+        result = [@str[@marked_pos.index..(@current_pos.index - 1)], @marked_pos.line_no, @marked_pos.char_no]
+        mark_clear()
+        return result
       end
     end
@@ -135,31 +144,33 @@ module SrcLexer
       while iterator < @str.length do
         if iterator.is_white_space then
           @tokens.push iterator.shift if iterator.marked?
-        elsif iterator.is(@line_comment_marker) then
+          iterator.move_next
+        elsif @line_comment_marker && iterator.is(@line_comment_marker) then
           @tokens.push iterator.shift if iterator.marked?
           iterator.move_to_the_end_of_the_line
-        elsif iterator.is(@comment_markers[0]) then
+          iterator.move_next
+        elsif @comment_markers && iterator.is(@comment_markers[0]) then
           @tokens.push iterator.shift if iterator.marked?
           iterator.move_to(@comment_markers[1])
-        elsif iterator.is('"') then
+          iterator.move_next
+        elsif @string_literal_marker && iterator.is(@string_literal_marker[0]) then
           @tokens.push iterator.shift if iterator.marked?
           iterator.mark_set
           iterator.move_next
-          iterator.move_to('"')
+          iterator.move_to(@string_literal_marker[1])
           iterator.move_next
           @tokens.push iterator.shift
-          next
         elsif iterator.is_in(@symbols) then
           @tokens.push iterator.shift if iterator.marked?
-          symbol = @symbols.find { |symbol| iterator.is(symbol) }
-          @tokens.push [iterator[iterator.index..(iterator.index + symbol.length - 1)], *iterator.info(iterator.index)]
-          (symbol.length - 1).times { iterator.move_next }
+          iterator.mark_set
+          @symbols.find { |symbol| iterator.is(symbol) }.length.times { iterator.move_next }
+          @tokens.push iterator.shift
         elsif !iterator.marked? then
           iterator.mark_set
+        else
+          iterator.move_next
         end
-        iterator.move_next
       end
       @tokens.push iterator.shift if iterator.marked?
     end
@@ -167,4 +178,47 @@ module SrcLexer
       @keywords.include?(token) || @symbols.include?(token)
     end
   end
+  class CSharpLexer < Lexer
+    def initialize
+      super(
+        [ # C# keywords
+          'abstract',   'as',       'base',       'bool',      'break',
+          'byte',       'case',     'catch',      'char',      'checked',
+          'class',      'const',    'continue',   'decimal',   'default',
+          'delegate',   'do',       'double',     'else',      'enum',
+          'event',      'explicit', 'extern',     'false',     'finally',
+          'fixed',      'float',    'for',        'foreach',   'goto',
+          'if',         'implicit', 'in',         'int',       'interface',
+          'internal',   'is',       'lock',       'long',      'namespace',
+          'new',        'null',     'object',     'operator',  'out',
+          'override',   'params',   'private',    'protected', 'public',
+          'readonly',   'ref',      'return',     'sbyte',     'sealed',
+          'short',      'sizeof',   'stackalloc', 'static',    'string',
+          'struct',     'switch',   'this',       'throw',     'true',
+          'try',        'typeof',   'uint',       'ulong',     'unchecked',
+          'unsafe',     'ushort',   'using',      'virtual',   'void',
+          'volatile',   'while',
+          # C# context keywords
+          'add',        'alias',    'ascending',  'async',     'await',
+          'descending', 'dynamic',  'from',       'get',       'global',
+          'group',      'into',     'join',       'let',       'orderby',
+          'partial',    'remove',   'select',     'set',       'value',
+          'var',        'where',    'yield'
+        ],
+        [
+          '<<=', '>>=', '<<',  '>>',  '<=',
+          '>=',  '==',  '!=',  '&&',  '||',
+          '??',  '+=',  '-=',  '*=',  '/=',
+          '%=',  '&=',  '|=',  '^=',  '=>',
+          '*',   '/',   '%',   '+',   '-',
+          '<',   '>',   '&',   '^',   '|',
+          '?',   ':',   '=',   '{',   '}',
+          '(',   ')',   '[',   ']',   ';'
+        ],
+        ['"', '"'], # string literal markers
+        '//', # line comment marker
+        ['/*', '*/']) # multi line comment markers
+    end
+  end
 end

data/spec/src_lexer_spec.rb CHANGED Viewed

@@ -9,7 +9,7 @@ end
 describe SrcLexer::Lexer, 'with empty string' do
   it 'should return Lexer::END_TOKEN' do
-    sut = SrcLexer::Lexer.new(nil, nil, nil, nil)
+    sut = SrcLexer::Lexer.new(nil, nil, nil, nil, nil)
     sut.analyze('')
     sut.pop_token.should == SrcLexer::Lexer::END_TOKEN
   end
@@ -17,7 +17,7 @@ end
 describe SrcLexer::Lexer, 'with keyword definitions' do
   it 'should recognize keywords' do
-    sut = SrcLexer::Lexer.new(['struct', 'enum'], nil, nil, nil)
+    sut = SrcLexer::Lexer.new(['struct', 'enum'], nil, nil, nil, nil)
     sut.analyze('struct structenum enum')
     sut.pop_token.should == ['struct', SrcLexer::Token.new('struct', 1, 1)]
     sut.pop_token.should == [:IDENT, SrcLexer::Token.new('structenum', 1, 8)]
@@ -25,18 +25,18 @@ describe SrcLexer::Lexer, 'with keyword definitions' do
     sut.pop_token.should == SrcLexer::Lexer::END_TOKEN
   end
   it 'should reduce keyword duplication' do
-    sut = SrcLexer::Lexer.new(['struct', 'struct'], nil, nil, nil)
+    sut = SrcLexer::Lexer.new(['struct', 'struct'], nil, nil, nil, nil)
     sut.keywords.should == ['struct']
   end
   it 'should ignore nil keyword' do
-    sut = SrcLexer::Lexer.new(['struct', nil, 'enum'], nil, nil, nil)
+    sut = SrcLexer::Lexer.new(['struct', nil, 'enum'], nil, nil, nil, nil)
     sut.keywords.should == ['struct', 'enum']
   end
 end
 describe SrcLexer::Lexer, 'with symbol definitions' do
   it 'should recognize symbols' do
-    sut = SrcLexer::Lexer.new(nil, ['..', ','], nil, nil)
+    sut = SrcLexer::Lexer.new(nil, ['..', ','], nil, nil, nil)
     sut.analyze('.. A ,')
     sut.pop_token.should == ['..', SrcLexer::Token.new('..', 1, 1)]
     sut.pop_token.should == [:IDENT, SrcLexer::Token.new('A', 1, 4)]
@@ -44,7 +44,7 @@ describe SrcLexer::Lexer, 'with symbol definitions' do
     sut.pop_token.should == SrcLexer::Lexer::END_TOKEN
   end
   it 'should recognize symbols(,) if continues like "A,B"' do
-    sut = SrcLexer::Lexer.new(nil, [','], nil, nil)
+    sut = SrcLexer::Lexer.new(nil, [','], nil, nil, nil)
     sut.analyze('A,B')
     sut.pop_token.should == [:IDENT, SrcLexer::Token.new('A', 1, 1)]
     sut.pop_token.should == [',', SrcLexer::Token.new(',', 1, 2)]
@@ -52,18 +52,18 @@ describe SrcLexer::Lexer, 'with symbol definitions' do
     sut.pop_token.should == SrcLexer::Lexer::END_TOKEN
   end
   it 'should reduce symbol duplication' do
-    sut = SrcLexer::Lexer.new(nil, [',', ','], nil, nil)
+    sut = SrcLexer::Lexer.new(nil, [',', ','], nil, nil, nil)
     sut.symbols.should == [',']
   end
   it 'should ignore nil keyword' do
-    sut = SrcLexer::Lexer.new(nil, ['{', nil, '}'], nil, nil)
+    sut = SrcLexer::Lexer.new(nil, ['{', nil, '}'], nil, nil, nil)
     sut.symbols.should == ['{', '}']
   end
 end
 describe SrcLexer::Lexer, 'with line comment marker' do
   it 'should recognize line comment' do
-    sut = SrcLexer::Lexer.new(nil, nil, '//', nil)
+    sut = SrcLexer::Lexer.new(nil, nil, nil, '//', nil)
     sut.analyze(<<-'EOS')
       A//comment
       B
@@ -73,7 +73,7 @@ describe SrcLexer::Lexer, 'with line comment marker' do
     sut.pop_token.should == SrcLexer::Lexer::END_TOKEN
   end
   it 'should recognize multi line comment' do
-    sut = SrcLexer::Lexer.new(nil, nil, '//', ['/*', '*/'])
+    sut = SrcLexer::Lexer.new(nil, nil, nil, '//', ['/*', '*/'])
     sut.analyze(<<-'EOS')
       A/*comment
       B//still in comment*/C
@@ -86,14 +86,14 @@ end
 describe SrcLexer::Lexer do
   it 'should analyze number string' do
-    sut = SrcLexer::Lexer.new(nil, nil, nil, nil)
+    sut = SrcLexer::Lexer.new(nil, nil, nil, nil, nil)
     sut.analyze('9 1.5')
     sut.pop_token.should == [:NUMBER, SrcLexer::Token.new("9", 1, 1,)]
     sut.pop_token.should == [:NUMBER, SrcLexer::Token.new("1.5", 1, 3)]
     sut.pop_token.should == SrcLexer::Lexer::END_TOKEN
   end
   it 'should analyze string literal' do
-    sut = SrcLexer::Lexer.new(nil, nil, '//', ['/*', '*/'])
+    sut = SrcLexer::Lexer.new(nil, nil, ['"', '"'], '//', ['/*', '*/'])
     sut.analyze('A"//"B"/**/"C')
     sut.pop_token.should == [:IDENT, SrcLexer::Token.new('A', 1, 1)]
     sut.pop_token.should == [:STRING, SrcLexer::Token.new('"//"', 1, 2)]

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: src_lexer
 version: !ruby/object:Gem::Version
-  version: 0.0.1
+  version: 0.0.2
 platform: ruby
 authors:
 - kkikzk
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2014-04-27 00:00:00.000000000 Z
+date: 2014-04-29 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler