RubyGems - scanner - Versions diffs - 0.0.1 → 0.0.2 - Mend

scanner 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4)

data/lib/scanner/scanner.rb +84 -7
data/lib/scanner/version.rb +1 -1
data/spec/scanner/scanner_spec.rb +86 -4
metadata +4 -4

data/lib/scanner/scanner.rb CHANGED

@@ -6,16 +6,31 @@ module Scanner
     aModule.instance_eval do
       @language_tokens = {}
       @ignore = nil
-      def token(token_symbol, regular_expression)
-        @language_tokens[token_symbol] = regular_expression
+      @keywords = nil
+      @check_for_token_separator = {}
+      @separator = nil
+      def token(token_symbol, regular_expression, options = {})
+        modified_reg_exp = "\\A#{regular_expression}"
+        @language_tokens[token_symbol] = /#{modified_reg_exp}/
+        @check_for_token_separator[token_symbol] = options[:check_for_token_separator] == true
       end
       def ignore(regular_expression)
-        @ignore = regular_expression
+        modified_reg_exp = "\\A#{regular_expression}"
+        @ignore = /#{modified_reg_exp}/
+      end
+      def keywords(keywords)
+        @keywords = keywords
       end
-      token :eof, /\A\z/
+      def token_separator(regular_expression)
+        modified_reg_exp = "\\A#{regular_expression}"
+        @separator = /#{modified_reg_exp}/
+      end
+      token :eof, '\z'
     end
   end
@@ -29,11 +44,25 @@ module Scanner
     self.class.instance_eval { @ignore }
   end
+  def keywords
+    self.class.instance_eval { @keywords }
+  end
+  def check_for_token_separator
+    self.class.instance_eval { @check_for_token_separator }
+  end
+  def separator
+    self.class.instance_eval { @separator }
+  end
   public
   def parse(program)
     @program = program
     @token_list = []
+    @line_number = 1
+    @column_number = 1
   end
   def consume
@@ -55,24 +84,62 @@ module Scanner
     @token_list[-1]
   end
+  def token_is?(token_type)
+    look_ahead.is? token_type
+  end
+  def token_is_not?(token_type)
+    not (look_ahead.is? token_type)
+  end
+  def tokens_are?(*tokens)
+    look_ahead_index = 1
+    tokens.each do |token|
+      return false unless look_ahead(look_ahead_index).is? token
+      look_ahead_index += 1
+    end
+    return true
+  end
   private
   def consume_next_token
     clear_ignore_text
+    currently_at_column = @column_number
     language_tokens.each do |symbol, reg_exp|
       if @program =~ reg_exp
-        return Token.new(symbol, consume_regular_expression(reg_exp), 0, 0)
+        content, token_type = get_token_from_reg_exp(reg_exp, symbol)
+        if check_for_token_separator[symbol]
+          check_for_separator
+        end
+        return Token.new(token_type, content, @line_number, currently_at_column)
       end
     end
     throw :scanner_exception
   end
+  def check_for_separator
+    eof = language_tokens[:eof]
+    throw :scanner_exception unless @program =~ separator || @program =~ eof
+  end
+  def get_token_from_reg_exp(reg_exp, symbol)
+    content = consume_regular_expression(reg_exp)
+    if keywords.include? content
+      token_type = content.to_sym
+    else
+      token_type = symbol
+    end
+    return content, token_type
+  end
   def consume_regular_expression(regexp)
     content = @program[regexp]
     @program.gsub!(regexp,"")
+    calculate_position_after content
     content
   end
@@ -80,5 +147,15 @@ module Scanner
     consume_regular_expression(ignore) if ignore
   end
+  def calculate_position_after(content)
+    if content
+      number_of_new_lines = content.scan(/\n/).size
+      if number_of_new_lines > 0
+        @line_number += number_of_new_lines
+        @column_number = content.gsub(/.*\n/,"").length + 1
+      else
+        @column_number += content.length
+      end
+    end
+  end
 end

data/lib/scanner/version.rb CHANGED

@@ -1,3 +1,3 @@
 module Scanner
-  VERSION = "0.0.1"
+  VERSION = "0.0.2"
 end

data/spec/scanner/scanner_spec.rb CHANGED

@@ -4,9 +4,11 @@ describe Scanner do
   before(:each) do
     class TestScanner
       include Scanner
-      ignore /\s+/
-      token :number, /\d+/
-      token :id, /\w+/
+      ignore '[\s|\n]+'
+      token :number, '\d+', check_for_token_separator: true
+      token :id, '[a-z]+', check_for_token_separator: true
+      keywords %w{if}
+      token_separator '\s'
     end
     @scanner = TestScanner.new
@@ -27,7 +29,7 @@ describe Scanner do
     end
     it "consume clears ignore tokens before token" do
-      @scanner.parse("   123")
+      @scanner.parse("  \n 123")
       token = @scanner.consume
       token.is?(:number).should be true
     end
@@ -51,5 +53,85 @@ describe Scanner do
     end
   end
+  describe "line number" do
+    it "starts with one" do
+      @scanner.parse("123")
+      @scanner.consume.line.should eq 1
+    end
+    it "increases after newlines" do
+      @scanner.parse("\n\n123")
+      @scanner.consume.line.should eq 3
+    end
+  end
+  describe "column number" do
+    it "starts with one" do
+      @scanner.parse("123")
+      @scanner.consume.column.should eq 1
+    end
+    it "increases after tokens are consumed" do
+      @scanner.parse("123 abc")
+      @scanner.consume # 123
+      @scanner.consume.column.should eq 5
+    end
+    it "resets after new lines" do
+      @scanner.parse("123\n abc")
+      @scanner.consume # 123
+      @scanner.consume.column.should eq 2
+    end
+  end
+  describe "keywords" do
+    it "a keyword is identified as such" do
+      @scanner.parse("if")
+      @scanner.consume.is?(:if).should be true
+    end
+  end
+  describe "token is?" do
+    it "correctly identifies token to come" do
+      @scanner.parse("if")
+      @scanner.token_is?(:if).should be true
+    end
+  end
+  describe "token is not?" do
+    it "correctly identifies tokens that aren't" do
+      @scanner.parse("if")
+      @scanner.token_is_not?(:id).should be true
+    end
+    it "return false if the token is the one in the parameter" do
+      @scanner.parse("if")
+      @scanner.token_is_not?(:if).should be false
+    end
+  end
+  describe "tokens are?" do
+    it "correctly identifies valid sequences" do
+      @scanner.parse("if other 123")
+      @scanner.tokens_are?(:if, :id, :number).should be true
+    end
+    it "correctly identifies invalid sequences" do
+      @scanner.parse("if other 123")
+      @scanner.tokens_are?(:if, :id, :id).should be false
+    end
+  end
+  describe "token separator" do
+    it "fails to recognise token without separator" do
+      @scanner.parse("other123")
+      lambda { @scanner.consume }.should raise_error
+    end
+    it "works if token after is eof" do
+      @scanner.parse("123")
+      @scanner.consume.is?(:number).should be true
+    end
+  end
 end

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: scanner
 version: !ruby/object:Gem::Version
-  version: 0.0.1
+  version: 0.0.2
   prerelease:
 platform: ruby
 authors:
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2012-07-31 00:00:00.000000000 Z
+date: 2012-08-03 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rspec
@@ -77,7 +77,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
       version: '0'
       segments:
       - 0
-      hash: -2890427904096243855
+      hash: 1008594902208819548
 required_rubygems_version: !ruby/object:Gem::Requirement
   none: false
   requirements:
@@ -86,7 +86,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
       segments:
       - 0
-      hash: -2890427904096243855
+      hash: 1008594902208819548
 requirements: []
 rubyforge_project:
 rubygems_version: 1.8.24