gherkin 0.0.3-universal-java-1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. data/.gitignore +8 -0
  2. data/LICENSE +20 -0
  3. data/README.rdoc +47 -0
  4. data/Rakefile +48 -0
  5. data/VERSION.yml +4 -0
  6. data/bin/gherkin +10 -0
  7. data/cucumber.yml +3 -0
  8. data/ext/gherkin_lexer/.gitignore +6 -0
  9. data/ext/gherkin_lexer/extconf.rb +6 -0
  10. data/features/feature_parser.feature +206 -0
  11. data/features/native_lexer.feature +19 -0
  12. data/features/parser_with_native_lexer.feature +205 -0
  13. data/features/pretty_printer.feature +11 -0
  14. data/features/step_definitions/gherkin_steps.rb +34 -0
  15. data/features/step_definitions/pretty_printer_steps.rb +51 -0
  16. data/features/steps_parser.feature +46 -0
  17. data/features/support/env.rb +33 -0
  18. data/gherkin.gemspec +177 -0
  19. data/java/.gitignore +2 -0
  20. data/java/Gherkin.iml +24 -0
  21. data/java/build.xml +13 -0
  22. data/java/src/gherkin/FixJava.java +34 -0
  23. data/java/src/gherkin/Lexer.java +5 -0
  24. data/java/src/gherkin/LexingError.java +7 -0
  25. data/java/src/gherkin/Listener.java +27 -0
  26. data/java/src/gherkin/ParseError.java +22 -0
  27. data/java/src/gherkin/Parser.java +185 -0
  28. data/java/src/gherkin/lexer/.gitignore +1 -0
  29. data/java/src/gherkin/parser/StateMachineReader.java +62 -0
  30. data/lib/.gitignore +2 -0
  31. data/lib/gherkin.rb +2 -0
  32. data/lib/gherkin/c_lexer.rb +10 -0
  33. data/lib/gherkin/i18n.yml +535 -0
  34. data/lib/gherkin/i18n_lexer.rb +29 -0
  35. data/lib/gherkin/java_lexer.rb +10 -0
  36. data/lib/gherkin/lexer.rb +42 -0
  37. data/lib/gherkin/parser.rb +19 -0
  38. data/lib/gherkin/parser/meta.txt +4 -0
  39. data/lib/gherkin/parser/root.txt +9 -0
  40. data/lib/gherkin/parser/steps.txt +3 -0
  41. data/lib/gherkin/rb_lexer.rb +9 -0
  42. data/lib/gherkin/rb_lexer/.gitignore +1 -0
  43. data/lib/gherkin/rb_lexer/README.rdoc +8 -0
  44. data/lib/gherkin/rb_parser.rb +117 -0
  45. data/lib/gherkin/tools/pretty_printer.rb +77 -0
  46. data/ragel/i18n/.gitignore +1 -0
  47. data/ragel/lexer.c.rl.erb +385 -0
  48. data/ragel/lexer.java.rl.erb +198 -0
  49. data/ragel/lexer.rb.rl.erb +172 -0
  50. data/ragel/lexer_common.rl.erb +46 -0
  51. data/spec/gherkin/c_lexer_spec.rb +21 -0
  52. data/spec/gherkin/fixtures/complex.feature +43 -0
  53. data/spec/gherkin/fixtures/i18n_fr.feature +13 -0
  54. data/spec/gherkin/fixtures/i18n_no.feature +6 -0
  55. data/spec/gherkin/fixtures/i18n_zh-CN.feature +8 -0
  56. data/spec/gherkin/fixtures/simple.feature +3 -0
  57. data/spec/gherkin/fixtures/simple_with_comments.feature +7 -0
  58. data/spec/gherkin/fixtures/simple_with_tags.feature +11 -0
  59. data/spec/gherkin/i18n_spec.rb +57 -0
  60. data/spec/gherkin/java_lexer_spec.rb +20 -0
  61. data/spec/gherkin/parser_spec.rb +28 -0
  62. data/spec/gherkin/rb_lexer_spec.rb +18 -0
  63. data/spec/gherkin/sexp_recorder.rb +29 -0
  64. data/spec/gherkin/shared/lexer_spec.rb +420 -0
  65. data/spec/gherkin/shared/py_string_spec.rb +112 -0
  66. data/spec/gherkin/shared/table_spec.rb +97 -0
  67. data/spec/gherkin/shared/tags_spec.rb +50 -0
  68. data/spec/spec_helper.rb +53 -0
  69. data/tasks/bench.rake +176 -0
  70. data/tasks/bench/feature_builder.rb +49 -0
  71. data/tasks/bench/generated/.gitignore +1 -0
  72. data/tasks/bench/null_listener.rb +4 -0
  73. data/tasks/cucumber.rake +20 -0
  74. data/tasks/ext.rake +49 -0
  75. data/tasks/ragel.rake +94 -0
  76. data/tasks/rdoc.rake +12 -0
  77. data/tasks/rspec.rake +15 -0
  78. metadata +204 -0
@@ -0,0 +1,29 @@
1
+ require 'gherkin/lexer'
2
+
3
module Gherkin
  # The main entry point to lexing Gherkin source.
  #
  # Detects an optional language declaration on the first line of the
  # source (e.g. "# language: fr") and delegates scanning to the lexer
  # generated for that language, defaulting to English.
  class I18nLexer
    LANGUAGE_PATTERN = /language\s*:\s*(.*)/ #:nodoc:

    # +parser+ is the listener that will receive the lexing events.
    def initialize(parser)
      @parser = parser
    end

    # Scans +source+ with the lexer for its declared language ('en' when
    # no declaration is present).
    def scan(source)
      delegate = Lexer[lang(source) || 'en'].new(@parser)
      delegate.scan(source)
    end

    private

    # Returns the language code declared on the FIRST line of +source+,
    # or nil when there is no declaration. Only line one is examined, so
    # a declaration further down is deliberately ignored.
    def lang(source)
      # Split with a limit of 2 so we never tokenise the whole document
      # just to look at its first line.
      first_line = source.split(/\n/, 2).first
      match = first_line && LANGUAGE_PATTERN.match(first_line)
      match && match[1].strip
    end
  end
end
@@ -0,0 +1,10 @@
1
+ require 'gherkin.jar'
2
+
3
module Gherkin
  # Locates the generated Java lexer class when running under JRuby.
  module JavaLexer
    # Returns the Java lexer class for +i18n_language+, e.g. 'en' maps to
    # Java::GherkinLexer::En ('zh-CN' -> 'Zhcn': spaces/dashes stripped,
    # then capitalized to match the generated class names).
    def self.[](i18n_language)
      class_name = i18n_language.gsub(/[\s-]/, '').capitalize
      Java::GherkinLexer.__send__(class_name)
    end
  end
end
@@ -0,0 +1,42 @@
1
module Gherkin
  module Lexer
    # Raised when no lexer implementation can be loaded for a language.
    I18nLexerNotFound = Class.new(LoadError)
    # Raised by lexers when the source cannot be tokenised.
    LexingError = Class.new(StandardError)

    # Returns the best available lexer class for +i18n_lang+: the Java
    # lexer on JRuby, otherwise the native C lexer with a fallback to the
    # pure-Ruby lexer. Raises I18nLexerNotFound when nothing can be loaded.
    def self.[](i18n_lang)
      if defined?(JRUBY_VERSION)
        java[i18n_lang]
      else
        begin
          c[i18n_lang]
        rescue NameError => e
          # The C extension loaded but has no class for this language.
          warn("WARNING: #{e.message}. Reverting to Ruby lexer")
          rb[i18n_lang]
        rescue LoadError
          # The C extension is not compiled/installed.
          rb[i18n_lang]
        end
      end
    rescue LoadError
      raise I18nLexerNotFound, "No lexer was found for #{i18n_lang}. Supported languages are listed in gherkin/i18n.yml."
    end

    # Lazily loads and returns the C lexer namespace.
    def self.c
      require 'gherkin/c_lexer'
      CLexer
    end

    # Lazily loads and returns the Java lexer namespace.
    def self.java
      require 'gherkin/java_lexer'
      JavaLexer
    end

    # Lazily loads and returns the pure-Ruby lexer namespace.
    def self.rb
      require 'gherkin/rb_lexer'
      RbLexer
    end
  end
end
@@ -0,0 +1,19 @@
1
module Gherkin
  # Raised (or reported via the listener) when an event arrives while the
  # state machine is in a state that does not permit it.
  class ParseError < StandardError
    def initialize(state, new_state, expected_states, line)
      super("Parse error on line #{line}. Found #{new_state} when expecting one of: #{expected_states.join(', ')}. (Current state: #{state}).")
    end
  end

  class Parser
    # Factory method: picks the Java parser under JRuby and the pure-Ruby
    # parser everywhere else. Callers receive a parser instance either way.
    def self.new(listener, raise_on_error = false, machine_names = 'root')
      if defined?(JRUBY_VERSION)
        require 'gherkin.jar'
        Java::Gherkin::Parser.new(listener, raise_on_error, machine_names)
      else
        require 'gherkin/rb_parser'
        RbParser.new(listener, raise_on_error, machine_names)
      end
    end
  end
end
@@ -0,0 +1,4 @@
1
+ | | feature | background | scenario | scenario_outline | examples | step | table | py_string | comment | tag |
2
+ | meta | E | E | E | E | E | E | E | E | comment | tag |
3
+ | comment | pop() | pop() | pop() | pop() | pop() | pop() | pop() | pop() | pop() | tag |
4
+ | tag | pop() | E | pop() | pop() | pop() | E | E | E | E | tag |
@@ -0,0 +1,9 @@
1
+ | | feature | background | scenario | scenario_outline | examples | step | table | py_string | comment | tag |
2
+ | root | feature | E | E | E | E | E | E | E | push(meta) | push(meta) |
3
+ | feature | E | background | scenario | scenario_outline | E | E | E | E | push(meta) | push(meta) |
4
+ | step | E | E | scenario | scenario_outline | examples | step | step | step | push(meta) | push(meta) |
5
+ | background | E | E | scenario | scenario_outline | E | step | E | E | push(meta) | push(meta) |
6
+ | scenario | E | E | scenario | scenario_outline | E | step | E | E | push(meta) | push(meta) |
7
+ | scenario_outline | E | E | E | E | E | step | E | E | push(meta) | push(meta) |
8
+ | examples | E | E | E | E | E | E | examples_table | E | push(meta) | push(meta) |
9
+ | examples_table | E | E | scenario | scenario_outline | examples | E | E | E | push(meta) | push(meta) |
@@ -0,0 +1,3 @@
1
+ | | feature | background | scenario | scenario_outline | examples | step | table | py_string | comment | tag |
2
+ | steps | E | E | E | E | E | step | E | E | E | E |
3
+ | step | E | E | E | E | E | step | steps | steps | E | E |
@@ -0,0 +1,9 @@
1
module Gherkin
  # Resolves the generated pure-Ruby lexer class for an i18n language.
  module RbLexer
    # Loads gherkin/rb_lexer/<lang>.rb and returns its class, e.g. 'en'
    # yields En (spaces/dashes stripped, then capitalized).
    def self.[](i18n_language)
      require "gherkin/rb_lexer/#{i18n_language}"
      const_get(i18n_language.gsub(/[\s-]/, '').capitalize)
    end
  end
end
@@ -0,0 +1 @@
1
+ *.rb
@@ -0,0 +1,8 @@
1
+ = Lexers
2
+
3
+ Gherkin supports lexing of lots of natural languages, defined by gherkin/i18n.yml
4
+ The lexers are generated with the following command:
5
+
6
+ rake ragel:i18n
7
+
8
+ You have to run this command if you modify gherkin/i18n.yml
@@ -0,0 +1,117 @@
1
module Gherkin
  # Pure-Ruby event-driven parser. It sits between a lexer and a listener:
  # the lexer invokes event methods (feature, scenario, step, ...) which are
  # trapped by method_missing, validated against a table-driven state
  # machine, and forwarded to the listener only when legal in the current
  # state.
  class RbParser
    # Initialize the parser. +machine_name+ refers to a state machine table.
    def initialize(listener, raise_on_error, machine_name)
      @listener = listener
      @raise_on_error = raise_on_error
      @machines = []
      push_machine(machine_name)
    end

    # Doesn't yet fall back to super
    def method_missing(method, *args)
      # TODO: Catch exception and call super
      # By convention the last argument of every lexer event is its line number.
      if(event(method.to_s, args[-1]))
        @listener.send(method, *args)
      end
    end

    # Feeds event +ev+ to the current machine. Returns true when the event
    # is legal; on an illegal event either raises ParseError or reports
    # syntax_error to the listener and returns false.
    def event(ev, line)
      machine.event(ev, line) do |state, expected|
        if @raise_on_error
          raise ParseError.new(state, ev, expected, line)
        else
          @listener.syntax_error(state, ev, expected, line)
          # `return` inside the block exits RbParser#event itself with false.
          return false
        end
      end
      true
    end

    # Pushes a sub-machine (e.g. "meta") onto the machine stack.
    def push_machine(name)
      @machines.push(Machine.new(self, name))
    end

    def pop_machine
      @machines.pop
    end

    # The machine on top of the stack handles current events.
    def machine
      @machines[-1]
    end

    # Event names legal in the current state of the current machine.
    def expected
      machine.expected
    end

    # Test/tooling hook: forcibly set the current machine's state.
    def force_state(state)
      machine.instance_variable_set('@state', state)
    end

    # A single state machine built from a transition table stored in
    # parser/<name>.txt. Each cell holds a target state name, "E" (error),
    # "push(<machine>)" or "pop()".
    class Machine
      def initialize(parser, name)
        @parser = parser
        @name = name
        @transition_map = transition_map(name)
        # The initial state is named after the machine itself.
        @state = name
      end

      # Applies +ev+ to the current state. Yields (state, expected) on an
      # illegal event; push/pop cells delegate the event back through the
      # parser so the new top-of-stack machine handles it.
      def event(ev, line)
        states = @transition_map[@state]
        raise "Unknown state: #{@state.inspect} for machine #{@name}" if states.nil?
        new_state = states[ev]
        case new_state
        when "E"
          yield @state, expected
        when /push\((.+)\)/
          # Enter a sub-machine and replay the same event on it.
          @parser.push_machine($1)
          @parser.event(ev, line)
        when "pop()"
          # Return to the previous machine and replay the same event on it.
          @parser.pop_machine()
          @parser.event(ev, line)
        else
          raise "Unknown transition: #{ev.inspect} among #{states.inspect} for machine #{@name}" if new_state.nil?
          @state = new_state
        end
      end

      # Sorted event names that would NOT be an error in the current state.
      def expected
        allowed = @transition_map[@state].find_all { |_, action| action != "E" }
        allowed.collect { |state| state[0] }.sort
      end

      private

      # Process-wide cache of parsed transition tables, keyed by machine name.
      @@transition_maps = {}

      def transition_map(name)
        @@transition_maps[name] ||= build_transition_map(name)
      end

      # Converts the raw table into {state => {event => action}}.
      def build_transition_map(name)
        table = transition_table(name)
        # The header row holds the event names (first cell is blank).
        events = table.shift[1..-1]
        table.inject({}) do |machine, actions|
          state = actions.shift
          machine[state] = Hash[*events.zip(actions).flatten]
          machine
        end
      end

      # Reads parser/<name>.txt by lexing it as a Gherkin table.
      def transition_table(name)
        state_machine_reader = StateMachineReader.new
        lexer = Gherkin::Lexer['en'].new(state_machine_reader)
        lexer.scan(File.read(File.dirname(__FILE__) + "/parser/#{name}.txt"))
        state_machine_reader.rows
      end

      # Minimal lexer listener that only captures table rows.
      class StateMachineReader
        attr_reader :rows
        def table(rows, line_number)
          @rows = rows
        end
      end

    end
  end
end
@@ -0,0 +1,77 @@
1
module Gherkin
  module Tools
    # Lexer/parser listener that pretty-prints the events it receives back
    # to +io+ as canonically indented feature text.
    class PrettyPrinter
      # +io+ is any object responding to #puts (File, StringIO, ...).
      def initialize(io)
        @io = io
      end

      # Tags are buffered and emitted by the next feature/scenario event.
      def tag(name, line)
        @tags ||= []
        @tags << "@#{name}"
      end

      def comment(content, line)
        @io.puts content
      end

      def feature(keyword, name, line)
        tags = @tags ? @tags.join(' ') + "\n" : ''
        @tags = nil
        @io.puts "#{tags}#{keyword}: #{indent(name, '  ')}"
      end

      def background(keyword, name, line)
        @io.puts "\n  #{keyword}: #{indent(name, '  ')}"
      end

      def scenario(keyword, name, line)
        tags = @tags ? '  ' + @tags.join(' ') + "\n" : ''
        @tags = nil
        @io.puts "\n#{tags}  #{keyword}: #{indent(name, '  ')}"
      end

      def scenario_outline(keyword, name, line)
        tags = @tags ? '  ' + @tags.join(' ') + "\n" : ''
        @tags = nil
        @io.puts "\n#{tags}  #{keyword}: #{indent(name, '  ')}"
      end

      def examples(keyword, name, line)
        @io.puts "\n    #{keyword}: #{indent(name, '    ')}"
      end

      def step(keyword, name, line)
        @io.puts "    #{keyword} #{indent(name, '    ')}"
      end

      # Prints +rows+ as a pipe table with every column padded to the width
      # of its widest cell. unpack("U*") counts characters rather than
      # bytes, so multi-byte UTF-8 cells stay aligned.
      def table(rows, line)
        rows = rows.to_a.map {|row| row.to_a} if defined?(JRUBY_VERSION) # Convert ArrayList
        max_lengths = rows.transpose.map { |col| col.map { |cell| cell.unpack("U*").length }.max }
        # FIX: the block variable was previously named `line`, shadowing
        # the method parameter of the same name.
        rows.each do |row|
          @io.puts '      | ' + row.zip(max_lengths).map { |cell, max_length| cell + ' ' * (max_length - cell.unpack("U*").length) }.join(' | ') + ' |'
        end
      end

      def py_string(string, line)
        @io.puts '      """'
        @io.puts string.gsub(/^/, '      ')
        @io.puts '      """'
      end

      def syntax_error(state, event, legal_events, line)
        raise "SYNTAX ERROR"
      end

      private

      # Indents every line of +string+ after the first with +indentation+,
      # so multi-line names line up under their keyword.
      def indent(string, indentation)
        indent = ""
        string.split(/\n/n).map do |l|
          s = "#{indent}#{l}"
          indent = indentation
          s
        end.join("\n")
      end
    end
  end
end
@@ -0,0 +1 @@
1
+ *.rl
@@ -0,0 +1,385 @@
1
/*
 * ERB template for the Ragel-generated C extension lexer.  It is expanded
 * once per i18n language (see <%= i18n_language %> below), then compiled by
 * Ragel into pure C.  The resulting lexer scans Gherkin source and reports
 * events (feature, scenario, step, table, py_string, comment, tag, ...) to
 * a Ruby listener object.
 */
#include <assert.h>
#include <ruby.h>

#if defined(_WIN32)
#include <stddef.h>
#endif

/* Compatibility shims for older Ruby versions lacking these accessors. */
#ifndef RSTRING_PTR
#define RSTRING_PTR(s) (RSTRING(s)->ptr)
#endif

#ifndef RSTRING_LEN
#define RSTRING_LEN(s) (RSTRING(s)->len)
#endif

/* Fetch the wrapped C struct from a Ruby object; fail loudly if missing. */
#define DATA_GET(FROM, TYPE, NAME) \
  Data_Get_Struct(FROM, TYPE, NAME); \
  if (NAME == NULL) { \
    rb_raise(rb_eArgError, "NULL found for " # NAME " when it shouldn't be."); \
  }

/*
 * Scanner state.  All positions are stored as byte OFFSETS into the input
 * buffer (not pointers) so the state is independent of Ragel's p/pe
 * pointers and can be re-initialized cheaply between scans.
 */
typedef struct lexer_state {
  int content_len;
  int line_number;           /* 1-based line currently being scanned */
  int current_line;          /* line on which the current event started */
  int start_col;             /* column where a py_string opened */
  size_t mark;
  size_t keyword_start;
  size_t keyword_end;
  size_t next_keyword_start;
  size_t content_start;
  size_t content_end;
  size_t field_len;
  size_t query_start;
  size_t last_newline;       /* offset just past the most recent newline */
  size_t final_newline;
} lexer_state;

static VALUE mGherkin;
static VALUE mLexer;
static VALUE mCLexer;
static VALUE cI18nLexer;
static VALUE rb_eGherkinLexerError;

/* Offset helpers; `data` (buffer start) and `lexer` must be in scope. */
#define LEN(AT, P) (P - data - lexer->AT)
#define MARK(M, P) (lexer->M = (P) - data)
#define PTR_TO(P) (data + lexer->P)

/* Emit a keyword+content event (feature/background/scenario/...), then
 * rewind p to just before content_end so trailing input is re-scanned. */
#define STORE_KW_END_CON(EVENT) \
  store_kw_con(listener, # EVENT, \
    PTR_TO(keyword_start), LEN(keyword_start, PTR_TO(keyword_end - 1)), \
    PTR_TO(content_start), LEN(content_start, PTR_TO(content_end)), \
    lexer->current_line); \
  if (lexer->content_end != 0) { \
    p = PTR_TO(content_end - 1); \
  } \
  lexer->content_end = 0;

/* Emit a single-value event (comment/tag). */
#define STORE_ATTR(ATTR) \
  store_attr(listener, # ATTR, \
    PTR_TO(content_start), LEN(content_start, p), \
    lexer->line_number);

%%{
  machine lexer;

  # Mark where an event's content begins and remember its line.
  action begin_content {
    MARK(content_start, p);
    lexer->current_line = lexer->line_number;
  }

  action begin_pystring_content {
    MARK(content_start, p);
  }

  # Record the column of the opening triple-quote so the same amount of
  # indentation can be stripped from every py_string line later.
  action start_pystring {
    lexer->current_line = lexer->line_number;
    lexer->start_col = p - data - lexer->last_newline;
  }

  action store_pystring_content {
    int len = LEN(content_start, PTR_TO(final_newline));

    if (len < 0) len = 0;

    store_pystring_content(listener, lexer->start_col, PTR_TO(content_start), len, lexer->current_line);
  }

  action store_feature_content {
    STORE_KW_END_CON(feature)
  }

  action store_background_content {
    STORE_KW_END_CON(background)
  }

  action store_scenario_content {
    STORE_KW_END_CON(scenario)
  }

  action store_scenario_outline_content {
    STORE_KW_END_CON(scenario_outline)
  }

  action store_examples_content {
    STORE_KW_END_CON(examples)
  }

  action store_step_content {
    store_kw_con(listener, "step",
      PTR_TO(keyword_start), LEN(keyword_start, PTR_TO(keyword_end)),
      PTR_TO(content_start), LEN(content_start, p),
      lexer->current_line);
  }

  action store_comment_content {
    STORE_ATTR(comment)
  }

  action store_tag_content {
    STORE_ATTR(tag)
  }

  action inc_line_number {
    lexer->line_number += 1;
    MARK(final_newline, p);
  }

  action last_newline {
    MARK(last_newline, p + 1);
  }

  # Only the FIRST character of a keyword sets the mark (mark == 0 means
  # "no keyword in progress").
  action start_keyword {
    if (lexer->mark == 0) {
      MARK(mark, p);
    }
  }

  action end_keyword {
    MARK(keyword_end, p);
    MARK(keyword_start, PTR_TO(mark));
    MARK(content_start, p + 1);
    lexer->mark = 0;
  }

  action next_keyword_start {
    MARK(content_end, p);
  }

  action start_table {
    p = p - 1;  /* re-scan the '|' that triggered this action */
    lexer->current_line = lexer->line_number;
    rb_ary_clear(rows);
    rb_ary_clear(current_row);
  }

  action begin_cell_content {
    MARK(content_start, p);
  }

  action store_cell_content {
    VALUE con = Qnil;
    con = rb_str_new(PTR_TO(content_start), LEN(content_start, p));
    rb_funcall(con, rb_intern("strip!"), 0);

    rb_ary_push(current_row, con);
  }

  action start_row {
    current_row = rb_ary_new();
  }

  action store_row {
    rb_ary_push(rows, current_row);
  }

  action store_table {
    rb_funcall(listener, rb_intern("table"), 2, rows, INT2FIX(lexer->current_line));
  }

  # Fires at EOF.  If the machine is not in a final state, extract the
  # offending line (text after the last newline, truncated at the next
  # newline, with '%' doubled so it survives printf-style formatting) and
  # raise a lexing error.
  action end_feature {
    if (cs < lexer_first_final) {
      if (raise_lexer_error != NULL) {
        int count = 0;
        int newstr_count = 0;
        size_t len;
        const char *buff;
        if (lexer->last_newline != 0) {
          len = LEN(last_newline, eof);
          buff = PTR_TO(last_newline);
        } else {
          len = strlen(data);
          buff = data;
        }

        /* NOTE(review): newstr is sized `len`, but the '%'-doubling branch
         * below writes two bytes per '%' — a line with many '%' characters
         * looks like it can overflow this VLA; confirm and bound-check. */
        char newstr[len];

        for (count = 0; count < len; count++) {
          if(buff[count] == 10) {
            newstr[newstr_count] = '\0'; // terminate new string at first newline found
            break;
          } else {
            if (buff[count] == '%') {
              newstr[newstr_count++] = buff[count];
              newstr[newstr_count] = buff[count];
            } else {
              newstr[newstr_count] = buff[count];
            }
          }
          newstr_count++;
        }

        int line = lexer->line_number;
        lexer_init(lexer); // Re-initialize so we can scan again with the same lexer
        raise_lexer_error(listener, newstr, line);
      }
    }
  }

  # Language-specific keyword definitions, generated from i18n.yml.
  include lexer_common "lexer_common.<%= i18n_language %>.rl";

}%%

/** Data **/
%% write data;

/* rb_iterate callback: strips one string in place and collects it. */
static VALUE
strip_i(VALUE str, VALUE ary)
{
  rb_funcall(str, rb_intern("strip!"), 0);
  rb_ary_push(ary, str);

  return Qnil;
}

/* Strips every line of a multi-line string and rejoins with newlines.
 * NOTE(review): `map` collects the stripped lines but the return value is
 * built from `split` — this works only because strip! mutates in place,
 * leaving `map` redundant. */
static VALUE
multiline_strip(VALUE text)
{
  VALUE map = rb_ary_new();
  VALUE split = rb_str_split(text, "\n");

  rb_iterate(rb_each, split, strip_i, map);

  return rb_ary_join(split, rb_str_new2("\n"));
}

/* Sends listener.<event_name>(keyword, content, line) with both strings
 * stripped. */
static void
store_kw_con(VALUE listener, const char * event_name,
             const char * keyword_at, size_t keyword_length,
             const char * at, size_t length,
             int current_line)
{
  VALUE con = Qnil, kw = Qnil;
  kw = rb_str_new(keyword_at, keyword_length);
  con = rb_str_new(at, length);
  con = multiline_strip(con);
  rb_funcall(con, rb_intern("strip!"), 0);
  rb_funcall(kw, rb_intern("strip!"), 0);
  rb_funcall(listener, rb_intern(event_name), 3, kw, con, INT2FIX(current_line));
}

/* Sends listener.<attr_type>(value, line) — used for comments and tags. */
static void
store_attr(VALUE listener, const char * attr_type,
           const char * at, size_t length,
           int line)
{
  VALUE val = rb_str_new(at, length);
  rb_funcall(listener, rb_intern(attr_type), 2, val, INT2FIX(line));
}

/* Sends listener.py_string(content, line) after stripping up to start_col
 * leading spaces from every line of the docstring. */
static void
store_pystring_content(VALUE listener,
          int start_col,
          const char *at, size_t length,
          int current_line)
{
  VALUE con = rb_str_new(at, length);
  // Gherkin will crash gracefully if the string representation of start_col pushes the pattern past 64 characters
  char pat[32];
  snprintf(pat, 32, "^ {0,%d}", start_col);
  VALUE re = rb_reg_regcomp(rb_str_new2(pat));
  rb_funcall(con, rb_intern("gsub!"), 2, re, rb_str_new2(""));
  rb_funcall(listener, rb_intern("py_string"), 2, con, INT2FIX(current_line));
}

/* Raises Gherkin::Lexer::LexingError with the offending line. */
static void
raise_lexer_error(VALUE listener, const char * at, int line)
{
  rb_raise(rb_eGherkinLexerError, "Lexing error on line %d: '%s'.", line, at);
}

/* Resets all scanner state so the same lexer instance can scan again. */
static void lexer_init(lexer_state *lexer) {
  lexer->content_start = 0;
  lexer->content_end = 0;
  lexer->content_len = 0;
  lexer->mark = 0;
  lexer->field_len = 0;
  lexer->keyword_start = 0;
  lexer->keyword_end = 0;
  lexer->next_keyword_start = 0;
  lexer->line_number = 1;
  lexer->last_newline = 0;
  lexer->final_newline = 0;
  lexer->start_col = 0;
}

/* Ruby allocator: wraps a fresh lexer_state (freed with free()). */
static VALUE CLexer_alloc(VALUE klass)
{
  VALUE obj;
  lexer_state *lxr = ALLOC(lexer_state);
  lexer_init(lxr);

  obj = Data_Wrap_Struct(klass, NULL, -1, lxr);

  return obj;
}

/* #initialize(listener): stores the listener and resets scanner state. */
static VALUE CLexer_init(VALUE self, VALUE listener)
{
  rb_iv_set(self, "@listener", listener);

  lexer_state *lxr = NULL;
  DATA_GET(self, lexer_state, lxr);
  lexer_init(lxr);

  return self;
}

/* #scan(input): runs the Ragel machine over input, emitting events to
 * @listener.  Returns Qtrue on success; raises LexingError on failure.
 * NOTE(review): rb_str_append mutates the CALLER'S string by appending the
 * "\n%_FEATURE_END_%" sentinel — confirm callers tolerate this. */
static VALUE CLexer_scan(VALUE self, VALUE input)
{
  lexer_state *lexer = NULL;
  DATA_GET(self, lexer_state, lexer);

  rb_str_append(input, rb_str_new2("\n%_FEATURE_END_%"));
  char *data = RSTRING_PTR(input);
  long len = RSTRING_LEN(input);

  if (len == 0) {
    rb_raise(rb_eGherkinLexerError, "No content to lex.");
  } else {
    const char *p, *pe, *eof;
    int cs = 0;

    VALUE listener = rb_iv_get(self, "@listener");
    VALUE rows = rb_ary_new();
    VALUE current_row = rb_ary_new();

    p = data;
    pe = data + len;
    eof = pe;

    assert(*pe == '\0' && "pointer does not end on NULL");
    assert(pe - p == len && "pointers aren't same distance");

    %% write init;
    %% write exec;

    assert(p <= pe && "data overflow after parsing execute");
    assert(lexer->content_start <= len && "content starts after data end");
    assert(lexer->mark < len && "mark is after data end");
    assert(lexer->field_len <= len && "field has length longer than the whole data");

    // Reset lexer by re-initializing the whole thing
    lexer_init(lexer);

    if (cs == lexer_error) {
      rb_raise(rb_eGherkinLexerError, "Invalid format, lexing fails.");
    } else {
      return Qtrue;
    }
  }
}

/* Extension entry point: defines Gherkin::CLexer::<Language> and wires the
 * error class to the already-loaded Gherkin::Lexer::LexingError. */
void Init_gherkin_lexer()
{
  mGherkin = rb_define_module("Gherkin");
  mLexer = rb_const_get(mGherkin, rb_intern("Lexer"));
  rb_eGherkinLexerError = rb_const_get(mLexer, rb_intern("LexingError"));

  mCLexer = rb_define_module_under(mGherkin, "CLexer");
  cI18nLexer = rb_define_class_under(mCLexer, "En", rb_cObject);
  rb_define_alloc_func(cI18nLexer, CLexer_alloc);
  rb_define_method(cI18nLexer, "initialize", CLexer_init, 1);
  rb_define_method(cI18nLexer, "scan", CLexer_scan, 1);
}