gherkin 0.0.4-i386-mswin32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. data/.gitignore +7 -0
  2. data/LICENSE +20 -0
  3. data/README.rdoc +66 -0
  4. data/Rakefile +49 -0
  5. data/VERSION.yml +4 -0
  6. data/bin/gherkin +10 -0
  7. data/cucumber.yml +3 -0
  8. data/features/feature_parser.feature +206 -0
  9. data/features/native_lexer.feature +19 -0
  10. data/features/parser_with_native_lexer.feature +205 -0
  11. data/features/pretty_printer.feature +14 -0
  12. data/features/step_definitions/gherkin_steps.rb +34 -0
  13. data/features/step_definitions/pretty_printer_steps.rb +56 -0
  14. data/features/steps_parser.feature +46 -0
  15. data/features/support/env.rb +33 -0
  16. data/gherkin.gemspec +180 -0
  17. data/java/.gitignore +2 -0
  18. data/java/Gherkin.iml +24 -0
  19. data/java/build.xml +13 -0
  20. data/java/src/gherkin/FixJava.java +34 -0
  21. data/java/src/gherkin/Lexer.java +5 -0
  22. data/java/src/gherkin/LexingError.java +7 -0
  23. data/java/src/gherkin/Listener.java +27 -0
  24. data/java/src/gherkin/ParseError.java +22 -0
  25. data/java/src/gherkin/Parser.java +185 -0
  26. data/java/src/gherkin/lexer/.gitignore +1 -0
  27. data/java/src/gherkin/parser/StateMachineReader.java +62 -0
  28. data/lib/.gitignore +4 -0
  29. data/lib/gherkin.rb +2 -0
  30. data/lib/gherkin/c_lexer.rb +10 -0
  31. data/lib/gherkin/core_ext/array.rb +5 -0
  32. data/lib/gherkin/i18n.yml +535 -0
  33. data/lib/gherkin/i18n_lexer.rb +29 -0
  34. data/lib/gherkin/java_lexer.rb +10 -0
  35. data/lib/gherkin/lexer.rb +43 -0
  36. data/lib/gherkin/parser.rb +19 -0
  37. data/lib/gherkin/parser/meta.txt +4 -0
  38. data/lib/gherkin/parser/root.txt +9 -0
  39. data/lib/gherkin/parser/steps.txt +3 -0
  40. data/lib/gherkin/rb_lexer.rb +10 -0
  41. data/lib/gherkin/rb_lexer/.gitignore +1 -0
  42. data/lib/gherkin/rb_lexer/README.rdoc +8 -0
  43. data/lib/gherkin/rb_parser.rb +117 -0
  44. data/lib/gherkin/tools/pretty_printer.rb +83 -0
  45. data/nativegems.sh +5 -0
  46. data/ragel/i18n/.gitignore +1 -0
  47. data/ragel/lexer.c.rl.erb +401 -0
  48. data/ragel/lexer.java.rl.erb +200 -0
  49. data/ragel/lexer.rb.rl.erb +171 -0
  50. data/ragel/lexer_common.rl.erb +46 -0
  51. data/spec/gherkin/c_lexer_spec.rb +21 -0
  52. data/spec/gherkin/fixtures/1.feature +8 -0
  53. data/spec/gherkin/fixtures/complex.feature +43 -0
  54. data/spec/gherkin/fixtures/i18n_fr.feature +13 -0
  55. data/spec/gherkin/fixtures/i18n_no.feature +6 -0
  56. data/spec/gherkin/fixtures/i18n_zh-CN.feature +8 -0
  57. data/spec/gherkin/fixtures/simple.feature +3 -0
  58. data/spec/gherkin/fixtures/simple_with_comments.feature +7 -0
  59. data/spec/gherkin/fixtures/simple_with_tags.feature +11 -0
  60. data/spec/gherkin/i18n_spec.rb +57 -0
  61. data/spec/gherkin/java_lexer_spec.rb +20 -0
  62. data/spec/gherkin/parser_spec.rb +28 -0
  63. data/spec/gherkin/rb_lexer_spec.rb +18 -0
  64. data/spec/gherkin/sexp_recorder.rb +29 -0
  65. data/spec/gherkin/shared/lexer_spec.rb +433 -0
  66. data/spec/gherkin/shared/py_string_spec.rb +124 -0
  67. data/spec/gherkin/shared/table_spec.rb +97 -0
  68. data/spec/gherkin/shared/tags_spec.rb +50 -0
  69. data/spec/spec_helper.rb +53 -0
  70. data/tasks/bench.rake +193 -0
  71. data/tasks/bench/feature_builder.rb +49 -0
  72. data/tasks/bench/generated/.gitignore +1 -0
  73. data/tasks/bench/null_listener.rb +4 -0
  74. data/tasks/compile.rake +70 -0
  75. data/tasks/cucumber.rake +20 -0
  76. data/tasks/ragel_task.rb +83 -0
  77. data/tasks/rdoc.rake +12 -0
  78. data/tasks/rspec.rake +15 -0
  79. metadata +214 -0
@@ -0,0 +1,29 @@
1
+ require 'gherkin/lexer'
2
+
3
+ module Gherkin
4
+ # The main entry point to lexing Gherkin source.
5
+ class I18nLexer
6
+ LANGUAGE_PATTERN = /language\s*:\s*(.*)/ #:nodoc:
7
+
8
+ def initialize(parser)
9
+ @parser = parser
10
+ end
11
+
12
+ def scan(source)
13
+ lang = lang(source) || 'en'
14
+ delegate = Lexer[lang].new(@parser)
15
+ delegate.scan(source)
16
+ end
17
+
18
+ private
19
+
20
+ def lang(source)
21
+ line_one = source.split(/\n/)[0]
22
+ if line_one =~ LANGUAGE_PATTERN
23
+ $1.strip
24
+ else
25
+ nil
26
+ end
27
+ end
28
+ end
29
+ end
@@ -0,0 +1,10 @@
1
+ require 'gherkin.jar'
2
+
3
+ module Gherkin
4
+ module JavaLexer
5
+ def self.[](i18n_language)
6
+ i18n_lexer_class_name = i18n_language.gsub(/[\s-]/, '').capitalize
7
+ Java::GherkinLexer.__send__(i18n_lexer_class_name)
8
+ end
9
+ end
10
+ end
@@ -0,0 +1,43 @@
1
+ module Gherkin
2
+ module Lexer
3
+ I18nLexerNotFound = Class.new(LoadError)
4
+ LexingError = Class.new(StandardError)
5
+
6
+ class << self
7
+ def [](i18n_lang)
8
+ begin
9
+ # Uncomment the line below (during development) to force use of Ruby lexer
10
+ # return rb[i18n_lang]
11
+
12
+ if defined?(JRUBY_VERSION)
13
+ java[i18n_lang]
14
+ else
15
+ begin
16
+ c[i18n_lang]
17
+ rescue NameError, LoadError => e
18
+ warn("WARNING: #{e.message}. Reverting to Ruby lexer")
19
+ rb[i18n_lang]
20
+ end
21
+ end
22
+ rescue LoadError => e
23
+ raise I18nLexerNotFound, "No lexer was found for #{i18n_lang} (#{e.message}). Supported languages are listed in gherkin/i18n.yml."
24
+ end
25
+ end
26
+
27
+ def c
28
+ require 'gherkin/c_lexer'
29
+ CLexer
30
+ end
31
+
32
+ def java
33
+ require 'gherkin/java_lexer'
34
+ JavaLexer
35
+ end
36
+
37
+ def rb
38
+ require 'gherkin/rb_lexer'
39
+ RbLexer
40
+ end
41
+ end
42
+ end
43
+ end
@@ -0,0 +1,19 @@
1
+ module Gherkin
2
+   # Error describing an event that is not legal in the parser's current state.
3
+   class ParseError < StandardError
4
+     def initialize(state, new_state, expected_states, line)
5
+       super("Parse error on line #{line}. Found #{new_state} when expecting one of: #{expected_states.join(', ')}. (Current state: #{state}).")
6
+     end
7
+   end
8
+   class Parser
9
+     # Factory: returns a Java::Gherkin::Parser under JRuby and a
10
+     # Gherkin::RbParser otherwise, requiring the implementation on demand.
11
+     def self.new(listener, raise_on_error=false, machine_names='root')
12
+       unless defined?(JRUBY_VERSION)
13
+         require 'gherkin/rb_parser'
14
+         return Gherkin::RbParser.new(listener, raise_on_error, machine_names)
15
+       end
16
+       require 'gherkin.jar'
17
+       Java::Gherkin::Parser.new(listener, raise_on_error, machine_names)
18
+     end
19
+   end
20
+ end
@@ -0,0 +1,4 @@
1
+ | | feature | background | scenario | scenario_outline | examples | step | table | py_string | comment | tag |
2
+ | meta | E | E | E | E | E | E | E | E | comment | tag |
3
+ | comment | pop() | pop() | pop() | pop() | pop() | pop() | pop() | pop() | pop() | tag |
4
+ | tag | pop() | E | pop() | pop() | pop() | E | E | E | E | tag |
@@ -0,0 +1,9 @@
1
+ | | feature | background | scenario | scenario_outline | examples | step | table | py_string | comment | tag |
2
+ | root | feature | E | E | E | E | E | E | E | push(meta) | push(meta) |
3
+ | feature | E | background | scenario | scenario_outline | E | E | E | E | push(meta) | push(meta) |
4
+ | step | E | E | scenario | scenario_outline | examples | step | step | step | push(meta) | push(meta) |
5
+ | background | E | E | scenario | scenario_outline | E | step | E | E | push(meta) | push(meta) |
6
+ | scenario | E | E | scenario | scenario_outline | E | step | E | E | push(meta) | push(meta) |
7
+ | scenario_outline | E | E | E | E | E | step | E | E | push(meta) | push(meta) |
8
+ | examples | E | E | E | E | E | E | examples_table | E | push(meta) | push(meta) |
9
+ | examples_table | E | E | scenario | scenario_outline | examples | E | E | E | push(meta) | push(meta) |
@@ -0,0 +1,3 @@
1
+ | | feature | background | scenario | scenario_outline | examples | step | table | py_string | comment | tag |
2
+ | steps | E | E | E | E | E | step | E | E | E | E |
3
+ | step | E | E | E | E | E | step | steps | steps | E | E |
@@ -0,0 +1,10 @@
1
+ module Gherkin
2
+ module RbLexer
3
+ def self.[](i18n_language)
4
+ name = i18n_language.gsub(/[\s-]/, '')
5
+ require "gherkin/rb_lexer/#{name}"
6
+ i18n_lexer_class_name = name.capitalize
7
+ const_get(i18n_lexer_class_name)
8
+ end
9
+ end
10
+ end
@@ -0,0 +1 @@
1
+ *.rb
@@ -0,0 +1,8 @@
1
+ = Lexers
2
+
3
+ Gherkin supports lexing of lots of natural languages, defined by gherkin/i18n.yml
4
+ The lexers are generated with the following command:
5
+
6
+ rake ragel:i18n
7
+
8
+ You have to run this command if you modify gherkin/i18n.yml
@@ -0,0 +1,117 @@
1
+ module Gherkin
2
+ class RbParser
3
+ # Initialize the parser. +machine_name+ refers to a state machine table.
4
+ def initialize(listener, raise_on_error, machine_name)
5
+ @listener = listener
6
+ @raise_on_error = raise_on_error
7
+ @machines = []
8
+ push_machine(machine_name)
9
+ end
10
+ # Every unknown method is treated as an event named after the method, with its last argument as the line.
11
+ # The call is forwarded to the listener only when #event returns true. Doesn't yet fall back to super
12
+ def method_missing(method, *args)
13
+ # TODO: Catch exception and call super
14
+ if(event(method.to_s, args[-1]))
15
+ @listener.send(method, *args)
16
+ end
17
+ end
18
+ # Feeds +ev+ to the current machine. If the machine yields (illegal event) this raises ParseError or reports listener.syntax_error and returns false; otherwise returns true.
19
+ def event(ev, line)
20
+ machine.event(ev, line) do |state, expected|
21
+ if @raise_on_error
22
+ raise ParseError.new(state, ev, expected, line)
23
+ else
24
+ @listener.syntax_error(state, ev, expected, line)
25
+ return false
26
+ end
27
+ end
28
+ true
29
+ end
30
+ # Pushes a new Machine for the table +name+; it becomes the current machine.
31
+ def push_machine(name)
32
+ @machines.push(Machine.new(self, name))
33
+ end
34
+ # Removes the current machine, making the one beneath it current again.
35
+ def pop_machine
36
+ @machines.pop
37
+ end
38
+ # The machine on top of the stack.
39
+ def machine
40
+ @machines[-1]
41
+ end
42
+ # Events the current machine accepts in its current state.
43
+ def expected
44
+ machine.expected
45
+ end
46
+ # Overwrites the current machine's state directly, bypassing its transition table.
47
+ def force_state(state)
48
+ machine.instance_variable_set('@state', state)
49
+ end
50
+ # A single state machine driven by the transition table read from parser/<name>.txt.
51
+ class Machine
52
+ def initialize(parser, name)
53
+ @parser = parser
54
+ @name = name
55
+ @transition_map = transition_map(name)
56
+ @state = name
57
+ end
58
+ # Applies +ev+: "E" yields (state, expected) to the block, push(x) and pop() change the parser's machine stack and re-dispatch the event, anything else becomes the new state.
59
+ def event(ev, line)
60
+ states = @transition_map[@state]
61
+ raise "Unknown state: #{@state.inspect} for machine #{@name}" if states.nil?
62
+ new_state = states[ev]
63
+ case new_state
64
+ when "E"
65
+ yield @state, expected
66
+ when /push\((.+)\)/
67
+ @parser.push_machine($1)
68
+ @parser.event(ev, line) # NOTE(review): RbParser#event discards this result and returns true - confirm that is intended
69
+ when "pop()"
70
+ @parser.pop_machine()
71
+ @parser.event(ev, line)
72
+ else
73
+ raise "Unknown transition: #{ev.inspect} among #{states.inspect} for machine #{@name}" if new_state.nil?
74
+ @state = new_state
75
+ end
76
+ end
77
+ # Sorted names of the events whose action in the current state is not "E".
78
+ def expected
79
+ allowed = @transition_map[@state].find_all { |_, action| action != "E" }
80
+ allowed.collect { |state| state[0] }.sort
81
+ end
82
+
83
+ private
84
+ # Transition maps are cached per machine name in a class variable shared by all Machine instances.
85
+ @@transition_maps = {}
86
+
87
+ def transition_map(name)
88
+ @@transition_maps[name] ||= build_transition_map(name)
89
+ end
90
+ # Builds {state => {event => action}}: the header row (minus its first cell) names the events, each later row's first cell names the state.
91
+ def build_transition_map(name)
92
+ table = transition_table(name)
93
+ events = table.shift[1..-1]
94
+ table.inject({}) do |machine, actions|
95
+ state = actions.shift
96
+ machine[state] = Hash[*events.zip(actions).flatten]
97
+ machine
98
+ end
99
+ end
100
+ # Lexes parser/<name>.txt (next to this file) with the 'en' lexer and returns the rows of its table.
101
+ def transition_table(name)
102
+ state_machine_reader = StateMachineReader.new
103
+ lexer = Gherkin::Lexer['en'].new(state_machine_reader)
104
+ lexer.scan(File.read(File.dirname(__FILE__) + "/parser/#{name}.txt"))
105
+ state_machine_reader.rows
106
+ end
107
+ # Minimal listener that only records the rows of the table event it receives.
108
+ class StateMachineReader
109
+ attr_reader :rows
110
+ def table(rows, line_number)
111
+ @rows = rows
112
+ end
113
+ end
114
+
115
+ end
116
+ end
117
+ end
@@ -0,0 +1,83 @@
1
+ # encoding: utf-8
2
+ module Gherkin
3
+   module Tools
4
+     # Listener that prints the events it receives as formatted Gherkin on +io+.
5
+     class PrettyPrinter
6
+       def initialize(io)
7
+         @io = io
8
+       end
9
+
10
+       # Buffers the tag; it is printed by the next feature, scenario or scenario outline.
11
+       def tag(name, line)
12
+         (@tags ||= []) << "@#{name}"
13
+       end
14
+
15
+       def comment(content, line)
16
+         @io.puts content
17
+       end
18
+
19
+       def feature(keyword, name, line)
20
+         tags = @tags ? @tags.join(' ') + "\n" : ''
21
+         @tags = nil
22
+         @io.puts "#{tags}#{keyword}: #{indent(name, ' ')}"
23
+       end
24
+
25
+       def background(keyword, name, line)
26
+         @io.puts "\n #{keyword}: #{indent(name, ' ')}"
27
+       end
28
+
29
+       def scenario(keyword, name, line)
30
+         tags = @tags ? ' ' + @tags.join(' ') + "\n" : ''
31
+         @tags = nil
32
+         @io.puts "\n#{tags} #{keyword}: #{indent(name, ' ')}"
33
+       end
34
+
35
+       def scenario_outline(keyword, name, line)
36
+         tags = @tags ? ' ' + @tags.join(' ') + "\n" : ''
37
+         @tags = nil
38
+         @io.puts "\n#{tags} #{keyword}: #{indent(name, ' ')}"
39
+       end
40
+
41
+       def examples(keyword, name, line)
42
+         @io.puts "\n #{keyword}: #{indent(name, ' ')}"
43
+       end
44
+
45
+       def step(keyword, name, line)
46
+         @io.puts " #{keyword} #{indent(name, ' ')}"
47
+       end
48
+
49
+       # Prints +rows+ as a table whose columns are padded to equal width (counted in UTF-8 characters).
50
+       def table(rows, line)
51
+         rows = rows.to_a.map {|row| row.to_a} if defined?(JRUBY_VERSION) # Convert ArrayList
52
+         max_lengths = rows.transpose.map { |col| col.map { |cell| cell.unpack("U*").length }.max }.flatten
53
+         # The block variable is +row+ rather than +line+ so it does not shadow the +line+ argument.
54
+         rows.each do |row|
55
+           @io.puts ' | ' + row.zip(max_lengths).map { |cell, max_length| cell + ' ' * (max_length-cell.unpack("U*").length) }.join(' | ') + ' |'
56
+         end
57
+       end
58
+
59
+       def py_string(string, line)
60
+         @io.puts " \"\"\"\n" + string.gsub(START, ' ') + "\n \"\"\""
61
+       end
62
+
63
+       # Any syntax error reported to this listener aborts printing.
64
+       def syntax_error(state, event, legal_events, line)
65
+         raise "SYNTAX ERROR"
66
+       end
67
+
68
+       private
69
+       if(RUBY_VERSION =~ /^1\.9/)
70
+         START = /#{"^".encode('UTF-8')}/
71
+         NL = Regexp.new("\n".encode('UTF-8'))
72
+       else
73
+         START = /^/
74
+         NL = /\n/n
75
+       end
76
+
77
+       # Joins the lines of +string+ with "\n", prefixing every line but the first with +indentation+.
78
+       def indent(string, indentation)
79
+         string.split(NL).join("\n#{indentation}")
80
+       end
81
+     end
82
+   end
83
+ end
@@ -0,0 +1,5 @@
1
+ #!/bin/sh
2
+ # Builds gems for all supported platforms: Java (universal-java-1.5) and Windows (i386-mswin32, i386-mingw32)
3
+ rake gemspec build PLATFORM=universal-java-1.5
4
+ rake cross compile gemspec build PLATFORM=i386-mswin32 RUBY_CC_VERSION=1.8.6
5
+ rake gemspec build PLATFORM=i386-mingw32 RUBY_CC_VERSION=1.8.6 # NOTE(review): no "cross compile" here - presumably reuses the binaries built by the previous line; confirm
@@ -0,0 +1 @@
1
+ *.rl
@@ -0,0 +1,401 @@
1
+ #include <assert.h>
2
+ #include <ruby.h>
3
+
4
+ #if defined(_WIN32)
5
+ #include <stddef.h>
6
+ #endif
7
+
8
+ #ifdef HAVE_RUBY_RE_H
9
+ #include <ruby/re.h>
10
+ #endif
11
+
12
+ #ifdef HAVE_RUBY_ENCODING_H
13
+ #include <ruby/encoding.h>
14
+ /* Builds a UTF-8 encoded Ruby String. Expands to an expression: callers supply their own ';'. */
15
+ #define ENCODED_STR_NEW(ptr, len) rb_enc_str_new((ptr), (len), rb_utf8_encoding())
16
+ #else
17
+ /* Without ruby/encoding.h: builds a plain Ruby String. Expands to an expression as well. */
18
+ #define ENCODED_STR_NEW(ptr, len) rb_str_new((ptr), (len))
19
+ #endif
20
+
21
+ #ifndef RSTRING_PTR
22
+ #define RSTRING_PTR(s) (RSTRING(s)->ptr)
23
+ #endif
24
+
25
+ #ifndef RSTRING_LEN
26
+ #define RSTRING_LEN(s) (RSTRING(s)->len)
27
+ #endif
28
+
29
+ #define DATA_GET(FROM, TYPE, NAME) \
30
+ Data_Get_Struct(FROM, TYPE, NAME); \
31
+ if (NAME == NULL) { \
32
+ rb_raise(rb_eArgError, "NULL found for " # NAME " when it shouldn't be."); \
33
+ }
34
+
35
+ typedef struct lexer_state { /* Scanner bookkeeping; the size_t members are offsets from the start of the scanned data (see MARK / PTR_TO) */
36
+ int content_len; /* reset by lexer_init; not used by any action in the visible code */
37
+ int line_number; /* starts at 1 and is incremented by the inc_line_number action */
38
+ int current_line; /* line_number captured when the element being built started */
39
+ int start_col; /* distance of the py_string start from last_newline (set by start_pystring) */
40
+ size_t mark; /* offset where the current keyword starts; 0 means not set */
41
+ size_t keyword_start; /* offset of the keyword start, copied from mark by end_keyword */
42
+ size_t keyword_end; /* offset of p when end_keyword ran */
43
+ size_t next_keyword_start; /* reset by lexer_init; not used by any action in the visible code */
44
+ size_t content_start; /* offset where the current content, cell or attribute begins */
45
+ size_t content_end; /* offset recorded by the next_keyword_start action; 0 means not set */
46
+ size_t field_len; /* reset by lexer_init and checked by an assert in CLexer_scan only */
47
+ size_t query_start; /* not used by any action in the visible code */
48
+ size_t last_newline; /* offset of p + 1 recorded by the last_newline action */
49
+ size_t final_newline; /* offset of p recorded by the inc_line_number action */
50
+ } lexer_state;
51
+
52
+ static VALUE mGherkin;
53
+ static VALUE mLexer;
54
+ static VALUE mCLexer;
55
+ static VALUE cI18nLexer;
56
+ static VALUE rb_eGherkinLexerError;
57
+
58
+ #define LEN(AT, P) (P - data - lexer->AT) /* number of bytes between the stored offset lexer->AT and the pointer P */
59
+ #define MARK(M, P) (lexer->M = (P) - data) /* stores the pointer P in lexer->M as an offset from data */
60
+ #define PTR_TO(P) (data + lexer->P) /* turns the stored offset lexer->P back into a pointer */
61
+
62
+ #define STORE_KW_END_CON(EVENT) \
63
+ store_kw_con(listener, # EVENT, \
64
+ PTR_TO(keyword_start), LEN(keyword_start, PTR_TO(keyword_end - 1)), \
65
+ PTR_TO(content_start), LEN(content_start, PTR_TO(content_end)), \
66
+ lexer->current_line); \
67
+ if (lexer->content_end != 0) { \
68
+ p = PTR_TO(content_end - 1); \
69
+ } \
70
+ lexer->content_end = 0;
71
+
72
+ #define STORE_ATTR(ATTR) \
73
+ store_attr(listener, # ATTR, \
74
+ PTR_TO(content_start), LEN(content_start, p), \
75
+ lexer->line_number);
76
+
77
+ %%{
78
+ machine lexer;
79
+
80
+ action begin_content {
81
+ MARK(content_start, p);
82
+ lexer->current_line = lexer->line_number;
83
+ }
84
+
85
+ action begin_pystring_content {
86
+ MARK(content_start, p);
87
+ }
88
+
89
+ action start_pystring {
90
+ lexer->current_line = lexer->line_number;
91
+ lexer->start_col = p - data - lexer->last_newline;
92
+ }
93
+
94
+ action store_pystring_content {
95
+ int len = LEN(content_start, PTR_TO(final_newline));
96
+
97
+ if (len < 0) len = 0;
98
+
99
+ store_pystring_content(listener, lexer->start_col, PTR_TO(content_start), len, lexer->current_line);
100
+ }
101
+
102
+ action store_feature_content {
103
+ STORE_KW_END_CON(feature)
104
+ }
105
+
106
+ action store_background_content {
107
+ STORE_KW_END_CON(background)
108
+ }
109
+
110
+ action store_scenario_content {
111
+ STORE_KW_END_CON(scenario)
112
+ }
113
+
114
+ action store_scenario_outline_content {
115
+ STORE_KW_END_CON(scenario_outline)
116
+ }
117
+
118
+ action store_examples_content {
119
+ STORE_KW_END_CON(examples)
120
+ }
121
+
122
+ action store_step_content {
123
+ store_kw_con(listener, "step",
124
+ PTR_TO(keyword_start), LEN(keyword_start, PTR_TO(keyword_end)),
125
+ PTR_TO(content_start), LEN(content_start, p),
126
+ lexer->current_line);
127
+ }
128
+
129
+ action store_comment_content {
130
+ STORE_ATTR(comment)
131
+ lexer->mark = 0;
132
+ }
133
+
134
+ action store_tag_content {
135
+ STORE_ATTR(tag)
136
+ lexer->mark = 0;
137
+ }
138
+
139
+ action inc_line_number {
140
+ lexer->line_number += 1;
141
+ MARK(final_newline, p);
142
+ }
143
+
144
+ action last_newline {
145
+ MARK(last_newline, p + 1);
146
+ }
147
+
148
+ action start_keyword {
149
+ if (lexer->mark == 0) {
150
+ MARK(mark, p);
151
+ }
152
+ }
153
+
154
+ action end_keyword {
155
+ MARK(keyword_end, p);
156
+ MARK(keyword_start, PTR_TO(mark));
157
+ MARK(content_start, p + 1);
158
+ lexer->mark = 0;
159
+ }
160
+
161
+ action next_keyword_start {
162
+ MARK(content_end, p);
163
+ }
164
+
165
+ action start_table {
166
+ p = p - 1;
167
+ lexer->current_line = lexer->line_number;
168
+ rb_ary_clear(rows);
169
+ rb_ary_clear(current_row);
170
+ }
171
+
172
+ action begin_cell_content {
173
+ MARK(content_start, p);
174
+ }
175
+
176
+ action store_cell_content {
177
+ VALUE con = Qnil;
178
+ con = ENCODED_STR_NEW(PTR_TO(content_start), LEN(content_start, p));
179
+ rb_funcall(con, rb_intern("strip!"), 0);
180
+
181
+ rb_ary_push(current_row, con);
182
+ }
183
+
184
+ action start_row {
185
+ current_row = rb_ary_new();
186
+ }
187
+
188
+ action store_row {
189
+ rb_ary_push(rows, current_row);
190
+ }
191
+
192
+ action store_table {
193
+ rb_funcall(listener, rb_intern("table"), 2, rows, INT2FIX(lexer->current_line));
194
+ }
195
+
196
+ action end_feature {
197
+   // Raises a lexing error when the machine is not in a final state.
198
+   // The message quotes the text from the last recorded line start (or
199
+   // from the start of the data when no line start was recorded) up to
200
+   // the next newline.
201
+   if (cs < lexer_first_final) {
202
+     if (raise_lexer_error != NULL) {
203
+       size_t count;
204
+       size_t newstr_count = 0;
205
+       size_t len;
206
+       const char *buff;
207
+       if (lexer->last_newline != 0) {
208
+         len = LEN(last_newline, eof);
209
+         buff = PTR_TO(last_newline);
210
+       } else {
211
+         len = strlen(data);
212
+         buff = data;
213
+       }
214
+
215
+       // Every percent sign is doubled below, so the copy can need up to
216
+       // 2 * len bytes plus one byte for the terminating NUL.
217
+       char newstr[2 * len + 1];
218
+
219
+       for (count = 0; count < len && buff[count] != 10; count++) {
220
+         if (buff[count] == '%') {
221
+           newstr[newstr_count++] = '%';
222
+         }
223
+         newstr[newstr_count++] = buff[count];
224
+       }
225
+       // Terminate unconditionally: the text may end without a newline.
226
+       newstr[newstr_count] = '\0';
227
+
228
+       int line = lexer->line_number;
229
+       lexer_init(lexer); // Re-initialize so we can scan again with the same lexer
230
+       raise_lexer_error(listener, newstr, line);
231
+     }
232
+   }
233
+ }
234
+
235
+ include lexer_common "lexer_common.<%= @i18n %>.rl";
236
+
237
+ }%%
238
+
239
+ /** Data **/
240
+ %% write data;
241
+
242
+ /* Iterator callback for multiline_strip: calls strip! on str (in place) and appends it to ary. */
243
+ static VALUE
244
+ strip_i(VALUE str, VALUE ary)
245
+ {
246
+ rb_funcall(str, rb_intern("strip!"), 0);
247
+ rb_ary_push(ary, str);
248
+ return Qnil;
249
+ }
250
+
251
+ /* Splits text on "\n", calls strip! on every piece and returns the pieces joined with "\n" again. */
252
+ static VALUE
253
+ multiline_strip(VALUE text)
254
+ {
255
+ VALUE map = rb_ary_new();
256
+ VALUE split = rb_str_split(text, "\n");
257
+ rb_iterate(rb_each, split, strip_i, map);
258
+ /* strip_i strips the elements of split in place, so joining split (map is only filled, never read) gives the stripped lines. */
259
+ return rb_ary_join(split, rb_str_new2("\n"));
260
+ }
261
+
262
+ /* Calls listener.<event_name>(keyword, content, line); content is stripped line by line, then both strings are stripped. */
263
+ static void
264
+ store_kw_con(VALUE listener, const char * event_name,
265
+     const char * keyword_at, size_t keyword_length,
266
+     const char * at, size_t length,
267
+     int current_line)
268
+ {
269
+   VALUE kw = ENCODED_STR_NEW(keyword_at, keyword_length);
270
+   VALUE con = ENCODED_STR_NEW(at, length);
271
+   con = multiline_strip(con);
272
+   rb_funcall(con, rb_intern("strip!"), 0);
273
+   rb_funcall(kw, rb_intern("strip!"), 0);
274
+   rb_funcall(listener, rb_intern(event_name), 3, kw, con, INT2FIX(current_line));
275
+ }
276
+
277
+ /* Calls listener.<attr_type>(value, line), where value is a String built from the length bytes starting at at. */
278
+ static void
279
+ store_attr(VALUE listener, const char * attr_type,
280
+     const char * at, size_t length, int line)
281
+ {
282
+   VALUE attr_value = ENCODED_STR_NEW(at, length);
283
+   rb_funcall(listener, rb_intern(attr_type), 2, attr_value, INT2FIX(line));
284
+ }
285
+
286
+ /* Calls listener.py_string(content, line) after removing up to start_col leading spaces from each line of the content. */
287
+ static void
288
+ store_pystring_content(VALUE listener, int start_col,
289
+     const char *at, size_t length, int current_line)
290
+ {
291
+   VALUE con = ENCODED_STR_NEW(at, length);
292
+   /* With a 32-bit int the pattern needs at most 18 bytes including the NUL, so it always fits;
293
+      if it ever did not, snprintf would truncate it silently rather than fail. */
294
+   char pat[32];
295
+   snprintf(pat, sizeof pat, "^ {0,%d}", start_col);
296
+   VALUE re = rb_reg_regcomp(rb_str_new2(pat));
297
+   rb_funcall(con, rb_intern("gsub!"), 2, re, rb_str_new2(""));
298
+   rb_funcall(listener, rb_intern("py_string"), 2, con, INT2FIX(current_line));
299
+ }
300
+
301
+ /* Raises rb_eGherkinLexerError with the line number and the offending text at; listener is not used. */
302
+ static void raise_lexer_error(VALUE listener, const char * at, int line)
303
+ {
304
+ rb_raise(rb_eGherkinLexerError, "Lexing error on line %d: '%s'.", line, at);
305
+ }
306
+
307
+ /* Resets every field of the lexer state so the lexer can scan again; line numbering restarts at 1. */
308
+ static void lexer_init(lexer_state *lexer) {
309
+   lexer->content_len = 0;
310
+   lexer->line_number = 1;
311
+   lexer->current_line = 0; /* the struct comes from ALLOC, so every field must be set here */
312
+   lexer->start_col = 0;
313
+   lexer->mark = 0;
314
+   lexer->keyword_start = lexer->keyword_end = 0;
315
+   lexer->next_keyword_start = 0;
316
+   lexer->content_start = lexer->content_end = 0;
317
+   lexer->field_len = 0;
318
+   lexer->query_start = 0;
319
+   lexer->last_newline = lexer->final_newline = 0;
320
+ }
321
+
322
+ /* Allocator: wraps a freshly initialized lexer_state in a new Ruby object of class klass. */
323
+ static VALUE CLexer_alloc(VALUE klass)
324
+ {
325
+   lexer_state *state = ALLOC(lexer_state);
326
+   lexer_init(state);
327
+
328
+   /* No mark function is given (NULL). The free argument is -1, which the Ruby
329
+      extension API documents as "just free the pointer" - see Data_Wrap_Struct. */
330
+   return Data_Wrap_Struct(klass, NULL, -1, state);
331
+ }
332
+
333
+ /* initialize(listener): stores the listener in @listener and resets the wrapped lexer state. */
334
+ static VALUE CLexer_init(VALUE self, VALUE listener)
335
+ {
336
+   lexer_state *lxr = NULL; /* the name lxr appears in the error text built by DATA_GET */
337
+
338
+   rb_iv_set(self, "@listener", listener);
339
+   DATA_GET(self, lexer_state, lxr);
340
+   lexer_init(lxr);
341
+   return self;
342
+ }
343
+
344
+ static VALUE CLexer_scan(VALUE self, VALUE input)
345
+ {
346
+ lexer_state *lexer = NULL;
347
+ DATA_GET(self, lexer_state, lexer);
348
+ /* scan(input): lexes a copy of input with an end marker appended (presumably matched by the included lexer_common machine - confirm). */
349
+ VALUE input_copy = rb_str_dup(input);
350
+ rb_str_append(input_copy, rb_str_new2("\n%_FEATURE_END_%"));
351
+ char *data = RSTRING_PTR(input_copy);
352
+ long len = RSTRING_LEN(input_copy);
353
+ /* NOTE(review): len includes the marker appended above, so this branch looks unreachable even for an empty input - confirm. */
354
+ if (len == 0) {
355
+ rb_raise(rb_eGherkinLexerError, "No content to lex.");
356
+ } else {
357
+ const char *p, *pe, *eof;
358
+ int cs = 0;
359
+ /* listener, rows and current_row are referenced by name from the actions of the machine. */
360
+ VALUE listener = rb_iv_get(self, "@listener");
361
+ VALUE rows = rb_ary_new();
362
+ VALUE current_row = rb_ary_new();
363
+
364
+ p = data;
365
+ pe = data + len;
366
+ eof = pe;
367
+
368
+ assert(*pe == '\0' && "pointer does not end on NULL");
369
+ assert(pe - p == len && "pointers aren't same distance");
370
+ /* Ragel directives: the generated machine initialisation and scanning code are written here. */
371
+ %% write init;
372
+ %% write exec;
373
+
374
+ assert(p <= pe && "data overflow after parsing execute");
375
+ assert(lexer->content_start <= len && "content starts after data end");
376
+ assert(lexer->mark < len && "mark is after data end");
377
+ assert(lexer->field_len <= len && "field has length longer than the whole data");
378
+
379
+ // Reset lexer by re-initializing the whole thing
380
+ lexer_init(lexer);
381
+ /* Returns Qtrue unless the machine ended in its error state. */
382
+ if (cs == lexer_error) {
383
+ rb_raise(rb_eGherkinLexerError, "Invalid format, lexing fails.");
384
+ } else {
385
+ return Qtrue;
386
+ }
387
+ }
388
+ }
389
+
390
+ void Init_gherkin_lexer_<%= @i18n %>()
391
+ {
392
+ mGherkin = rb_define_module("Gherkin");
393
+ mLexer = rb_const_get(mGherkin, rb_intern("Lexer")); /* looks up the constant Lexer under Gherkin */
394
+ rb_eGherkinLexerError = rb_const_get(mLexer, rb_intern("LexingError")); /* the error class raised by this lexer */
395
+ /* Defines the lexer class for this language under Gherkin::CLexer, with initialize(listener) and scan(input). */
396
+ mCLexer = rb_define_module_under(mGherkin, "CLexer");
397
+ cI18nLexer = rb_define_class_under(mCLexer, "<%= @i18n.capitalize %>", rb_cObject);
398
+ rb_define_alloc_func(cI18nLexer, CLexer_alloc);
399
+ rb_define_method(cI18nLexer, "initialize", CLexer_init, 1);
400
+ rb_define_method(cI18nLexer, "scan", CLexer_scan, 1);
401
+ }