gherkin 0.0.3-universal-java-1.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (78) hide show
  1. data/.gitignore +8 -0
  2. data/LICENSE +20 -0
  3. data/README.rdoc +47 -0
  4. data/Rakefile +48 -0
  5. data/VERSION.yml +4 -0
  6. data/bin/gherkin +10 -0
  7. data/cucumber.yml +3 -0
  8. data/ext/gherkin_lexer/.gitignore +6 -0
  9. data/ext/gherkin_lexer/extconf.rb +6 -0
  10. data/features/feature_parser.feature +206 -0
  11. data/features/native_lexer.feature +19 -0
  12. data/features/parser_with_native_lexer.feature +205 -0
  13. data/features/pretty_printer.feature +11 -0
  14. data/features/step_definitions/gherkin_steps.rb +34 -0
  15. data/features/step_definitions/pretty_printer_steps.rb +51 -0
  16. data/features/steps_parser.feature +46 -0
  17. data/features/support/env.rb +33 -0
  18. data/gherkin.gemspec +177 -0
  19. data/java/.gitignore +2 -0
  20. data/java/Gherkin.iml +24 -0
  21. data/java/build.xml +13 -0
  22. data/java/src/gherkin/FixJava.java +34 -0
  23. data/java/src/gherkin/Lexer.java +5 -0
  24. data/java/src/gherkin/LexingError.java +7 -0
  25. data/java/src/gherkin/Listener.java +27 -0
  26. data/java/src/gherkin/ParseError.java +22 -0
  27. data/java/src/gherkin/Parser.java +185 -0
  28. data/java/src/gherkin/lexer/.gitignore +1 -0
  29. data/java/src/gherkin/parser/StateMachineReader.java +62 -0
  30. data/lib/.gitignore +2 -0
  31. data/lib/gherkin.rb +2 -0
  32. data/lib/gherkin/c_lexer.rb +10 -0
  33. data/lib/gherkin/i18n.yml +535 -0
  34. data/lib/gherkin/i18n_lexer.rb +29 -0
  35. data/lib/gherkin/java_lexer.rb +10 -0
  36. data/lib/gherkin/lexer.rb +42 -0
  37. data/lib/gherkin/parser.rb +19 -0
  38. data/lib/gherkin/parser/meta.txt +4 -0
  39. data/lib/gherkin/parser/root.txt +9 -0
  40. data/lib/gherkin/parser/steps.txt +3 -0
  41. data/lib/gherkin/rb_lexer.rb +9 -0
  42. data/lib/gherkin/rb_lexer/.gitignore +1 -0
  43. data/lib/gherkin/rb_lexer/README.rdoc +8 -0
  44. data/lib/gherkin/rb_parser.rb +117 -0
  45. data/lib/gherkin/tools/pretty_printer.rb +77 -0
  46. data/ragel/i18n/.gitignore +1 -0
  47. data/ragel/lexer.c.rl.erb +385 -0
  48. data/ragel/lexer.java.rl.erb +198 -0
  49. data/ragel/lexer.rb.rl.erb +172 -0
  50. data/ragel/lexer_common.rl.erb +46 -0
  51. data/spec/gherkin/c_lexer_spec.rb +21 -0
  52. data/spec/gherkin/fixtures/complex.feature +43 -0
  53. data/spec/gherkin/fixtures/i18n_fr.feature +13 -0
  54. data/spec/gherkin/fixtures/i18n_no.feature +6 -0
  55. data/spec/gherkin/fixtures/i18n_zh-CN.feature +8 -0
  56. data/spec/gherkin/fixtures/simple.feature +3 -0
  57. data/spec/gherkin/fixtures/simple_with_comments.feature +7 -0
  58. data/spec/gherkin/fixtures/simple_with_tags.feature +11 -0
  59. data/spec/gherkin/i18n_spec.rb +57 -0
  60. data/spec/gherkin/java_lexer_spec.rb +20 -0
  61. data/spec/gherkin/parser_spec.rb +28 -0
  62. data/spec/gherkin/rb_lexer_spec.rb +18 -0
  63. data/spec/gherkin/sexp_recorder.rb +29 -0
  64. data/spec/gherkin/shared/lexer_spec.rb +420 -0
  65. data/spec/gherkin/shared/py_string_spec.rb +112 -0
  66. data/spec/gherkin/shared/table_spec.rb +97 -0
  67. data/spec/gherkin/shared/tags_spec.rb +50 -0
  68. data/spec/spec_helper.rb +53 -0
  69. data/tasks/bench.rake +176 -0
  70. data/tasks/bench/feature_builder.rb +49 -0
  71. data/tasks/bench/generated/.gitignore +1 -0
  72. data/tasks/bench/null_listener.rb +4 -0
  73. data/tasks/cucumber.rake +20 -0
  74. data/tasks/ext.rake +49 -0
  75. data/tasks/ragel.rake +94 -0
  76. data/tasks/rdoc.rake +12 -0
  77. data/tasks/rspec.rake +15 -0
  78. metadata +204 -0
@@ -0,0 +1,29 @@
require 'gherkin/lexer'

module Gherkin
  # The main entry point to lexing Gherkin source.
  # Inspects the first line of the source for a language declaration and
  # delegates scanning to the lexer generated for that language.
  class I18nLexer
    LANGUAGE_PATTERN = /language\s*:\s*(.*)/ #:nodoc:

    def initialize(parser)
      @parser = parser
    end

    # Lexes +source+ with the declared language's lexer (default: 'en').
    def scan(source)
      Lexer[lang(source) || 'en'].new(@parser).scan(source)
    end

    private

    # Returns the language declared on the first line of +source+,
    # or nil when no declaration is present.
    def lang(source)
      first_line = source.split(/\n/)[0]
      match = LANGUAGE_PATTERN.match(first_line)
      match && match[1].strip
    end
  end
end
@@ -0,0 +1,10 @@
require 'gherkin.jar'

module Gherkin
  # Looks up the generated Java lexer class for an i18n language
  # (JRuby only — the classes live in gherkin.jar).
  module JavaLexer
    def self.[](i18n_language)
      class_name = i18n_language.gsub(/[\s-]/, '').capitalize
      Java::GherkinLexer.__send__(class_name)
    end
  end
end
@@ -0,0 +1,42 @@
module Gherkin
  module Lexer
    I18nLexerNotFound = Class.new(LoadError)
    LexingError = Class.new(StandardError)

    class << self
      # Returns the best available lexer module for +i18n_lang+:
      # the Java lexer on JRuby, otherwise the native C lexer with a
      # fallback to the pure-Ruby lexer. Raises I18nLexerNotFound when
      # no lexer can be loaded for the language at all.
      def [](i18n_lang)
        return java[i18n_lang] if defined?(JRUBY_VERSION)
        begin
          c[i18n_lang]
        rescue NameError => e
          warn("WARNING: #{e.message}. Reverting to Ruby lexer")
          rb[i18n_lang]
        rescue LoadError
          rb[i18n_lang]
        end
      rescue LoadError
        raise I18nLexerNotFound, "No lexer was found for #{i18n_lang}. Supported languages are listed in gherkin/i18n.yml."
      end

      # Lazily loads and returns the C lexer namespace.
      def c
        require 'gherkin/c_lexer'
        CLexer
      end

      # Lazily loads and returns the Java lexer namespace.
      def java
        require 'gherkin/java_lexer'
        JavaLexer
      end

      # Lazily loads and returns the pure-Ruby lexer namespace.
      def rb
        require 'gherkin/rb_lexer'
        RbLexer
      end
    end
  end
end
@@ -0,0 +1,19 @@
module Gherkin
  # Raised (or reported to the listener) when an event arrives in a
  # state where the transition table does not allow it.
  class ParseError < StandardError
    def initialize(state, new_state, expected_states, line)
      super("Parse error on line #{line}. Found #{new_state} when expecting one of: #{expected_states.join(', ')}. (Current state: #{state}).")
    end
  end

  # Factory: builds the Java parser on JRuby, the Ruby parser elsewhere.
  class Parser
    def self.new(listener, raise_on_error=false, machine_names='root')
      unless defined?(JRUBY_VERSION)
        require 'gherkin/rb_parser'
        return Gherkin::RbParser.new(listener, raise_on_error, machine_names)
      end
      require 'gherkin.jar'
      Java::Gherkin::Parser.new(listener, raise_on_error, machine_names)
    end
  end
end
@@ -0,0 +1,4 @@
1
+ | | feature | background | scenario | scenario_outline | examples | step | table | py_string | comment | tag |
2
+ | meta | E | E | E | E | E | E | E | E | comment | tag |
3
+ | comment | pop() | pop() | pop() | pop() | pop() | pop() | pop() | pop() | pop() | tag |
4
+ | tag | pop() | E | pop() | pop() | pop() | E | E | E | E | tag |
@@ -0,0 +1,9 @@
1
+ | | feature | background | scenario | scenario_outline | examples | step | table | py_string | comment | tag |
2
+ | root | feature | E | E | E | E | E | E | E | push(meta) | push(meta) |
3
+ | feature | E | background | scenario | scenario_outline | E | E | E | E | push(meta) | push(meta) |
4
+ | step | E | E | scenario | scenario_outline | examples | step | step | step | push(meta) | push(meta) |
5
+ | background | E | E | scenario | scenario_outline | E | step | E | E | push(meta) | push(meta) |
6
+ | scenario | E | E | scenario | scenario_outline | E | step | E | E | push(meta) | push(meta) |
7
+ | scenario_outline | E | E | E | E | E | step | E | E | push(meta) | push(meta) |
8
+ | examples | E | E | E | E | E | E | examples_table | E | push(meta) | push(meta) |
9
+ | examples_table | E | E | scenario | scenario_outline | examples | E | E | E | push(meta) | push(meta) |
@@ -0,0 +1,3 @@
1
+ | | feature | background | scenario | scenario_outline | examples | step | table | py_string | comment | tag |
2
+ | steps | E | E | E | E | E | step | E | E | E | E |
3
+ | step | E | E | E | E | E | step | steps | steps | E | E |
@@ -0,0 +1,9 @@
module Gherkin
  # Looks up (and lazily loads) the generated pure-Ruby lexer
  # for an i18n language.
  module RbLexer
    def self.[](i18n_language)
      require "gherkin/rb_lexer/#{i18n_language}"
      const_get(i18n_language.gsub(/[\s-]/, '').capitalize)
    end
  end
end
@@ -0,0 +1 @@
1
+ *.rb
@@ -0,0 +1,8 @@
1
+ = Lexers
2
+
3
+ Gherkin supports lexing of many natural languages, defined by gherkin/i18n.yml.
4
+ The lexers are generated with the following command:
5
+
6
+ rake ragel:i18n
7
+
8
+ You have to run this command if you modify gherkin/i18n.yml
@@ -0,0 +1,117 @@
module Gherkin
  # Table-driven Gherkin parser. Lexer events (feature, scenario, step, ...)
  # arrive via method_missing, are validated against the current state
  # machine, and are then forwarded to the listener.
  class RbParser
    # Initialize the parser. +machine_name+ refers to a state machine table.
    def initialize(listener, raise_on_error, machine_name)
      @listener = listener
      @raise_on_error = raise_on_error
      @machines = []
      push_machine(machine_name)
    end

    # Doesn't yet fall back to super
    def method_missing(method, *args)
      # TODO: Catch exception and call super
      # The last argument is the line number (see #event's signature).
      if(event(method.to_s, args[-1]))
        @listener.send(method, *args)
      end
    end

    # Runs event +ev+ through the current machine. Returns true when the
    # event is legal in the current state; on an illegal event either
    # raises ParseError or reports it to the listener and returns false.
    def event(ev, line)
      machine.event(ev, line) do |state, expected|
        if @raise_on_error
          raise ParseError.new(state, ev, expected, line)
        else
          @listener.syntax_error(state, ev, expected, line)
          return false
        end
      end
      true
    end

    # Enters a nested state machine (used by push(...) table actions).
    def push_machine(name)
      @machines.push(Machine.new(self, name))
    end

    # Leaves the current nested machine (used by pop() table actions).
    def pop_machine
      @machines.pop
    end

    # The machine currently in control (top of the stack).
    def machine
      @machines[-1]
    end

    # Event names that are legal in the current state.
    def expected
      machine.expected
    end

    # Forces the current machine into +state+.
    def force_state(state)
      machine.instance_variable_set('@state', state)
    end

    # One state machine, built from a transition table read from
    # parser/<name>.txt. Cell values are either a new state name, "E"
    # (error), "push(<machine>)" or "pop()".
    class Machine
      def initialize(parser, name)
        @parser = parser
        @name = name
        @transition_map = transition_map(name)
        @state = name
      end

      # Applies +ev+ in the current state. push/pop transitions delegate
      # back to the parser, which re-dispatches the event to the new top
      # machine. Yields (state, expected) when the table marks the
      # transition as an error.
      def event(ev, line)
        states = @transition_map[@state]
        raise "Unknown state: #{@state.inspect} for machine #{@name}" if states.nil?
        new_state = states[ev]
        case new_state
        when "E"
          yield @state, expected
        when /push\((.+)\)/
          @parser.push_machine($1)
          @parser.event(ev, line)
        when "pop()"
          @parser.pop_machine()
          @parser.event(ev, line)
        else
          raise "Unknown transition: #{ev.inspect} among #{states.inspect} for machine #{@name}" if new_state.nil?
          @state = new_state
        end
      end

      # Sorted list of events that would not be an error in the current state.
      def expected
        allowed = @transition_map[@state].find_all { |_, action| action != "E" }
        allowed.collect { |state| state[0] }.sort
      end

      private

      # Cache of parsed transition maps, shared by all Machine instances.
      @@transition_maps = {}

      def transition_map(name)
        @@transition_maps[name] ||= build_transition_map(name)
      end

      # Converts the raw table (array of rows, first row = event names)
      # into a hash of state => { event => action }.
      def build_transition_map(name)
        table = transition_table(name)
        events = table.shift[1..-1]
        table.inject({}) do |machine, actions|
          state = actions.shift
          machine[state] = Hash[*events.zip(actions).flatten]
          machine
        end
      end

      # Reads the transition table from parser/<name>.txt by lexing it
      # with the English lexer and capturing the single table event.
      def transition_table(name)
        state_machine_reader = StateMachineReader.new
        lexer = Gherkin::Lexer['en'].new(state_machine_reader)
        lexer.scan(File.read(File.dirname(__FILE__) + "/parser/#{name}.txt"))
        state_machine_reader.rows
      end

      # Minimal lexer listener: records the rows of the one table event.
      class StateMachineReader
        attr_reader :rows
        def table(rows, line_number)
          @rows = rows
        end
      end

    end
  end
end
@@ -0,0 +1,77 @@
module Gherkin
  module Tools
    # Lexer listener that pretty-prints the received Gherkin events
    # back out to the given IO.
    class PrettyPrinter
      def initialize(io)
        @io = io
      end

      # Collects tags until the next feature/scenario/outline flushes them.
      def tag(name, line)
        (@tags ||= []) << "@#{name}"
      end

      def comment(content, line)
        @io.puts content
      end

      def feature(keyword, name, line)
        @io.puts "#{consume_tags('')}#{keyword}: #{indent(name, ' ')}"
      end

      def background(keyword, name, line)
        @io.puts "\n #{keyword}: #{indent(name, ' ')}"
      end

      def scenario(keyword, name, line)
        @io.puts "\n#{consume_tags(' ')} #{keyword}: #{indent(name, ' ')}"
      end

      def scenario_outline(keyword, name, line)
        @io.puts "\n#{consume_tags(' ')} #{keyword}: #{indent(name, ' ')}"
      end

      def examples(keyword, name, line)
        @io.puts "\n #{keyword}: #{indent(name, ' ')}"
      end

      def step(keyword, name, line)
        @io.puts " #{keyword} #{indent(name, ' ')}"
      end

      # Prints a table with every column padded to its widest cell
      # (cell width measured in characters via UTF-8 unpack, not bytes).
      def table(rows, line)
        rows = rows.to_a.map {|row| row.to_a} if defined?(JRUBY_VERSION) # Convert ArrayList
        widths = rows.transpose.map { |col| col.map { |cell| cell.unpack("U*").length }.max }.flatten
        rows.each do |row|
          padded = row.zip(widths).map { |cell, width| cell + ' ' * (width - cell.unpack("U*").length) }
          @io.puts ' | ' + padded.join(' | ') + ' |'
        end
      end

      def py_string(string, line)
        @io.puts ' """'
        @io.puts string.gsub(/^/, ' ')
        @io.puts ' """'
      end

      def syntax_error(state, event, legal_events, line)
        raise "SYNTAX ERROR"
      end

      private

      # Returns the pending tags as "<prefix>@a @b\n" and clears them,
      # or '' when no tags are pending.
      def consume_tags(prefix)
        return '' unless @tags
        joined = prefix + @tags.join(' ') + "\n"
        @tags = nil
        joined
      end

      # Prefixes every line after the first with +indentation+.
      def indent(string, indentation)
        lines = string.split(/\n/n)
        first = lines.shift
        ([first] + lines.map { |l| "#{indentation}#{l}" }).join("\n")
      end
    end
  end
end
@@ -0,0 +1 @@
1
+ *.rl
@@ -0,0 +1,385 @@
#include <assert.h>
#include <ruby.h>

#if defined(_WIN32)
#include <stddef.h>
#endif

/* Compatibility shims for Ruby versions whose string structs predate
   the RSTRING_PTR/RSTRING_LEN accessors. */
#ifndef RSTRING_PTR
#define RSTRING_PTR(s) (RSTRING(s)->ptr)
#endif

#ifndef RSTRING_LEN
#define RSTRING_LEN(s) (RSTRING(s)->len)
#endif

/* Fetch the wrapped C struct from a Ruby object, failing loudly on NULL. */
#define DATA_GET(FROM, TYPE, NAME) \
  Data_Get_Struct(FROM, TYPE, NAME); \
  if (NAME == NULL) { \
    rb_raise(rb_eArgError, "NULL found for " # NAME " when it shouldn't be."); \
  }

/* Scanner state. All positions are byte offsets from the start of the
   input buffer `data` (see the MARK/PTR_TO macros below). */
typedef struct lexer_state {
  int content_len;
  int line_number;            /* 1-based line currently being scanned */
  int current_line;           /* line on which the current token started */
  int start_col;              /* column where a py_string opened */
  size_t mark;                /* start of a tentative keyword */
  size_t keyword_start;
  size_t keyword_end;
  size_t next_keyword_start;
  size_t content_start;
  size_t content_end;
  size_t field_len;
  size_t query_start;
  size_t last_newline;        /* offset just past the most recent newline */
  size_t final_newline;
} lexer_state;

static VALUE mGherkin;
static VALUE mLexer;
static VALUE mCLexer;
static VALUE cI18nLexer;
static VALUE rb_eGherkinLexerError;

/* Offset helpers: convert between raw pointers into `data` and the
   offsets stored in lexer_state. */
#define LEN(AT, P) (P - data - lexer->AT)
#define MARK(M, P) (lexer->M = (P) - data)
#define PTR_TO(P) (data + lexer->P)

/* Emit a keyword+content event. When content_end was set by the
   next_keyword_start action, rewind p to just before it so the next
   keyword is rescanned, then reset content_end. */
#define STORE_KW_END_CON(EVENT) \
  store_kw_con(listener, # EVENT, \
    PTR_TO(keyword_start), LEN(keyword_start, PTR_TO(keyword_end - 1)), \
    PTR_TO(content_start), LEN(content_start, PTR_TO(content_end)), \
    lexer->current_line); \
  if (lexer->content_end != 0) { \
    p = PTR_TO(content_end - 1); \
  } \
  lexer->content_end = 0;

/* Emit a single-attribute event (comment or tag) at the current line. */
#define STORE_ATTR(ATTR) \
  store_attr(listener, # ATTR, \
    PTR_TO(content_start), LEN(content_start, p), \
    lexer->line_number);
%%{
  machine lexer;

  action begin_content {
    MARK(content_start, p);
    lexer->current_line = lexer->line_number;
  }

  action begin_pystring_content {
    MARK(content_start, p);
  }

  action start_pystring {
    lexer->current_line = lexer->line_number;
    lexer->start_col = p - data - lexer->last_newline;
  }

  action store_pystring_content {
    int len = LEN(content_start, PTR_TO(final_newline));

    if (len < 0) len = 0;

    store_pystring_content(listener, lexer->start_col, PTR_TO(content_start), len, lexer->current_line);
  }

  action store_feature_content {
    STORE_KW_END_CON(feature)
  }

  action store_background_content {
    STORE_KW_END_CON(background)
  }

  action store_scenario_content {
    STORE_KW_END_CON(scenario)
  }

  action store_scenario_outline_content {
    STORE_KW_END_CON(scenario_outline)
  }

  action store_examples_content {
    STORE_KW_END_CON(examples)
  }

  action store_step_content {
    store_kw_con(listener, "step",
      PTR_TO(keyword_start), LEN(keyword_start, PTR_TO(keyword_end)),
      PTR_TO(content_start), LEN(content_start, p),
      lexer->current_line);
  }

  action store_comment_content {
    STORE_ATTR(comment)
  }

  action store_tag_content {
    STORE_ATTR(tag)
  }

  action inc_line_number {
    lexer->line_number += 1;
    MARK(final_newline, p);
  }

  action last_newline {
    MARK(last_newline, p + 1);
  }

  action start_keyword {
    if (lexer->mark == 0) {
      MARK(mark, p);
    }
  }

  action end_keyword {
    MARK(keyword_end, p);
    MARK(keyword_start, PTR_TO(mark));
    MARK(content_start, p + 1);
    lexer->mark = 0;
  }

  action next_keyword_start {
    MARK(content_end, p);
  }

  action start_table {
    p = p - 1;
    lexer->current_line = lexer->line_number;
    rb_ary_clear(rows);
    rb_ary_clear(current_row);
  }

  action begin_cell_content {
    MARK(content_start, p);
  }

  action store_cell_content {
    VALUE con = Qnil;
    con = rb_str_new(PTR_TO(content_start), LEN(content_start, p));
    rb_funcall(con, rb_intern("strip!"), 0);

    rb_ary_push(current_row, con);
  }

  action start_row {
    current_row = rb_ary_new();
  }

  action store_row {
    rb_ary_push(rows, current_row);
  }

  action store_table {
    rb_funcall(listener, rb_intern("table"), 2, rows, INT2FIX(lexer->current_line));
  }

  /* On an incomplete parse, raise a LexingError carrying the offending
     line's text (with '%' doubled for the printf-style message). */
  action end_feature {
    if (cs < lexer_first_final) {
      if (raise_lexer_error != NULL) {
        size_t count = 0;
        size_t newstr_count = 0;
        size_t len;
        const char *buff;
        if (lexer->last_newline != 0) {
          len = LEN(last_newline, eof);
          buff = PTR_TO(last_newline);
        } else {
          len = strlen(data);
          buff = data;
        }

        /* FIX: each input byte expands to at most two bytes ('%' is
           doubled) plus one terminating NUL. The previous `len`-byte
           buffer could overflow, and was left unterminated when the
           remainder contained no newline. */
        char newstr[len * 2 + 1];

        for (count = 0; count < len; count++) {
          if(buff[count] == 10) {
            newstr[newstr_count] = '\0'; // terminate new string at first newline found
            break;
          } else {
            if (buff[count] == '%') {
              newstr[newstr_count++] = buff[count];
              newstr[newstr_count] = buff[count];
            } else {
              newstr[newstr_count] = buff[count];
            }
          }
          newstr_count++;
        }
        newstr[newstr_count] = '\0'; /* ensure termination when no newline was found */

        int line = lexer->line_number;
        lexer_init(lexer); // Re-initialize so we can scan again with the same lexer
        raise_lexer_error(listener, newstr, line);
      }
    }
  }

  include lexer_common "lexer_common.<%= i18n_language %>.rl";

}%%
/** Data **/
%% write data;

/* rb_iterate callback: destructively strip!s one line and collects it. */
static VALUE
strip_i(VALUE str, VALUE ary)
{
    rb_funcall(str, rb_intern("strip!"), 0);
    rb_ary_push(ary, str);

    return Qnil;
}

/* Strips every line of +text+ and rejoins them with "\n".
   NOTE(review): the collected array `map` is never read; joining `split`
   works because strip! mutates the strings in place — `map` looks
   redundant. */
static VALUE
multiline_strip(VALUE text)
{
    VALUE map = rb_ary_new();
    VALUE split = rb_str_split(text, "\n");

    rb_iterate(rb_each, split, strip_i, map);

    return rb_ary_join(split, rb_str_new2("\n"));
}

/* Fires listener.<event_name>(keyword, content, line) with both the
   keyword and the (multiline-stripped) content stripped of whitespace. */
static void
store_kw_con(VALUE listener, const char * event_name,
             const char * keyword_at, size_t keyword_length,
             const char * at, size_t length,
             int current_line)
{
    VALUE con = Qnil, kw = Qnil;
    kw = rb_str_new(keyword_at, keyword_length);
    con = rb_str_new(at, length);
    con = multiline_strip(con);
    rb_funcall(con, rb_intern("strip!"), 0);
    rb_funcall(kw, rb_intern("strip!"), 0);
    rb_funcall(listener, rb_intern(event_name), 3, kw, con, INT2FIX(current_line));
}

/* Fires listener.<attr_type>(value, line) — used for comments and tags. */
static void
store_attr(VALUE listener, const char * attr_type,
           const char * at, size_t length,
           int line)
{
    VALUE val = rb_str_new(at, length);
    rb_funcall(listener, rb_intern(attr_type), 2, val, INT2FIX(line));
}

/* Fires listener.py_string(content, line), stripping up to start_col
   leading spaces from every line so the docstring keeps its own indent. */
static void
store_pystring_content(VALUE listener,
                       int start_col,
                       const char *at, size_t length,
                       int current_line)
{
    VALUE con = rb_str_new(at, length);
    // Pattern buffer is 32 bytes; snprintf truncates safely if start_col is huge
    char pat[32];
    snprintf(pat, 32, "^ {0,%d}", start_col);
    VALUE re = rb_reg_regcomp(rb_str_new2(pat));
    rb_funcall(con, rb_intern("gsub!"), 2, re, rb_str_new2(""));
    rb_funcall(listener, rb_intern("py_string"), 2, con, INT2FIX(current_line));
}

/* Raises Gherkin::Lexer::LexingError with the offending line's text. */
static void
raise_lexer_error(VALUE listener, const char * at, int line)
{
    rb_raise(rb_eGherkinLexerError, "Lexing error on line %d: '%s'.", line, at);
}

/* Resets all scanner offsets so the same lexer can scan again. */
static void lexer_init(lexer_state *lexer) {
    lexer->content_start = 0;
    lexer->content_end = 0;
    lexer->content_len = 0;
    lexer->mark = 0;
    lexer->field_len = 0;
    lexer->keyword_start = 0;
    lexer->keyword_end = 0;
    lexer->next_keyword_start = 0;
    lexer->line_number = 1;
    lexer->last_newline = 0;
    lexer->final_newline = 0;
    lexer->start_col = 0;
}

/* Allocator: wraps a fresh lexer_state in the Ruby object. */
static VALUE CLexer_alloc(VALUE klass)
{
    VALUE obj;
    lexer_state *lxr = ALLOC(lexer_state);
    lexer_init(lxr);

    obj = Data_Wrap_Struct(klass, NULL, -1, lxr);

    return obj;
}

/* #initialize(listener): stores the listener and resets the scanner. */
static VALUE CLexer_init(VALUE self, VALUE listener)
{
    rb_iv_set(self, "@listener", listener);

    lexer_state *lxr = NULL;
    DATA_GET(self, lexer_state, lxr);
    lexer_init(lxr);

    return self;
}

/* #scan(input): runs the Ragel machine over +input+ (a sentinel
   "\n%_FEATURE_END_%" line is appended first, which also mutates the
   caller's string). Raises LexingError on empty or invalid input;
   returns true on success. */
static VALUE CLexer_scan(VALUE self, VALUE input)
{
    lexer_state *lexer = NULL;
    DATA_GET(self, lexer_state, lexer);

    rb_str_append(input, rb_str_new2("\n%_FEATURE_END_%"));
    char *data = RSTRING_PTR(input);
    long len = RSTRING_LEN(input);

    if (len == 0) {
        rb_raise(rb_eGherkinLexerError, "No content to lex.");
    } else {
        const char *p, *pe, *eof;
        int cs = 0;

        VALUE listener = rb_iv_get(self, "@listener");
        VALUE rows = rb_ary_new();
        VALUE current_row = rb_ary_new();

        p = data;
        pe = data + len;
        eof = pe;

        assert(*pe == '\0' && "pointer does not end on NULL");
        assert(pe - p == len && "pointers aren't same distance");

        %% write init;
        %% write exec;

        assert(p <= pe && "data overflow after parsing execute");
        assert(lexer->content_start <= len && "content starts after data end");
        assert(lexer->mark < len && "mark is after data end");
        assert(lexer->field_len <= len && "field has length longer than the whole data");

        // Reset lexer by re-initializing the whole thing
        lexer_init(lexer);

        if (cs == lexer_error) {
            rb_raise(rb_eGherkinLexerError, "Invalid format, lexing fails.");
        } else {
            return Qtrue;
        }
    }
}

/* Extension entry point: defines Gherkin::CLexer::En, reusing the
   error class declared in Ruby under Gherkin::Lexer. */
void Init_gherkin_lexer()
{
    mGherkin = rb_define_module("Gherkin");
    mLexer = rb_const_get(mGherkin, rb_intern("Lexer"));
    rb_eGherkinLexerError = rb_const_get(mLexer, rb_intern("LexingError"));

    mCLexer = rb_define_module_under(mGherkin, "CLexer");
    cI18nLexer = rb_define_class_under(mCLexer, "En", rb_cObject);
    rb_define_alloc_func(cI18nLexer, CLexer_alloc);
    rb_define_method(cI18nLexer, "initialize", CLexer_init, 1);
    rb_define_method(cI18nLexer, "scan", CLexer_scan, 1);
}