gherkin 0.0.3-universal-java-1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. data/.gitignore +8 -0
  2. data/LICENSE +20 -0
  3. data/README.rdoc +47 -0
  4. data/Rakefile +48 -0
  5. data/VERSION.yml +4 -0
  6. data/bin/gherkin +10 -0
  7. data/cucumber.yml +3 -0
  8. data/ext/gherkin_lexer/.gitignore +6 -0
  9. data/ext/gherkin_lexer/extconf.rb +6 -0
  10. data/features/feature_parser.feature +206 -0
  11. data/features/native_lexer.feature +19 -0
  12. data/features/parser_with_native_lexer.feature +205 -0
  13. data/features/pretty_printer.feature +11 -0
  14. data/features/step_definitions/gherkin_steps.rb +34 -0
  15. data/features/step_definitions/pretty_printer_steps.rb +51 -0
  16. data/features/steps_parser.feature +46 -0
  17. data/features/support/env.rb +33 -0
  18. data/gherkin.gemspec +177 -0
  19. data/java/.gitignore +2 -0
  20. data/java/Gherkin.iml +24 -0
  21. data/java/build.xml +13 -0
  22. data/java/src/gherkin/FixJava.java +34 -0
  23. data/java/src/gherkin/Lexer.java +5 -0
  24. data/java/src/gherkin/LexingError.java +7 -0
  25. data/java/src/gherkin/Listener.java +27 -0
  26. data/java/src/gherkin/ParseError.java +22 -0
  27. data/java/src/gherkin/Parser.java +185 -0
  28. data/java/src/gherkin/lexer/.gitignore +1 -0
  29. data/java/src/gherkin/parser/StateMachineReader.java +62 -0
  30. data/lib/.gitignore +2 -0
  31. data/lib/gherkin.rb +2 -0
  32. data/lib/gherkin/c_lexer.rb +10 -0
  33. data/lib/gherkin/i18n.yml +535 -0
  34. data/lib/gherkin/i18n_lexer.rb +29 -0
  35. data/lib/gherkin/java_lexer.rb +10 -0
  36. data/lib/gherkin/lexer.rb +42 -0
  37. data/lib/gherkin/parser.rb +19 -0
  38. data/lib/gherkin/parser/meta.txt +4 -0
  39. data/lib/gherkin/parser/root.txt +9 -0
  40. data/lib/gherkin/parser/steps.txt +3 -0
  41. data/lib/gherkin/rb_lexer.rb +9 -0
  42. data/lib/gherkin/rb_lexer/.gitignore +1 -0
  43. data/lib/gherkin/rb_lexer/README.rdoc +8 -0
  44. data/lib/gherkin/rb_parser.rb +117 -0
  45. data/lib/gherkin/tools/pretty_printer.rb +77 -0
  46. data/ragel/i18n/.gitignore +1 -0
  47. data/ragel/lexer.c.rl.erb +385 -0
  48. data/ragel/lexer.java.rl.erb +198 -0
  49. data/ragel/lexer.rb.rl.erb +172 -0
  50. data/ragel/lexer_common.rl.erb +46 -0
  51. data/spec/gherkin/c_lexer_spec.rb +21 -0
  52. data/spec/gherkin/fixtures/complex.feature +43 -0
  53. data/spec/gherkin/fixtures/i18n_fr.feature +13 -0
  54. data/spec/gherkin/fixtures/i18n_no.feature +6 -0
  55. data/spec/gherkin/fixtures/i18n_zh-CN.feature +8 -0
  56. data/spec/gherkin/fixtures/simple.feature +3 -0
  57. data/spec/gherkin/fixtures/simple_with_comments.feature +7 -0
  58. data/spec/gherkin/fixtures/simple_with_tags.feature +11 -0
  59. data/spec/gherkin/i18n_spec.rb +57 -0
  60. data/spec/gherkin/java_lexer_spec.rb +20 -0
  61. data/spec/gherkin/parser_spec.rb +28 -0
  62. data/spec/gherkin/rb_lexer_spec.rb +18 -0
  63. data/spec/gherkin/sexp_recorder.rb +29 -0
  64. data/spec/gherkin/shared/lexer_spec.rb +420 -0
  65. data/spec/gherkin/shared/py_string_spec.rb +112 -0
  66. data/spec/gherkin/shared/table_spec.rb +97 -0
  67. data/spec/gherkin/shared/tags_spec.rb +50 -0
  68. data/spec/spec_helper.rb +53 -0
  69. data/tasks/bench.rake +176 -0
  70. data/tasks/bench/feature_builder.rb +49 -0
  71. data/tasks/bench/generated/.gitignore +1 -0
  72. data/tasks/bench/null_listener.rb +4 -0
  73. data/tasks/cucumber.rake +20 -0
  74. data/tasks/ext.rake +49 -0
  75. data/tasks/ragel.rake +94 -0
  76. data/tasks/rdoc.rake +12 -0
  77. data/tasks/rspec.rake +15 -0
  78. metadata +204 -0
@@ -0,0 +1,29 @@
1
+ require 'gherkin/lexer'
2
+
3
module Gherkin
  # The main entry point to lexing Gherkin source.
  #
  # Detects an optional language declaration on the first line of the
  # source (e.g. "# language: fr") and delegates scanning to the lexer
  # generated for that language, defaulting to English.
  class I18nLexer
    LANGUAGE_PATTERN = /language\s*:\s*(.*)/ #:nodoc:

    # +parser+ is the listener that will receive the lexing events.
    def initialize(parser)
      @parser = parser
    end

    # Scans +source+ with the lexer for its declared language ('en' when
    # no declaration is present).
    def scan(source)
      delegate = Lexer[lang(source) || 'en'].new(@parser)
      delegate.scan(source)
    end

    private

    # Returns the language code declared on the FIRST line of +source+,
    # or nil when there is no declaration. Only line one is examined, so
    # a declaration further down is deliberately ignored.
    def lang(source)
      # Split with a limit of 2 so we never tokenise the whole document
      # just to look at its first line.
      first_line = source.split(/\n/, 2).first
      match = first_line && LANGUAGE_PATTERN.match(first_line)
      match && match[1].strip
    end
  end
end
@@ -0,0 +1,10 @@
1
+ require 'gherkin.jar'
2
+
3
module Gherkin
  # Locates the generated Java lexer class when running under JRuby.
  module JavaLexer
    # Returns the Java lexer class for +i18n_language+, e.g. 'en' maps to
    # Java::GherkinLexer::En ('zh-CN' -> 'Zhcn': spaces/dashes stripped,
    # then capitalized to match the generated class names).
    def self.[](i18n_language)
      class_name = i18n_language.gsub(/[\s-]/, '').capitalize
      Java::GherkinLexer.__send__(class_name)
    end
  end
end
@@ -0,0 +1,42 @@
1
module Gherkin
  module Lexer
    # Raised when no lexer implementation can be loaded for a language.
    I18nLexerNotFound = Class.new(LoadError)
    # Raised by lexers when the source cannot be tokenised.
    LexingError = Class.new(StandardError)

    # Returns the best available lexer class for +i18n_lang+: the Java
    # lexer on JRuby, otherwise the native C lexer with a fallback to the
    # pure-Ruby lexer. Raises I18nLexerNotFound when nothing can be loaded.
    def self.[](i18n_lang)
      if defined?(JRUBY_VERSION)
        java[i18n_lang]
      else
        begin
          c[i18n_lang]
        rescue NameError => e
          # The C extension loaded but has no class for this language.
          warn("WARNING: #{e.message}. Reverting to Ruby lexer")
          rb[i18n_lang]
        rescue LoadError
          # The C extension is not compiled/installed.
          rb[i18n_lang]
        end
      end
    rescue LoadError
      raise I18nLexerNotFound, "No lexer was found for #{i18n_lang}. Supported languages are listed in gherkin/i18n.yml."
    end

    # Lazily loads and returns the C lexer namespace.
    def self.c
      require 'gherkin/c_lexer'
      CLexer
    end

    # Lazily loads and returns the Java lexer namespace.
    def self.java
      require 'gherkin/java_lexer'
      JavaLexer
    end

    # Lazily loads and returns the pure-Ruby lexer namespace.
    def self.rb
      require 'gherkin/rb_lexer'
      RbLexer
    end
  end
end
@@ -0,0 +1,19 @@
1
module Gherkin
  # Raised (or reported via the listener) when an event arrives while the
  # state machine is in a state that does not permit it.
  class ParseError < StandardError
    def initialize(state, new_state, expected_states, line)
      super("Parse error on line #{line}. Found #{new_state} when expecting one of: #{expected_states.join(', ')}. (Current state: #{state}).")
    end
  end

  class Parser
    # Factory method: picks the Java parser under JRuby and the pure-Ruby
    # parser everywhere else. Callers receive a parser instance either way.
    def self.new(listener, raise_on_error = false, machine_names = 'root')
      if defined?(JRUBY_VERSION)
        require 'gherkin.jar'
        Java::Gherkin::Parser.new(listener, raise_on_error, machine_names)
      else
        require 'gherkin/rb_parser'
        RbParser.new(listener, raise_on_error, machine_names)
      end
    end
  end
end
@@ -0,0 +1,4 @@
1
+ | | feature | background | scenario | scenario_outline | examples | step | table | py_string | comment | tag |
2
+ | meta | E | E | E | E | E | E | E | E | comment | tag |
3
+ | comment | pop() | pop() | pop() | pop() | pop() | pop() | pop() | pop() | pop() | tag |
4
+ | tag | pop() | E | pop() | pop() | pop() | E | E | E | E | tag |
@@ -0,0 +1,9 @@
1
+ | | feature | background | scenario | scenario_outline | examples | step | table | py_string | comment | tag |
2
+ | root | feature | E | E | E | E | E | E | E | push(meta) | push(meta) |
3
+ | feature | E | background | scenario | scenario_outline | E | E | E | E | push(meta) | push(meta) |
4
+ | step | E | E | scenario | scenario_outline | examples | step | step | step | push(meta) | push(meta) |
5
+ | background | E | E | scenario | scenario_outline | E | step | E | E | push(meta) | push(meta) |
6
+ | scenario | E | E | scenario | scenario_outline | E | step | E | E | push(meta) | push(meta) |
7
+ | scenario_outline | E | E | E | E | E | step | E | E | push(meta) | push(meta) |
8
+ | examples | E | E | E | E | E | E | examples_table | E | push(meta) | push(meta) |
9
+ | examples_table | E | E | scenario | scenario_outline | examples | E | E | E | push(meta) | push(meta) |
@@ -0,0 +1,3 @@
1
+ | | feature | background | scenario | scenario_outline | examples | step | table | py_string | comment | tag |
2
+ | steps | E | E | E | E | E | step | E | E | E | E |
3
+ | step | E | E | E | E | E | step | steps | steps | E | E |
@@ -0,0 +1,9 @@
1
module Gherkin
  # Resolves the generated pure-Ruby lexer class for an i18n language.
  module RbLexer
    # Loads gherkin/rb_lexer/<lang>.rb and returns its class, e.g. 'en'
    # yields En (spaces/dashes stripped, then capitalized).
    def self.[](i18n_language)
      require "gherkin/rb_lexer/#{i18n_language}"
      const_get(i18n_language.gsub(/[\s-]/, '').capitalize)
    end
  end
end
@@ -0,0 +1 @@
1
+ *.rb
@@ -0,0 +1,8 @@
1
+ = Lexers
2
+
3
+ Gherkin supports lexing of lots of natural languages, defined by gherkin/i18n.yml
4
+ The lexers are generated with the following command:
5
+
6
+ rake ragel:i18n
7
+
8
+ You have to run this command if you modify gherkin/i18n.yml
@@ -0,0 +1,117 @@
1
module Gherkin
  # Pure-Ruby event-driven parser. It sits between a lexer and a listener:
  # the lexer invokes event methods (feature, scenario, step, ...) which are
  # trapped by method_missing, validated against a table-driven state
  # machine, and forwarded to the listener only when legal in the current
  # state.
  class RbParser
    # Initialize the parser. +machine_name+ refers to a state machine table.
    def initialize(listener, raise_on_error, machine_name)
      @listener = listener
      @raise_on_error = raise_on_error
      @machines = []
      push_machine(machine_name)
    end

    # Doesn't yet fall back to super
    def method_missing(method, *args)
      # TODO: Catch exception and call super
      # By convention the last argument of every lexer event is its line number.
      if(event(method.to_s, args[-1]))
        @listener.send(method, *args)
      end
    end

    # Feeds event +ev+ to the current machine. Returns true when the event
    # is legal; on an illegal event either raises ParseError or reports
    # syntax_error to the listener and returns false.
    def event(ev, line)
      machine.event(ev, line) do |state, expected|
        if @raise_on_error
          raise ParseError.new(state, ev, expected, line)
        else
          @listener.syntax_error(state, ev, expected, line)
          # `return` inside the block exits RbParser#event itself with false.
          return false
        end
      end
      true
    end

    # Pushes a sub-machine (e.g. "meta") onto the machine stack.
    def push_machine(name)
      @machines.push(Machine.new(self, name))
    end

    def pop_machine
      @machines.pop
    end

    # The machine on top of the stack handles current events.
    def machine
      @machines[-1]
    end

    # Event names legal in the current state of the current machine.
    def expected
      machine.expected
    end

    # Test/tooling hook: forcibly set the current machine's state.
    def force_state(state)
      machine.instance_variable_set('@state', state)
    end

    # A single state machine built from a transition table stored in
    # parser/<name>.txt. Each cell holds a target state name, "E" (error),
    # "push(<machine>)" or "pop()".
    class Machine
      def initialize(parser, name)
        @parser = parser
        @name = name
        @transition_map = transition_map(name)
        # The initial state is named after the machine itself.
        @state = name
      end

      # Applies +ev+ to the current state. Yields (state, expected) on an
      # illegal event; push/pop cells delegate the event back through the
      # parser so the new top-of-stack machine handles it.
      def event(ev, line)
        states = @transition_map[@state]
        raise "Unknown state: #{@state.inspect} for machine #{@name}" if states.nil?
        new_state = states[ev]
        case new_state
        when "E"
          yield @state, expected
        when /push\((.+)\)/
          # Enter a sub-machine and replay the same event on it.
          @parser.push_machine($1)
          @parser.event(ev, line)
        when "pop()"
          # Return to the previous machine and replay the same event on it.
          @parser.pop_machine()
          @parser.event(ev, line)
        else
          raise "Unknown transition: #{ev.inspect} among #{states.inspect} for machine #{@name}" if new_state.nil?
          @state = new_state
        end
      end

      # Sorted event names that would NOT be an error in the current state.
      def expected
        allowed = @transition_map[@state].find_all { |_, action| action != "E" }
        allowed.collect { |state| state[0] }.sort
      end

      private

      # Process-wide cache of parsed transition tables, keyed by machine name.
      @@transition_maps = {}

      def transition_map(name)
        @@transition_maps[name] ||= build_transition_map(name)
      end

      # Converts the raw table into {state => {event => action}}.
      def build_transition_map(name)
        table = transition_table(name)
        # The header row holds the event names (first cell is blank).
        events = table.shift[1..-1]
        table.inject({}) do |machine, actions|
          state = actions.shift
          machine[state] = Hash[*events.zip(actions).flatten]
          machine
        end
      end

      # Reads parser/<name>.txt by lexing it as a Gherkin table.
      def transition_table(name)
        state_machine_reader = StateMachineReader.new
        lexer = Gherkin::Lexer['en'].new(state_machine_reader)
        lexer.scan(File.read(File.dirname(__FILE__) + "/parser/#{name}.txt"))
        state_machine_reader.rows
      end

      # Minimal lexer listener that only captures table rows.
      class StateMachineReader
        attr_reader :rows
        def table(rows, line_number)
          @rows = rows
        end
      end

    end
  end
end
@@ -0,0 +1,77 @@
1
module Gherkin
  module Tools
    # Lexer/parser listener that pretty-prints the events it receives back
    # to +io+ as canonically indented feature text.
    class PrettyPrinter
      # +io+ is any object responding to #puts (File, StringIO, ...).
      def initialize(io)
        @io = io
      end

      # Tags are buffered and emitted by the next feature/scenario event.
      def tag(name, line)
        @tags ||= []
        @tags << "@#{name}"
      end

      def comment(content, line)
        @io.puts content
      end

      def feature(keyword, name, line)
        tags = @tags ? @tags.join(' ') + "\n" : ''
        @tags = nil
        @io.puts "#{tags}#{keyword}: #{indent(name, '  ')}"
      end

      def background(keyword, name, line)
        @io.puts "\n  #{keyword}: #{indent(name, '  ')}"
      end

      def scenario(keyword, name, line)
        tags = @tags ? '  ' + @tags.join(' ') + "\n" : ''
        @tags = nil
        @io.puts "\n#{tags}  #{keyword}: #{indent(name, '  ')}"
      end

      def scenario_outline(keyword, name, line)
        tags = @tags ? '  ' + @tags.join(' ') + "\n" : ''
        @tags = nil
        @io.puts "\n#{tags}  #{keyword}: #{indent(name, '  ')}"
      end

      def examples(keyword, name, line)
        @io.puts "\n    #{keyword}: #{indent(name, '    ')}"
      end

      def step(keyword, name, line)
        @io.puts "    #{keyword} #{indent(name, '    ')}"
      end

      # Prints +rows+ as a pipe table with every column padded to the width
      # of its widest cell. unpack("U*") counts characters rather than
      # bytes, so multi-byte UTF-8 cells stay aligned.
      def table(rows, line)
        rows = rows.to_a.map {|row| row.to_a} if defined?(JRUBY_VERSION) # Convert ArrayList
        max_lengths = rows.transpose.map { |col| col.map { |cell| cell.unpack("U*").length }.max }
        # FIX: the block variable was previously named `line`, shadowing
        # the method parameter of the same name.
        rows.each do |row|
          @io.puts '      | ' + row.zip(max_lengths).map { |cell, max_length| cell + ' ' * (max_length - cell.unpack("U*").length) }.join(' | ') + ' |'
        end
      end

      def py_string(string, line)
        @io.puts '      """'
        @io.puts string.gsub(/^/, '      ')
        @io.puts '      """'
      end

      def syntax_error(state, event, legal_events, line)
        raise "SYNTAX ERROR"
      end

      private

      # Indents every line of +string+ after the first with +indentation+,
      # so multi-line names line up under their keyword.
      def indent(string, indentation)
        indent = ""
        string.split(/\n/n).map do |l|
          s = "#{indent}#{l}"
          indent = indentation
          s
        end.join("\n")
      end
    end
  end
end
@@ -0,0 +1 @@
1
+ *.rl
@@ -0,0 +1,385 @@
1
/*
 * ERB template for the Ragel-generated C extension lexer.  It is expanded
 * once per i18n language (see <%= i18n_language %> below), then compiled by
 * Ragel into pure C.  The resulting lexer scans Gherkin source and reports
 * events (feature, scenario, step, table, py_string, comment, tag, ...) to
 * a Ruby listener object.
 */
#include <assert.h>
#include <ruby.h>

#if defined(_WIN32)
#include <stddef.h>
#endif

/* Compatibility shims for older Ruby versions lacking these accessors. */
#ifndef RSTRING_PTR
#define RSTRING_PTR(s) (RSTRING(s)->ptr)
#endif

#ifndef RSTRING_LEN
#define RSTRING_LEN(s) (RSTRING(s)->len)
#endif

/* Fetch the wrapped C struct from a Ruby object; fail loudly if missing. */
#define DATA_GET(FROM, TYPE, NAME) \
  Data_Get_Struct(FROM, TYPE, NAME); \
  if (NAME == NULL) { \
    rb_raise(rb_eArgError, "NULL found for " # NAME " when it shouldn't be."); \
  }

/*
 * Scanner state.  All positions are stored as byte OFFSETS into the input
 * buffer (not pointers) so the state is independent of Ragel's p/pe
 * pointers and can be re-initialized cheaply between scans.
 */
typedef struct lexer_state {
  int content_len;
  int line_number;           /* 1-based line currently being scanned */
  int current_line;          /* line on which the current event started */
  int start_col;             /* column where a py_string opened */
  size_t mark;
  size_t keyword_start;
  size_t keyword_end;
  size_t next_keyword_start;
  size_t content_start;
  size_t content_end;
  size_t field_len;
  size_t query_start;
  size_t last_newline;       /* offset just past the most recent newline */
  size_t final_newline;
} lexer_state;

static VALUE mGherkin;
static VALUE mLexer;
static VALUE mCLexer;
static VALUE cI18nLexer;
static VALUE rb_eGherkinLexerError;

/* Offset helpers; `data` (buffer start) and `lexer` must be in scope. */
#define LEN(AT, P) (P - data - lexer->AT)
#define MARK(M, P) (lexer->M = (P) - data)
#define PTR_TO(P) (data + lexer->P)

/* Emit a keyword+content event (feature/background/scenario/...), then
 * rewind p to just before content_end so trailing input is re-scanned. */
#define STORE_KW_END_CON(EVENT) \
  store_kw_con(listener, # EVENT, \
    PTR_TO(keyword_start), LEN(keyword_start, PTR_TO(keyword_end - 1)), \
    PTR_TO(content_start), LEN(content_start, PTR_TO(content_end)), \
    lexer->current_line); \
  if (lexer->content_end != 0) { \
    p = PTR_TO(content_end - 1); \
  } \
  lexer->content_end = 0;

/* Emit a single-value event (comment/tag). */
#define STORE_ATTR(ATTR) \
  store_attr(listener, # ATTR, \
    PTR_TO(content_start), LEN(content_start, p), \
    lexer->line_number);

%%{
  machine lexer;

  # Mark where an event's content begins and remember its line.
  action begin_content {
    MARK(content_start, p);
    lexer->current_line = lexer->line_number;
  }

  action begin_pystring_content {
    MARK(content_start, p);
  }

  # Record the column of the opening triple-quote so the same amount of
  # indentation can be stripped from every py_string line later.
  action start_pystring {
    lexer->current_line = lexer->line_number;
    lexer->start_col = p - data - lexer->last_newline;
  }

  action store_pystring_content {
    int len = LEN(content_start, PTR_TO(final_newline));

    if (len < 0) len = 0;

    store_pystring_content(listener, lexer->start_col, PTR_TO(content_start), len, lexer->current_line);
  }

  action store_feature_content {
    STORE_KW_END_CON(feature)
  }

  action store_background_content {
    STORE_KW_END_CON(background)
  }

  action store_scenario_content {
    STORE_KW_END_CON(scenario)
  }

  action store_scenario_outline_content {
    STORE_KW_END_CON(scenario_outline)
  }

  action store_examples_content {
    STORE_KW_END_CON(examples)
  }

  action store_step_content {
    store_kw_con(listener, "step",
      PTR_TO(keyword_start), LEN(keyword_start, PTR_TO(keyword_end)),
      PTR_TO(content_start), LEN(content_start, p),
      lexer->current_line);
  }

  action store_comment_content {
    STORE_ATTR(comment)
  }

  action store_tag_content {
    STORE_ATTR(tag)
  }

  action inc_line_number {
    lexer->line_number += 1;
    MARK(final_newline, p);
  }

  action last_newline {
    MARK(last_newline, p + 1);
  }

  # Only the FIRST character of a keyword sets the mark (mark == 0 means
  # "no keyword in progress").
  action start_keyword {
    if (lexer->mark == 0) {
      MARK(mark, p);
    }
  }

  action end_keyword {
    MARK(keyword_end, p);
    MARK(keyword_start, PTR_TO(mark));
    MARK(content_start, p + 1);
    lexer->mark = 0;
  }

  action next_keyword_start {
    MARK(content_end, p);
  }

  action start_table {
    p = p - 1;  /* re-scan the '|' that triggered this action */
    lexer->current_line = lexer->line_number;
    rb_ary_clear(rows);
    rb_ary_clear(current_row);
  }

  action begin_cell_content {
    MARK(content_start, p);
  }

  action store_cell_content {
    VALUE con = Qnil;
    con = rb_str_new(PTR_TO(content_start), LEN(content_start, p));
    rb_funcall(con, rb_intern("strip!"), 0);

    rb_ary_push(current_row, con);
  }

  action start_row {
    current_row = rb_ary_new();
  }

  action store_row {
    rb_ary_push(rows, current_row);
  }

  action store_table {
    rb_funcall(listener, rb_intern("table"), 2, rows, INT2FIX(lexer->current_line));
  }

  # Fires at EOF.  If the machine is not in a final state, extract the
  # offending line (text after the last newline, truncated at the next
  # newline, with '%' doubled so it survives printf-style formatting) and
  # raise a lexing error.
  action end_feature {
    if (cs < lexer_first_final) {
      if (raise_lexer_error != NULL) {
        int count = 0;
        int newstr_count = 0;
        size_t len;
        const char *buff;
        if (lexer->last_newline != 0) {
          len = LEN(last_newline, eof);
          buff = PTR_TO(last_newline);
        } else {
          len = strlen(data);
          buff = data;
        }

        /* NOTE(review): newstr is sized `len`, but the '%'-doubling branch
         * below writes two bytes per '%' — a line with many '%' characters
         * looks like it can overflow this VLA; confirm and bound-check. */
        char newstr[len];

        for (count = 0; count < len; count++) {
          if(buff[count] == 10) {
            newstr[newstr_count] = '\0'; // terminate new string at first newline found
            break;
          } else {
            if (buff[count] == '%') {
              newstr[newstr_count++] = buff[count];
              newstr[newstr_count] = buff[count];
            } else {
              newstr[newstr_count] = buff[count];
            }
          }
          newstr_count++;
        }

        int line = lexer->line_number;
        lexer_init(lexer); // Re-initialize so we can scan again with the same lexer
        raise_lexer_error(listener, newstr, line);
      }
    }
  }

  # Language-specific keyword definitions, generated from i18n.yml.
  include lexer_common "lexer_common.<%= i18n_language %>.rl";

}%%

/** Data **/
%% write data;

/* rb_iterate callback: strips one string in place and collects it. */
static VALUE
strip_i(VALUE str, VALUE ary)
{
  rb_funcall(str, rb_intern("strip!"), 0);
  rb_ary_push(ary, str);

  return Qnil;
}

/* Strips every line of a multi-line string and rejoins with newlines.
 * NOTE(review): `map` collects the stripped lines but the return value is
 * built from `split` — this works only because strip! mutates in place,
 * leaving `map` redundant. */
static VALUE
multiline_strip(VALUE text)
{
  VALUE map = rb_ary_new();
  VALUE split = rb_str_split(text, "\n");

  rb_iterate(rb_each, split, strip_i, map);

  return rb_ary_join(split, rb_str_new2("\n"));
}

/* Sends listener.<event_name>(keyword, content, line) with both strings
 * stripped. */
static void
store_kw_con(VALUE listener, const char * event_name,
             const char * keyword_at, size_t keyword_length,
             const char * at, size_t length,
             int current_line)
{
  VALUE con = Qnil, kw = Qnil;
  kw = rb_str_new(keyword_at, keyword_length);
  con = rb_str_new(at, length);
  con = multiline_strip(con);
  rb_funcall(con, rb_intern("strip!"), 0);
  rb_funcall(kw, rb_intern("strip!"), 0);
  rb_funcall(listener, rb_intern(event_name), 3, kw, con, INT2FIX(current_line));
}

/* Sends listener.<attr_type>(value, line) — used for comments and tags. */
static void
store_attr(VALUE listener, const char * attr_type,
           const char * at, size_t length,
           int line)
{
  VALUE val = rb_str_new(at, length);
  rb_funcall(listener, rb_intern(attr_type), 2, val, INT2FIX(line));
}

/* Sends listener.py_string(content, line) after stripping up to start_col
 * leading spaces from every line of the docstring. */
static void
store_pystring_content(VALUE listener,
          int start_col,
          const char *at, size_t length,
          int current_line)
{
  VALUE con = rb_str_new(at, length);
  // Gherkin will crash gracefully if the string representation of start_col pushes the pattern past 64 characters
  char pat[32];
  snprintf(pat, 32, "^ {0,%d}", start_col);
  VALUE re = rb_reg_regcomp(rb_str_new2(pat));
  rb_funcall(con, rb_intern("gsub!"), 2, re, rb_str_new2(""));
  rb_funcall(listener, rb_intern("py_string"), 2, con, INT2FIX(current_line));
}

/* Raises Gherkin::Lexer::LexingError with the offending line. */
static void
raise_lexer_error(VALUE listener, const char * at, int line)
{
  rb_raise(rb_eGherkinLexerError, "Lexing error on line %d: '%s'.", line, at);
}

/* Resets all scanner state so the same lexer instance can scan again. */
static void lexer_init(lexer_state *lexer) {
  lexer->content_start = 0;
  lexer->content_end = 0;
  lexer->content_len = 0;
  lexer->mark = 0;
  lexer->field_len = 0;
  lexer->keyword_start = 0;
  lexer->keyword_end = 0;
  lexer->next_keyword_start = 0;
  lexer->line_number = 1;
  lexer->last_newline = 0;
  lexer->final_newline = 0;
  lexer->start_col = 0;
}

/* Ruby allocator: wraps a fresh lexer_state (freed with free()). */
static VALUE CLexer_alloc(VALUE klass)
{
  VALUE obj;
  lexer_state *lxr = ALLOC(lexer_state);
  lexer_init(lxr);

  obj = Data_Wrap_Struct(klass, NULL, -1, lxr);

  return obj;
}

/* #initialize(listener): stores the listener and resets scanner state. */
static VALUE CLexer_init(VALUE self, VALUE listener)
{
  rb_iv_set(self, "@listener", listener);

  lexer_state *lxr = NULL;
  DATA_GET(self, lexer_state, lxr);
  lexer_init(lxr);

  return self;
}

/* #scan(input): runs the Ragel machine over input, emitting events to
 * @listener.  Returns Qtrue on success; raises LexingError on failure.
 * NOTE(review): rb_str_append mutates the CALLER'S string by appending the
 * "\n%_FEATURE_END_%" sentinel — confirm callers tolerate this. */
static VALUE CLexer_scan(VALUE self, VALUE input)
{
  lexer_state *lexer = NULL;
  DATA_GET(self, lexer_state, lexer);

  rb_str_append(input, rb_str_new2("\n%_FEATURE_END_%"));
  char *data = RSTRING_PTR(input);
  long len = RSTRING_LEN(input);

  if (len == 0) {
    rb_raise(rb_eGherkinLexerError, "No content to lex.");
  } else {
    const char *p, *pe, *eof;
    int cs = 0;

    VALUE listener = rb_iv_get(self, "@listener");
    VALUE rows = rb_ary_new();
    VALUE current_row = rb_ary_new();

    p = data;
    pe = data + len;
    eof = pe;

    assert(*pe == '\0' && "pointer does not end on NULL");
    assert(pe - p == len && "pointers aren't same distance");

    %% write init;
    %% write exec;

    assert(p <= pe && "data overflow after parsing execute");
    assert(lexer->content_start <= len && "content starts after data end");
    assert(lexer->mark < len && "mark is after data end");
    assert(lexer->field_len <= len && "field has length longer than the whole data");

    // Reset lexer by re-initializing the whole thing
    lexer_init(lexer);

    if (cs == lexer_error) {
      rb_raise(rb_eGherkinLexerError, "Invalid format, lexing fails.");
    } else {
      return Qtrue;
    }
  }
}

/* Extension entry point: defines Gherkin::CLexer::<Language> and wires the
 * error class to the already-loaded Gherkin::Lexer::LexingError. */
void Init_gherkin_lexer()
{
  mGherkin = rb_define_module("Gherkin");
  mLexer = rb_const_get(mGherkin, rb_intern("Lexer"));
  rb_eGherkinLexerError = rb_const_get(mLexer, rb_intern("LexingError"));

  mCLexer = rb_define_module_under(mGherkin, "CLexer");
  cI18nLexer = rb_define_class_under(mCLexer, "En", rb_cObject);
  rb_define_alloc_func(cI18nLexer, CLexer_alloc);
  rb_define_method(cI18nLexer, "initialize", CLexer_init, 1);
  rb_define_method(cI18nLexer, "scan", CLexer_scan, 1);
}