gherkin 1.0.30-universal-dotnet

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104)
  1. data/.gitattributes +2 -0
  2. data/.gitignore +9 -0
  3. data/.mailmap +2 -0
  4. data/History.txt +187 -0
  5. data/LICENSE +20 -0
  6. data/README.rdoc +59 -0
  7. data/Rakefile +58 -0
  8. data/VERSION.yml +5 -0
  9. data/bin/gherkin +5 -0
  10. data/cucumber.yml +3 -0
  11. data/features/escaped_pipes.feature +8 -0
  12. data/features/feature_parser.feature +226 -0
  13. data/features/native_lexer.feature +19 -0
  14. data/features/parser_with_native_lexer.feature +205 -0
  15. data/features/pretty_printer.feature +14 -0
  16. data/features/step_definitions/eyeball_steps.rb +3 -0
  17. data/features/step_definitions/gherkin_steps.rb +30 -0
  18. data/features/step_definitions/pretty_formatter_steps.rb +55 -0
  19. data/features/steps_parser.feature +46 -0
  20. data/features/support/env.rb +33 -0
  21. data/ikvm/.gitignore +3 -0
  22. data/java/.gitignore +2 -0
  23. data/java/src/main/java/gherkin/lexer/.gitignore +1 -0
  24. data/java/src/main/resources/gherkin/.gitignore +1 -0
  25. data/lib/.gitignore +4 -0
  26. data/lib/gherkin.rb +2 -0
  27. data/lib/gherkin/c_lexer.rb +17 -0
  28. data/lib/gherkin/cli/main.rb +33 -0
  29. data/lib/gherkin/formatter/argument.rb +27 -0
  30. data/lib/gherkin/formatter/colors.rb +119 -0
  31. data/lib/gherkin/formatter/escaping.rb +15 -0
  32. data/lib/gherkin/formatter/monochrome_format.rb +9 -0
  33. data/lib/gherkin/formatter/pretty_formatter.rb +168 -0
  34. data/lib/gherkin/i18n.rb +176 -0
  35. data/lib/gherkin/i18n.yml +588 -0
  36. data/lib/gherkin/i18n_lexer.rb +38 -0
  37. data/lib/gherkin/native.rb +7 -0
  38. data/lib/gherkin/native/ikvm.rb +55 -0
  39. data/lib/gherkin/native/java.rb +47 -0
  40. data/lib/gherkin/native/null.rb +9 -0
  41. data/lib/gherkin/parser/event.rb +45 -0
  42. data/lib/gherkin/parser/filter_listener.rb +199 -0
  43. data/lib/gherkin/parser/meta.txt +5 -0
  44. data/lib/gherkin/parser/parser.rb +142 -0
  45. data/lib/gherkin/parser/root.txt +11 -0
  46. data/lib/gherkin/parser/steps.txt +4 -0
  47. data/lib/gherkin/parser/tag_expression.rb +50 -0
  48. data/lib/gherkin/rb_lexer.rb +8 -0
  49. data/lib/gherkin/rb_lexer/.gitignore +1 -0
  50. data/lib/gherkin/rb_lexer/README.rdoc +8 -0
  51. data/lib/gherkin/rubify.rb +18 -0
  52. data/lib/gherkin/tools.rb +8 -0
  53. data/lib/gherkin/tools/files.rb +35 -0
  54. data/lib/gherkin/tools/reformat.rb +19 -0
  55. data/lib/gherkin/tools/stats.rb +21 -0
  56. data/lib/gherkin/tools/stats_listener.rb +57 -0
  57. data/ragel/i18n/.gitignore +1 -0
  58. data/ragel/lexer.c.rl.erb +425 -0
  59. data/ragel/lexer.java.rl.erb +216 -0
  60. data/ragel/lexer.rb.rl.erb +173 -0
  61. data/ragel/lexer_common.rl.erb +50 -0
  62. data/spec/gherkin/c_lexer_spec.rb +21 -0
  63. data/spec/gherkin/csharp_lexer_spec.rb +20 -0
  64. data/spec/gherkin/fixtures/1.feature +8 -0
  65. data/spec/gherkin/fixtures/comments_in_table.feature +9 -0
  66. data/spec/gherkin/fixtures/complex.feature +45 -0
  67. data/spec/gherkin/fixtures/dos_line_endings.feature +45 -0
  68. data/spec/gherkin/fixtures/i18n_fr.feature +14 -0
  69. data/spec/gherkin/fixtures/i18n_no.feature +7 -0
  70. data/spec/gherkin/fixtures/i18n_zh-CN.feature +9 -0
  71. data/spec/gherkin/fixtures/simple_with_comments.feature +7 -0
  72. data/spec/gherkin/fixtures/simple_with_tags.feature +11 -0
  73. data/spec/gherkin/fixtures/with_bom.feature +3 -0
  74. data/spec/gherkin/formatter/argument_spec.rb +28 -0
  75. data/spec/gherkin/formatter/colors_spec.rb +19 -0
  76. data/spec/gherkin/formatter/pretty_formatter_spec.rb +162 -0
  77. data/spec/gherkin/formatter/spaces.feature +9 -0
  78. data/spec/gherkin/formatter/tabs.feature +9 -0
  79. data/spec/gherkin/i18n_lexer_spec.rb +26 -0
  80. data/spec/gherkin/i18n_spec.rb +144 -0
  81. data/spec/gherkin/java_lexer_spec.rb +21 -0
  82. data/spec/gherkin/parser/filter_listener_spec.rb +390 -0
  83. data/spec/gherkin/parser/parser_spec.rb +50 -0
  84. data/spec/gherkin/parser/tag_expression_spec.rb +116 -0
  85. data/spec/gherkin/rb_lexer_spec.rb +19 -0
  86. data/spec/gherkin/sexp_recorder.rb +32 -0
  87. data/spec/gherkin/shared/lexer_spec.rb +550 -0
  88. data/spec/gherkin/shared/py_string_spec.rb +150 -0
  89. data/spec/gherkin/shared/row_spec.rb +104 -0
  90. data/spec/gherkin/shared/tags_spec.rb +50 -0
  91. data/spec/spec_helper.rb +87 -0
  92. data/tasks/bench.rake +188 -0
  93. data/tasks/bench/feature_builder.rb +49 -0
  94. data/tasks/bench/generated/.gitignore +1 -0
  95. data/tasks/bench/null_listener.rb +4 -0
  96. data/tasks/compile.rake +89 -0
  97. data/tasks/cucumber.rake +26 -0
  98. data/tasks/gems.rake +45 -0
  99. data/tasks/ikvm.rake +47 -0
  100. data/tasks/ragel_task.rb +70 -0
  101. data/tasks/rdoc.rake +12 -0
  102. data/tasks/release.rake +26 -0
  103. data/tasks/rspec.rake +15 -0
  104. metadata +257 -0
@@ -0,0 +1,11 @@
+ | | feature | background | scenario | scenario_outline | examples | step | row | py_string | eof | comment | tag |
+ | root | feature | E | E | E | E | E | E | E | eof | push(meta) | push(meta) |
+ | feature | E | background | scenario | scenario_outline | E | E | E | E | eof | push(meta) | push(meta) |
+ | step | E | E | scenario | scenario_outline | E | step | step | step | eof | push(meta) | push(meta) |
+ | outline_step | E | E | scenario | scenario_outline | examples | outline_step | outline_step | outline_step | eof | push(meta) | push(meta) |
+ | background | E | E | scenario | scenario_outline | E | step | E | E | eof | push(meta) | push(meta) |
+ | scenario | E | E | scenario | scenario_outline | E | step | E | E | eof | push(meta) | push(meta) |
+ | scenario_outline | E | E | E | E | E | outline_step | E | E | eof | push(meta) | push(meta) |
+ | examples | E | E | E | E | E | E | examples_table | E | eof | push(meta) | push(meta) |
+ | examples_table | E | E | scenario | scenario_outline | examples | E | examples_table | E | eof | push(meta) | push(meta) |
+ | eof | E | E | E | E | E | E | E | E | E | E | E |
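
The table above (data/lib/gherkin/parser/root.txt) is the state-transition table for the feature parser: each row is a parser state, each column a lexer event, and each cell names the next state, E for a syntax error, or push(meta) to divert comments and tags to the nested machine in meta.txt. The actual table-driven parser lives in data/lib/gherkin/parser/parser.rb (listed above but not included in this excerpt); the Ruby sketch below only illustrates how a table of this shape drives event dispatch, and the names TRANSITIONS and next_state are illustrative, not part of the gem.

    # Illustrative only: a tiny dispatcher shaped like a few cells of root.txt.
    # TRANSITIONS and next_state are made-up names, not the gherkin gem's API.
    TRANSITIONS = {
      'root'     => { 'feature' => 'feature', 'eof' => 'eof' },
      'feature'  => { 'background' => 'background', 'scenario' => 'scenario' },
      'scenario' => { 'step' => 'step' },
      'step'     => { 'step' => 'step', 'scenario' => 'scenario', 'eof' => 'eof' }
    }

    def next_state(state, event)
      target = TRANSITIONS.fetch(state, {}).fetch(event, 'E')
      raise "syntax error: #{event} is not legal in state #{state}" if target == 'E'
      target
    end

    state = 'root'
    %w[feature scenario step step eof].each { |event| state = next_state(state, event) }
    state  # => "eof"
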
@@ -0,0 +1,4 @@
+ | | feature | background | scenario | scenario_outline | examples | step | row | py_string | eof | comment | tag |
+ | steps | E | E | E | E | E | step | E | E | eof | E | E |
+ | step | E | E | E | E | E | step | step | steps | eof | E | E |
+ | eof | E | E | E | E | E | E | E | E | E | E | E |
@@ -0,0 +1,50 @@
+ require 'gherkin/native'
+
+ module Gherkin
+   module Parser
+     class TagExpression
+       native_impl('gherkin')
+
+       attr_reader :limits
+
+       def initialize(tag_expressions)
+         @ands = []
+         @limits = {}
+         tag_expressions.each do |expr|
+           add(expr.strip.split(/\s*,\s*/))
+         end
+       end
+
+       def empty?
+         @ands.empty?
+       end
+
+       def eval(tags)
+         return true if @ands.flatten.empty?
+         vars = Hash[*tags.map{|tag| [tag, true]}.flatten]
+         !!Kernel.eval(ruby_expression)
+       end
+
+       private
+
+       def add(tags)
+         negatives, positives = tags.partition{|tag| tag =~ /^~/}
+         positive_limits = Hash[*positives.map{|positive| tag, limit = positive.split(':'); [tag, limit ? limit.to_i : nil]}.flatten]
+         @limits.merge!(positive_limits)
+         @ands << (negatives + positive_limits.keys)
+       end
+
+       def ruby_expression
+         "(" + @ands.map do |ors|
+           ors.map do |tag|
+             if tag =~ /^~(.*)/
+               "!vars['#{$1}']"
+             else
+               "vars['#{tag}']"
+             end
+           end.join("||")
+         end.join(")&&(") + ")"
+       end
+     end
+   end
+ end
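
A minimal usage sketch of the TagExpression class above, following directly from the code shown: each argument to the constructor is AND-ed with the others, comma-separated tags inside one argument are OR-ed, a leading ~ negates a tag, and a :N suffix only records a limit in #limits. The require path assumes the installed gem's load path; none of this is a documented public API.

    require 'gherkin/parser/tag_expression'

    # "@wip,~@slow" reads as (@wip OR NOT @slow); the second argument is AND-ed with it.
    expr = Gherkin::Parser::TagExpression.new(['@wip,~@slow', '@billing:3'])

    expr.eval(%w[@wip @billing])  # => true
    expr.eval(%w[@slow])          # => false
    expr.limits                   # => {"@wip"=>nil, "@billing"=>3}
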
@@ -0,0 +1,8 @@
+ module Gherkin
+   module RbLexer
+     def self.[](i18n_underscored_iso_code)
+       require "gherkin/rb_lexer/#{i18n_underscored_iso_code}"
+       const_get(i18n_underscored_iso_code.capitalize)
+     end
+   end
+ end
@@ -0,0 +1 @@
+ *.rb
@@ -0,0 +1,8 @@
+ = Lexers
+
+ Gherkin supports lexing of many natural languages, defined by gherkin/i18n.yml.
+ The lexers are generated with the following command:
+
+   rake ragel:i18n
+
+ You have to run this command if you modify gherkin/i18n.yml.
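
For context, the Gherkin::RbLexer module a couple of hunks above looks one of those generated lexers up by its underscored ISO code. A minimal sketch, assuming the generated Ruby lexers that ship with the built gem are on the load path and follow the same listener/scan protocol used by the other lexers in this diff:

    require 'gherkin/rb_lexer'
    require 'gherkin/tools/stats_listener'

    # Requires gherkin/rb_lexer/en and returns Gherkin::RbLexer::En.
    lexer_class = Gherkin::RbLexer['en']

    # StatsListener (shown later in this diff) implements the full listener protocol,
    # so it serves as a simple event sink here.
    listener = Gherkin::Tools::StatsListener.new
    lexer_class.new(listener).scan("Feature: Hello\n  Scenario: World\n    Given a step\n")
    listener.scenarios  # => 1
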
@@ -0,0 +1,18 @@
+ module Gherkin
+   module Rubify
+     if defined?(JRUBY_VERSION)
+       # Translate Java objects to Ruby.
+       def rubify(o)
+         if Java.java.util.Collection === o || Array === o
+           o.map{|e| rubify(e)}
+         else
+           o
+         end
+       end
+     else
+       def rubify(o)
+         o
+       end
+     end
+   end
+ end
@@ -0,0 +1,8 @@
+ module Gherkin
+   module Tools
+     SUB_COMMANDS = %w(stats reformat)
+     SUB_COMMANDS.each do |cmd|
+       autoload cmd.capitalize.to_sym, "gherkin/tools/#{cmd}"
+     end
+   end
+ end
@@ -0,0 +1,35 @@
+ require 'gherkin'
+
+ module Gherkin
+   module Tools
+     # Base class for file based operations
+     class Files
+       include Enumerable
+
+       def initialize(paths)
+         raise "Please specify one or more paths" if paths.empty?
+         @paths = paths
+       end
+
+       def each(&proc)
+         globs = @paths.map do |path|
+           raise "#{path} does not exist" unless File.exist?(path)
+           File.directory?(path) ? File.join(path, '**', '*.feature') : path
+         end
+
+         Dir[*globs].uniq.sort.each(&proc)
+       end
+
+       def scan(file, listener)
+         parser = Gherkin::Parser::Parser.new(listener, true, "root")
+         lexer = Gherkin::I18nLexer.new(parser, false)
+         begin
+           lexer.scan(IO.read(file))
+         rescue => e
+           e.message << " (#{file})"
+           raise e
+         end
+       end
+     end
+   end
+ end
@@ -0,0 +1,19 @@
+ require 'stringio'
+ require 'gherkin/tools/files'
+ require 'gherkin/formatter/pretty_formatter'
+
+ module Gherkin
+   module Tools
+     class Reformat < Files
+       def run
+         each do |file|
+           purdy = StringIO.new
+           listener = Formatter::PrettyFormatter.new(purdy)
+           scan(file, listener)
+           purdy.rewind
+           File.open(file, 'w') {|io| io.write(purdy.read)}
+         end
+       end
+     end
+   end
+ end
@@ -0,0 +1,21 @@
+ require 'gherkin'
+ require 'gherkin/tools/files'
+ require 'gherkin/tools/stats_listener'
+
+ module Gherkin
+   module Tools
+     class Stats < Files
+       def run
+         listener = StatsListener.new
+         each do |f|
+           parser = Gherkin::Parser::Parser.new(listener, true)
+           lexer = Gherkin::I18nLexer.new(parser)
+           lexer.scan(IO.read(f))
+         end
+         puts "Features: #{listener.features}"
+         puts "Scenarios: #{listener.scenarios}"
+         puts "Steps: #{listener.steps}"
+       end
+     end
+   end
+ end
@@ -0,0 +1,57 @@
+ require 'gherkin/native'
+
+ # encoding: utf-8
+ module Gherkin
+   module Tools
+     class StatsListener
+       implements 'gherkin.Listener'
+
+       attr_reader :features, :scenarios, :steps
+
+       def initialize
+         @features = 0
+         @scenarios = 0
+         @steps = 0
+       end
+
+       def tag(name, line)
+       end
+
+       def comment(content, line)
+       end
+
+       def feature(keyword, name, line)
+         @features += 1
+       end
+
+       def background(keyword, name, line)
+       end
+
+       def scenario(keyword, name, line)
+         @scenarios += 1
+       end
+
+       def scenario_outline(keyword, name, line)
+       end
+
+       def examples(keyword, name, line)
+       end
+
+       def step(keyword, name, line)
+         @steps += 1
+       end
+
+       def row(row, line)
+       end
+
+       def py_string(string, line)
+       end
+
+       def syntax_error(state, event, legal_events, line)
+       end
+
+       def eof
+       end
+     end
+   end
+ end
@@ -0,0 +1 @@
+ *.rl
@@ -0,0 +1,425 @@
+ #include <assert.h>
+ #include <ruby.h>
+
+ #if defined(_WIN32)
+ #include <stddef.h>
+ #endif
+
+ #ifdef HAVE_RUBY_RE_H
+ #include <ruby/re.h>
+ #else
+ #include <re.h>
+ #endif
+
+ #ifdef HAVE_RUBY_ENCODING_H
+ #include <ruby/encoding.h>
+ #define ENCODED_STR_NEW(ptr, len) \
+     rb_enc_str_new(ptr, len, rb_utf8_encoding())
+ #else
+ #define ENCODED_STR_NEW(ptr, len) \
+     rb_str_new(ptr, len)
+ #endif
+
+ #define LF_FLAG 0
+ #define CRLF_FLAG 1
+ #define LF "\n"
+ #define CRLF "\r\n"
+
+ #ifndef RSTRING_PTR
+ #define RSTRING_PTR(s) (RSTRING(s)->ptr)
+ #endif
+
+ #ifndef RSTRING_LEN
+ #define RSTRING_LEN(s) (RSTRING(s)->len)
+ #endif
+
+ #define DATA_GET(FROM, TYPE, NAME) \
+   Data_Get_Struct(FROM, TYPE, NAME); \
+   if (NAME == NULL) { \
+     rb_raise(rb_eArgError, "NULL found for " # NAME " when it shouldn't be."); \
+   }
+
+ typedef struct lexer_state {
+   int content_len;
+   int line_number;
+   int current_line;
+   int start_col;
+   int eol;
+   size_t mark;
+   size_t keyword_start;
+   size_t keyword_end;
+   size_t next_keyword_start;
+   size_t content_start;
+   size_t content_end;
+   size_t query_start;
+   size_t last_newline;
+   size_t final_newline;
+ } lexer_state;
+
+ static VALUE mGherkin;
+ static VALUE mCLexer;
+ static VALUE cI18nLexer;
+ static VALUE rb_eGherkinLexingError;
+
+ #define LEN(AT, P) (P - data - lexer->AT)
+ #define MARK(M, P) (lexer->M = (P) - data)
+ #define PTR_TO(P) (data + lexer->P)
+
+ #define STORE_KW_END_CON(EVENT) \
+   store_kw_con(listener, # EVENT, \
+     PTR_TO(keyword_start), LEN(keyword_start, PTR_TO(keyword_end - 1)), \
+     PTR_TO(content_start), LEN(content_start, PTR_TO(content_end)), \
+     lexer->current_line, lexer->eol); \
+   if (lexer->content_end != 0) { \
+     p = PTR_TO(content_end - 1); \
+   } \
+   lexer->content_end = 0
+
+ #define STORE_ATTR(ATTR) \
+   store_attr(listener, # ATTR, \
+     PTR_TO(content_start), LEN(content_start, p), \
+     lexer->line_number)
+
+ %%{
+   machine lexer;
+
+   action begin_content {
+     MARK(content_start, p);
+     lexer->current_line = lexer->line_number;
+   }
+
+   action begin_pystring_content {
+     MARK(content_start, p);
+   }
+
+   action start_pystring {
+     lexer->current_line = lexer->line_number;
+     lexer->start_col = p - data - lexer->last_newline;
+   }
+
+   action store_pystring_content {
+     int len = LEN(content_start, PTR_TO(final_newline));
+
+     if (len < 0) len = 0;
+
+     store_pystring_content(listener, lexer->start_col, PTR_TO(content_start), len, lexer->current_line);
+   }
+
+   action store_feature_content {
+     STORE_KW_END_CON(feature);
+   }
+
+   action store_background_content {
+     STORE_KW_END_CON(background);
+   }
+
+   action store_scenario_content {
+     STORE_KW_END_CON(scenario);
+   }
+
+   action store_scenario_outline_content {
+     STORE_KW_END_CON(scenario_outline);
+   }
+
+   action store_examples_content {
+     STORE_KW_END_CON(examples);
+   }
+
+   action store_step_content {
+     store_kw_con(listener, "step",
+       PTR_TO(keyword_start), LEN(keyword_start, PTR_TO(keyword_end)),
+       PTR_TO(content_start), LEN(content_start, p),
+       lexer->current_line, lexer->eol);
+   }
+
+   action store_comment_content {
+     STORE_ATTR(comment);
+     lexer->mark = 0;
+   }
+
+   action store_tag_content {
+     STORE_ATTR(tag);
+     lexer->mark = 0;
+   }
+
+   action inc_line_number {
+     lexer->line_number += 1;
+     MARK(final_newline, p);
+   }
+
+   action last_newline {
+     MARK(last_newline, p + 1);
+   }
+
+   action start_keyword {
+     if (lexer->mark == 0) {
+       MARK(mark, p);
+     }
+   }
+
+   action end_keyword {
+     MARK(keyword_end, p);
+     MARK(keyword_start, PTR_TO(mark));
+     MARK(content_start, p + 1);
+     lexer->mark = 0;
+   }
+
+   action next_keyword_start {
+     MARK(content_end, p);
+   }
+
+   action start_row {
+     p = p - 1;
+     lexer->current_line = lexer->line_number;
+     current_row = rb_ary_new();
+   }
+
+   action begin_cell_content {
+     MARK(content_start, p);
+   }
+
+   action store_cell_content {
+     VALUE con = ENCODED_STR_NEW(PTR_TO(content_start), LEN(content_start, p));
+     rb_funcall(con, rb_intern("strip!"), 0);
+     VALUE re_pipe = rb_reg_regcomp(rb_str_new2("\\\\\\|"));
+     VALUE re_backslash = rb_reg_regcomp(rb_str_new2("\\\\\\\\"));
+     rb_funcall(con, rb_intern("gsub!"), 2, re_pipe, rb_str_new2("|"));
+     rb_funcall(con, rb_intern("gsub!"), 2, re_backslash, rb_str_new2("\\"));
+
+     rb_ary_push(current_row, con);
+   }
+
+   action store_row {
+     rb_funcall(listener, rb_intern("row"), 2, current_row, INT2FIX(lexer->current_line));
+   }
+
+   action end_feature {
+     if (cs < lexer_first_final) {
+       if (raise_lexer_error != NULL) {
+         size_t count = 0;
+         int newstr_count = 0;
+         size_t len;
+         const char *buff;
+         if (lexer->last_newline != 0) {
+           len = LEN(last_newline, eof);
+           buff = PTR_TO(last_newline);
+         } else {
+           len = strlen(data);
+           buff = data;
+         }
+
+         char newstr[len];
+
+         for (count = 0; count < len; count++) {
+           if(buff[count] == 10) {
+             newstr[newstr_count] = '\0'; // terminate new string at first newline found
+             break;
+           } else {
+             if (buff[count] == '%') {
+               newstr[newstr_count++] = buff[count];
+               newstr[newstr_count] = buff[count];
+             } else {
+               newstr[newstr_count] = buff[count];
+             }
+           }
+           newstr_count++;
+         }
+
+         int line = lexer->line_number;
+         lexer_init(lexer); // Re-initialize so we can scan again with the same lexer
+         raise_lexer_error(newstr, line);
+       }
+     } else {
+       rb_funcall(listener, rb_intern("eof"), 0);
+     }
+   }
+
+   include lexer_common "lexer_common.<%= @i18n.underscored_iso_code %>.rl";
+
+ }%%
+
+ /** Data **/
+ %% write data;
+
+ static VALUE
+ strip_i(VALUE str, VALUE ary)
+ {
+   rb_funcall(str, rb_intern("strip!"), 0);
+   rb_ary_push(ary, str);
+
+   return Qnil;
+ }
+
+ static VALUE
+ multiline_strip(VALUE text, int eol)
+ {
+   VALUE map = rb_ary_new();
+   VALUE split = rb_str_split(text, "\n");
+
+   rb_iterate(rb_each, split, strip_i, map);
+
+   return rb_ary_join(split, rb_str_new2( \
+     eol == CRLF_FLAG ? CRLF : LF ));
+ }
+
+ static void
+ store_kw_con(VALUE listener, const char * event_name,
+              const char * keyword_at, size_t keyword_length,
+              const char * at, size_t length,
+              int current_line, int eol)
+ {
+   VALUE con = Qnil, kw = Qnil;
+   kw = ENCODED_STR_NEW(keyword_at, keyword_length);
+   con = ENCODED_STR_NEW(at, length);
+   con = multiline_strip(con, eol);
+   rb_funcall(con, rb_intern("strip!"), 0);
+   rb_funcall(listener, rb_intern(event_name), 3, kw, con, INT2FIX(current_line));
+ }
+
+ static void
+ store_attr(VALUE listener, const char * attr_type,
+            const char * at, size_t length,
+            int line)
+ {
+   VALUE val = ENCODED_STR_NEW(at, length);
+   rb_funcall(listener, rb_intern(attr_type), 2, val, INT2FIX(line));
+ }
+
+ static void
+ store_pystring_content(VALUE listener,
+                        int start_col,
+                        const char *at, size_t length,
+                        int current_line)
+ {
+   VALUE con = ENCODED_STR_NEW(at, length);
+   // Gherkin will crash gracefully if the string representation of start_col pushes the pattern past 32 characters
+   char pat[32];
+   snprintf(pat, 32, "^[\t ]{0,%d}", start_col);
+   VALUE re = rb_reg_regcomp(rb_str_new2(pat));
+   VALUE re2 = rb_reg_regcomp(rb_str_new2("\r\\Z"));
+   VALUE unescape_escaped_quotes = rb_reg_regcomp(rb_str_new2("\\\\\"\\\\\"\\\\\""));
+   rb_funcall(con, rb_intern("gsub!"), 2, re, rb_str_new2(""));
+   rb_funcall(con, rb_intern("sub!"), 2, re2, rb_str_new2(""));
+   rb_funcall(con, rb_intern("gsub!"), 2, unescape_escaped_quotes, rb_str_new2("\"\"\""));
+   rb_funcall(listener, rb_intern("py_string"), 2, con, INT2FIX(current_line));
+ }
+
+ static void
+ raise_lexer_error(const char * at, int line)
+ {
+   rb_raise(rb_eGherkinLexingError, "Lexing error on line %d: '%s'.", line, at);
+ }
+
+ static int
+ count_char(char char_to_count, char *str) {
+
+   int count = 0;
+   int i = 0;
+   while(str[i] != '\0') {
+     if(str[i] == char_to_count) {
+       count++;
+     }
+     i++;
+   }
+   return count;
+ }
+
+ static void lexer_init(lexer_state *lexer) {
+   lexer->content_start = 0;
+   lexer->content_end = 0;
+   lexer->content_len = 0;
+   lexer->mark = 0;
+   lexer->keyword_start = 0;
+   lexer->keyword_end = 0;
+   lexer->next_keyword_start = 0;
+   lexer->line_number = 1;
+   lexer->last_newline = 0;
+   lexer->final_newline = 0;
+   lexer->start_col = 0;
+   lexer->eol = LF_FLAG;
+ }
+
+ static VALUE CLexer_alloc(VALUE klass)
+ {
+   VALUE obj;
+   lexer_state *lxr = ALLOC(lexer_state);
+   lexer_init(lxr);
+
+   obj = Data_Wrap_Struct(klass, NULL, -1, lxr);
+
+   return obj;
+ }
+
+ static VALUE CLexer_init(VALUE self, VALUE listener)
+ {
+   rb_iv_set(self, "@listener", listener);
+
+   lexer_state *lxr = NULL;
+   DATA_GET(self, lexer_state, lxr);
+   lexer_init(lxr);
+
+   return self;
+ }
+
+ static VALUE CLexer_scan(VALUE self, VALUE input)
+ {
+   lexer_state *lexer = NULL;
+   DATA_GET(self, lexer_state, lexer);
+
+
+   VALUE input_copy = rb_str_dup(input);
+
+   rb_str_append(input_copy, rb_str_new2("\n%_FEATURE_END_%"));
+   char *data = RSTRING_PTR(input_copy);
+   size_t len = RSTRING_LEN(input_copy);
+
+   if (count_char('\r', data) > (count_char('\n', data) / 2)) {
+     lexer->eol = CRLF_FLAG;
+   }
+
+   if (len == 0) {
+     rb_raise(rb_eGherkinLexingError, "No content to lex.");
+   } else {
+
+     const char *p, *pe, *eof;
+     int cs = 0;
+
+     VALUE listener = rb_iv_get(self, "@listener");
+     VALUE current_row = Qnil;
+
+     p = data;
+     pe = data + len;
+     eof = pe;
+
+     assert(*pe == '\0' && "pointer does not end on NULL");
+
+     %% write init;
+     %% write exec;
+
+     assert(p <= pe && "data overflow after parsing execute");
+     assert(lexer->content_start <= len && "content starts after data end");
+     assert(lexer->mark < len && "mark is after data end");
+
+     // Reset lexer by re-initializing the whole thing
+     lexer_init(lexer);
+
+     if (cs == lexer_error) {
+       rb_raise(rb_eGherkinLexingError, "Invalid format, lexing fails.");
+     } else {
+       return Qtrue;
+     }
+   }
+ }
+
+ void Init_gherkin_lexer_<%= @i18n.underscored_iso_code %>()
+ {
+   mGherkin = rb_define_module("Gherkin");
+   rb_eGherkinLexingError = rb_const_get(mGherkin, rb_intern("LexingError"));
+
+   mCLexer = rb_define_module_under(mGherkin, "CLexer");
+   cI18nLexer = rb_define_class_under(mCLexer, "<%= @i18n.underscored_iso_code.capitalize %>", rb_cObject);
+   rb_define_alloc_func(cI18nLexer, CLexer_alloc);
+   rb_define_method(cI18nLexer, "initialize", CLexer_init, 1);
+   rb_define_method(cI18nLexer, "scan", CLexer_scan, 1);
+ }
+
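
In normal use the generated lexers (C or Ruby) are not driven directly; the tools shown earlier in this diff (data/lib/gherkin/tools/files.rb and reformat.rb) wire them up behind Gherkin::I18nLexer and the parser. A condensed sketch of that pipeline, assuming the same requires and constructor arguments resolve as they do in those files:

    require 'stringio'
    require 'gherkin'
    require 'gherkin/formatter/pretty_formatter'

    out = StringIO.new
    listener = Gherkin::Formatter::PrettyFormatter.new(out)
    parser   = Gherkin::Parser::Parser.new(listener, true, "root")
    lexer    = Gherkin::I18nLexer.new(parser, false)  # delegates to a native or Ruby lexer per language
    lexer.scan("Feature: Pretty\n  Scenario: Print\n    Given a step\n")
    puts out.string
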