benhamill-gherkin 2.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (170) hide show
  1. data/.gitattributes +2 -0
  2. data/.gitignore +11 -0
  3. data/.mailmap +2 -0
  4. data/.rspec +1 -0
  5. data/.rvmrc +1 -0
  6. data/Gemfile +7 -0
  7. data/History.txt +363 -0
  8. data/LICENSE +20 -0
  9. data/README.rdoc +149 -0
  10. data/Rakefile +19 -0
  11. data/VERSION +1 -0
  12. data/build_native_gems.sh +8 -0
  13. data/cucumber.yml +3 -0
  14. data/features/escaped_pipes.feature +8 -0
  15. data/features/feature_parser.feature +237 -0
  16. data/features/json_formatter.feature +377 -0
  17. data/features/json_parser.feature +318 -0
  18. data/features/native_lexer.feature +19 -0
  19. data/features/parser_with_native_lexer.feature +205 -0
  20. data/features/pretty_formatter.feature +15 -0
  21. data/features/step_definitions/eyeball_steps.rb +3 -0
  22. data/features/step_definitions/gherkin_steps.rb +29 -0
  23. data/features/step_definitions/json_formatter_steps.rb +28 -0
  24. data/features/step_definitions/json_parser_steps.rb +20 -0
  25. data/features/step_definitions/pretty_formatter_steps.rb +84 -0
  26. data/features/steps_parser.feature +46 -0
  27. data/features/support/env.rb +38 -0
  28. data/gherkin.gemspec +61 -0
  29. data/ikvm/.gitignore +3 -0
  30. data/java/.gitignore +5 -0
  31. data/java/src/main/java/gherkin/lexer/i18n/.gitignore +1 -0
  32. data/java/src/main/resources/gherkin/.gitignore +1 -0
  33. data/js/lib/gherkin/lexer/i18n/ar.js +1094 -0
  34. data/js/lib/gherkin/lexer/i18n/bg.js +1308 -0
  35. data/js/lib/gherkin/lexer/i18n/ca.js +1236 -0
  36. data/js/lib/gherkin/lexer/i18n/cs.js +1090 -0
  37. data/js/lib/gherkin/lexer/i18n/cy_gb.js +958 -0
  38. data/js/lib/gherkin/lexer/i18n/da.js +974 -0
  39. data/js/lib/gherkin/lexer/i18n/de.js +1082 -0
  40. data/js/lib/gherkin/lexer/i18n/en.js +965 -0
  41. data/js/lib/gherkin/lexer/i18n/en_au.js +902 -0
  42. data/js/lib/gherkin/lexer/i18n/en_lol.js +859 -0
  43. data/js/lib/gherkin/lexer/i18n/en_pirate.js +1136 -0
  44. data/js/lib/gherkin/lexer/i18n/en_scouse.js +1289 -0
  45. data/js/lib/gherkin/lexer/i18n/en_tx.js +942 -0
  46. data/js/lib/gherkin/lexer/i18n/eo.js +916 -0
  47. data/js/lib/gherkin/lexer/i18n/es.js +1049 -0
  48. data/js/lib/gherkin/lexer/i18n/et.js +915 -0
  49. data/js/lib/gherkin/lexer/i18n/fi.js +894 -0
  50. data/js/lib/gherkin/lexer/i18n/fr.js +1116 -0
  51. data/js/lib/gherkin/lexer/i18n/he.js +1044 -0
  52. data/js/lib/gherkin/lexer/i18n/hr.js +994 -0
  53. data/js/lib/gherkin/lexer/i18n/hu.js +1043 -0
  54. data/js/lib/gherkin/lexer/i18n/id.js +884 -0
  55. data/js/lib/gherkin/lexer/i18n/it.js +1007 -0
  56. data/js/lib/gherkin/lexer/i18n/ja.js +1344 -0
  57. data/js/lib/gherkin/lexer/i18n/ko.js +1028 -0
  58. data/js/lib/gherkin/lexer/i18n/lt.js +972 -0
  59. data/js/lib/gherkin/lexer/i18n/lu.js +1057 -0
  60. data/js/lib/gherkin/lexer/i18n/lv.js +1092 -0
  61. data/js/lib/gherkin/lexer/i18n/nl.js +1036 -0
  62. data/js/lib/gherkin/lexer/i18n/no.js +986 -0
  63. data/js/lib/gherkin/lexer/i18n/pl.js +1140 -0
  64. data/js/lib/gherkin/lexer/i18n/pt.js +1000 -0
  65. data/js/lib/gherkin/lexer/i18n/ro.js +1089 -0
  66. data/js/lib/gherkin/lexer/i18n/ru.js +1560 -0
  67. data/js/lib/gherkin/lexer/i18n/sk.js +972 -0
  68. data/js/lib/gherkin/lexer/i18n/sr_cyrl.js +1728 -0
  69. data/js/lib/gherkin/lexer/i18n/sr_latn.js +1220 -0
  70. data/js/lib/gherkin/lexer/i18n/sv.js +997 -0
  71. data/js/lib/gherkin/lexer/i18n/tr.js +1014 -0
  72. data/js/lib/gherkin/lexer/i18n/uk.js +1572 -0
  73. data/js/lib/gherkin/lexer/i18n/uz.js +1302 -0
  74. data/js/lib/gherkin/lexer/i18n/vi.js +1124 -0
  75. data/js/lib/gherkin/lexer/i18n/zh_cn.js +902 -0
  76. data/js/lib/gherkin/lexer/i18n/zh_tw.js +940 -0
  77. data/lib/.gitignore +4 -0
  78. data/lib/gherkin.rb +2 -0
  79. data/lib/gherkin/c_lexer.rb +17 -0
  80. data/lib/gherkin/formatter/ansi_escapes.rb +95 -0
  81. data/lib/gherkin/formatter/argument.rb +16 -0
  82. data/lib/gherkin/formatter/escaping.rb +15 -0
  83. data/lib/gherkin/formatter/filter_formatter.rb +136 -0
  84. data/lib/gherkin/formatter/hashable.rb +19 -0
  85. data/lib/gherkin/formatter/json_formatter.rb +102 -0
  86. data/lib/gherkin/formatter/line_filter.rb +26 -0
  87. data/lib/gherkin/formatter/model.rb +236 -0
  88. data/lib/gherkin/formatter/pretty_formatter.rb +243 -0
  89. data/lib/gherkin/formatter/regexp_filter.rb +21 -0
  90. data/lib/gherkin/formatter/step_printer.rb +17 -0
  91. data/lib/gherkin/formatter/tag_count_formatter.rb +47 -0
  92. data/lib/gherkin/formatter/tag_filter.rb +19 -0
  93. data/lib/gherkin/i18n.rb +175 -0
  94. data/lib/gherkin/i18n.yml +588 -0
  95. data/lib/gherkin/json_parser.rb +137 -0
  96. data/lib/gherkin/lexer/i18n_lexer.rb +47 -0
  97. data/lib/gherkin/listener/event.rb +45 -0
  98. data/lib/gherkin/listener/formatter_listener.rb +113 -0
  99. data/lib/gherkin/native.rb +7 -0
  100. data/lib/gherkin/native/ikvm.rb +55 -0
  101. data/lib/gherkin/native/java.rb +55 -0
  102. data/lib/gherkin/native/null.rb +9 -0
  103. data/lib/gherkin/parser/meta.txt +5 -0
  104. data/lib/gherkin/parser/parser.rb +164 -0
  105. data/lib/gherkin/parser/root.txt +11 -0
  106. data/lib/gherkin/parser/steps.txt +4 -0
  107. data/lib/gherkin/rb_lexer.rb +8 -0
  108. data/lib/gherkin/rb_lexer/.gitignore +1 -0
  109. data/lib/gherkin/rb_lexer/README.rdoc +8 -0
  110. data/lib/gherkin/rubify.rb +24 -0
  111. data/lib/gherkin/tag_expression.rb +62 -0
  112. data/lib/gherkin/version.rb +3 -0
  113. data/ragel/i18n/.gitignore +1 -0
  114. data/ragel/lexer.c.rl.erb +439 -0
  115. data/ragel/lexer.java.rl.erb +208 -0
  116. data/ragel/lexer.rb.rl.erb +167 -0
  117. data/ragel/lexer_common.rl.erb +50 -0
  118. data/spec/gherkin/c_lexer_spec.rb +21 -0
  119. data/spec/gherkin/fixtures/1.feature +8 -0
  120. data/spec/gherkin/fixtures/comments_in_table.feature +9 -0
  121. data/spec/gherkin/fixtures/complex.feature +45 -0
  122. data/spec/gherkin/fixtures/complex.json +143 -0
  123. data/spec/gherkin/fixtures/complex_for_filtering.feature +60 -0
  124. data/spec/gherkin/fixtures/complex_with_tags.feature +61 -0
  125. data/spec/gherkin/fixtures/dos_line_endings.feature +45 -0
  126. data/spec/gherkin/fixtures/hantu_pisang.feature +35 -0
  127. data/spec/gherkin/fixtures/i18n_fr.feature +14 -0
  128. data/spec/gherkin/fixtures/i18n_no.feature +7 -0
  129. data/spec/gherkin/fixtures/i18n_zh-CN.feature +9 -0
  130. data/spec/gherkin/fixtures/scenario_outline_with_tags.feature +13 -0
  131. data/spec/gherkin/fixtures/scenario_without_steps.feature +5 -0
  132. data/spec/gherkin/fixtures/simple_with_comments.feature +7 -0
  133. data/spec/gherkin/fixtures/simple_with_tags.feature +11 -0
  134. data/spec/gherkin/fixtures/with_bom.feature +3 -0
  135. data/spec/gherkin/formatter/ansi_escapes_spec.rb +19 -0
  136. data/spec/gherkin/formatter/filter_formatter_spec.rb +165 -0
  137. data/spec/gherkin/formatter/model_spec.rb +28 -0
  138. data/spec/gherkin/formatter/pretty_formatter_spec.rb +158 -0
  139. data/spec/gherkin/formatter/spaces.feature +9 -0
  140. data/spec/gherkin/formatter/step_printer_spec.rb +55 -0
  141. data/spec/gherkin/formatter/tabs.feature +9 -0
  142. data/spec/gherkin/formatter/tag_count_formatter_spec.rb +30 -0
  143. data/spec/gherkin/i18n_spec.rb +152 -0
  144. data/spec/gherkin/java_lexer_spec.rb +20 -0
  145. data/spec/gherkin/java_libs.rb +20 -0
  146. data/spec/gherkin/json_parser_spec.rb +113 -0
  147. data/spec/gherkin/lexer/i18n_lexer_spec.rb +43 -0
  148. data/spec/gherkin/output_stream_string_io.rb +20 -0
  149. data/spec/gherkin/parser/parser_spec.rb +16 -0
  150. data/spec/gherkin/rb_lexer_spec.rb +19 -0
  151. data/spec/gherkin/sexp_recorder.rb +56 -0
  152. data/spec/gherkin/shared/lexer_group.rb +593 -0
  153. data/spec/gherkin/shared/py_string_group.rb +153 -0
  154. data/spec/gherkin/shared/row_group.rb +125 -0
  155. data/spec/gherkin/shared/tags_group.rb +54 -0
  156. data/spec/gherkin/tag_expression_spec.rb +137 -0
  157. data/spec/spec_helper.rb +69 -0
  158. data/tasks/bench.rake +184 -0
  159. data/tasks/bench/feature_builder.rb +49 -0
  160. data/tasks/bench/generated/.gitignore +1 -0
  161. data/tasks/bench/null_listener.rb +4 -0
  162. data/tasks/compile.rake +102 -0
  163. data/tasks/cucumber.rake +20 -0
  164. data/tasks/gems.rake +35 -0
  165. data/tasks/ikvm.rake +79 -0
  166. data/tasks/ragel_task.rb +70 -0
  167. data/tasks/rdoc.rake +9 -0
  168. data/tasks/release.rake +30 -0
  169. data/tasks/rspec.rake +8 -0
  170. metadata +609 -0
@@ -0,0 +1,9 @@
1
# Adds two class-level hooks to every class. Both accept their argument and
# deliberately do nothing, so class bodies may call them unconditionally.
class Class
  # Takes a Java class name and ignores it. Returns nil.
  def implements(java_class_name); end

  # Takes a native library name and ignores it. Returns nil.
  def native_impl(lib); end
end
@@ -0,0 +1,5 @@
1
+ | | feature | background | scenario | scenario_outline | examples | step | row | py_string | eof | comment | tag |
2
+ | meta | E | E | E | E | E | E | E | E | eof | comment | tag |
3
+ | comment | pop() | pop() | pop() | pop() | pop() | pop() | pop() | pop() | eof | pop() | tag |
4
+ | tag | pop() | E | pop() | pop() | pop() | E | E | E | E | E | tag |
5
+ | eof | E | E | E | E | E | E | E | E | E | E | E |
@@ -0,0 +1,164 @@
1
+ require 'gherkin/i18n'
2
+ require 'gherkin/lexer/i18n_lexer'
3
+ require 'gherkin/native'
4
+ require 'gherkin/listener/formatter_listener'
5
+
6
module Gherkin
  module Parser
    # Raised by Parser#event when the active state machine marks the incoming
    # event as an error ("E") for its current state.
    class ParseError < StandardError
      # state           - the state the machine was in
      # new_state       - the event that was found
      # expected_states - the events that would have been accepted
      # uri, line       - location reported in the message
      def initialize(state, new_state, expected_states, uri, line)
        super("Parse error at #{uri}:#{line}. Found #{new_state} when expecting one of: #{expected_states.join(', ')}. (Current state: #{state}).")
      end
    end

    # Receives lexer events (through method_missing), checks each one against
    # a stack of table-driven state machines and forwards it to a
    # FormatterListener wrapping the given formatter.
    class Parser
      native_impl('gherkin')

      # Initialize the parser. +machine_name+ refers to a state machine table.
      def initialize(formatter, raise_on_error=true, machine_name='root', force_ruby=false)
        @formatter = formatter
        @listener = Listener::FormatterListener.new(@formatter)
        @raise_on_error = raise_on_error
        @machine_name = machine_name
        @machines = []
        push_machine(@machine_name)
        @lexer = Gherkin::Lexer::I18nLexer.new(self, force_ruby)
      end

      # Lexes +gherkin+, reporting +feature_uri+ to the formatter first.
      # +line_offset+ is added to every line number seen by the state machine.
      def parse(gherkin, feature_uri, line_offset)
        # Remember the URI: #event reads @feature_uri when it reports a
        # syntax error, and it was previously never assigned.
        @feature_uri = feature_uri
        @line_offset = line_offset
        @formatter.uri(@feature_uri)
        @lexer.scan(gherkin)
      end

      def i18n_language
        @lexer.i18n_language
      end

      def errors
        @lexer.errors
      end

      # Every lexer callback lands here: the event is validated (the last
      # argument is taken to be the line number) and then forwarded to the
      # listener. After :eof the machine stack top is replaced by a fresh
      # machine for +@machine_name+.
      #
      # Doesn't yet fall back to super
      def method_missing(method, *args)
        # TODO: Catch exception and call super
        event(method.to_s, args[-1])
        @listener.__send__(method, *args)
        if method == :eof
          pop_machine
          push_machine(@machine_name)
        end
      end

      # Feeds +ev+ to the current machine. When the machine rejects it, either
      # raises ParseError or reports a syntax_error to the listener, depending
      # on +@raise_on_error+.
      def event(ev, line)
        l = line ? @line_offset+line : nil
        machine.event(ev, l) do |state, legal_events|
          if @raise_on_error
            raise ParseError.new(state, ev, legal_events, @feature_uri, l)
          else
            # Only used for testing
            @listener.syntax_error(state, ev, legal_events, @feature_uri, l)
          end
        end
      end

      def push_machine(name)
        @machines.push(Machine.new(self, name))
      end

      def pop_machine
        @machines.pop
      end

      # The machine on top of the stack.
      def machine
        @machines[-1]
      end

      def expected
        machine.expected
      end

      # Overwrites the current machine's state directly.
      def force_state(state)
        machine.instance_variable_set('@state', state)
      end

      # One state machine, driven by the transition table stored in
      # "<name>.txt" next to this file. The initial state is the machine name.
      class Machine
        def initialize(parser, name)
          @parser = parser
          @name = name
          @transition_map = transition_map(name)
          @state = name
        end

        # Applies +ev+ to the current state:
        #   "E"          - yields the current state and the expected events
        #   "push(name)" - pushes machine +name+ on the parser and replays +ev+
        #   "pop()"      - pops the parser's machine and replays +ev+
        #   anything else becomes the new state.
        # Raises RuntimeError for an unknown state or an unknown event.
        def event(ev, line)
          states = @transition_map[@state]
          raise "Unknown state: #{@state.inspect} for machine #{@name}" if states.nil?
          new_state = states[ev]
          case new_state
          when "E"
            yield @state, expected
          when /push\((.+)\)/
            @parser.push_machine($1)
            @parser.event(ev, line)
          when "pop()"
            @parser.pop_machine()
            @parser.event(ev, line)
          else
            raise "Unknown transition: #{ev.inspect} among #{states.inspect} for machine #{@name}" if new_state.nil?
            @state = new_state
          end
        end

        # Sorted names of the events that are not errors in the current
        # state, with 'eof' left out.
        def expected
          allowed = @transition_map[@state].find_all { |_, action| action != "E" }
          allowed.collect { |state| state[0] }.sort - ['eof']
        end

        private

        # Parsed tables, cached per machine name and shared by all instances.
        @@transition_maps = {}

        def transition_map(name)
          @@transition_maps[name] ||= build_transition_map(name)
        end

        # Turns the table rows into { state => { event => action } }. The
        # first row supplies the event names (its first cell is skipped); in
        # every other row the first cell is the state name.
        def build_transition_map(name)
          table = transition_table(name)
          events = table.shift[1..-1]
          table.inject({}) do |machine, actions|
            state = actions.shift
            machine[state] = Hash[*events.zip(actions).flatten]
            machine
          end
        end

        # Reads "<name>.txt" from this directory and returns its rows, using
        # the English lexer to scan the table.
        def transition_table(name)
          state_machine_reader = StateMachineReader.new
          lexer = Gherkin::I18n.new('en').lexer(state_machine_reader)
          machine = File.dirname(__FILE__) + "/#{name}.txt"
          lexer.scan(File.read(machine))
          state_machine_reader.rows
        end

        # Minimal lexer listener that only collects table rows.
        class StateMachineReader
          attr_reader :rows

          def initialize
            @rows = []
          end

          def uri(uri)
          end

          def row(row, line_number)
            @rows << row
          end

          def eof
          end
        end

      end
    end
  end
end
@@ -0,0 +1,11 @@
1
+ | | feature | background | scenario | scenario_outline | examples | step | row | py_string | eof | comment | tag |
2
+ | root | feature | E | E | E | E | E | E | E | eof | push(meta) | push(meta) |
3
+ | feature | E | background | scenario | scenario_outline | E | E | E | E | eof | push(meta) | push(meta) |
4
+ | step | E | E | scenario | scenario_outline | E | step | step | step | eof | push(meta) | push(meta) |
5
+ | outline_step | E | E | scenario | scenario_outline | examples | outline_step | outline_step | outline_step | eof | push(meta) | push(meta) |
6
+ | background | E | E | scenario | scenario_outline | E | step | E | E | eof | push(meta) | push(meta) |
7
+ | scenario | E | E | scenario | scenario_outline | E | step | E | E | eof | push(meta) | push(meta) |
8
+ | scenario_outline | E | E | E | E | E | outline_step | E | E | eof | push(meta) | push(meta) |
9
+ | examples | E | E | E | E | E | E | examples_table | E | eof | push(meta) | push(meta) |
10
+ | examples_table | E | E | scenario | scenario_outline | examples | E | examples_table | E | eof | push(meta) | push(meta) |
11
+ | eof | E | E | E | E | E | E | E | E | E | E | E |
@@ -0,0 +1,4 @@
1
+ | | feature | background | scenario | scenario_outline | examples | step | row | py_string | eof | comment | tag |
2
+ | steps | E | E | E | E | E | step | E | E | eof | E | E |
3
+ | step | E | E | E | E | E | step | step | steps | eof | E | E |
4
+ | eof | E | E | E | E | E | E | E | E | E | E | E |
@@ -0,0 +1,8 @@
1
module Gherkin
  module RbLexer
    # Requires "gherkin/rb_lexer/<code>" and returns the constant of this
    # module whose name is the capitalized +i18n_underscored_iso_code+.
    def self.[](i18n_underscored_iso_code)
      lexer_file = "gherkin/rb_lexer/#{i18n_underscored_iso_code}"
      require lexer_file

      lexer_const = i18n_underscored_iso_code.capitalize
      const_get(lexer_const)
    end
  end
end
@@ -0,0 +1 @@
1
+ *.rb
@@ -0,0 +1,8 @@
1
+ = Lexers
2
+
3
+ Gherkin supports lexing of lots of natural languages, defined by gherkin/i18n.yml
4
+ The lexers are generated with the following command:
5
+
6
+ rake ragel:i18n
7
+
8
+ You have to run this command if you modify gherkin/i18n.yml
@@ -0,0 +1,24 @@
1
module Gherkin
  module Rubify
    unless defined?(JRUBY_VERSION)
      # Outside JRuby there is nothing to translate: the object is returned
      # unchanged.
      def rubify(o)
        o
      end
    else
      # Translates Java objects to Ruby. Java collections (and Ruby Arrays)
      # are mapped element by element, so a java.util.List coming from Java
      # ends up as a Ruby Array; a Java PyString becomes the Ruby model
      # PyString. Anything else is returned as is.
      def rubify(o)
        case o
        when Java.java.util.Collection, Array
          o.map { |element| rubify(element) }
        when Java.gherkin.formatter.model.PyString
          require 'gherkin/formatter/model'
          Formatter::Model::PyString.new(o.value, o.line)
        else
          o
        end
      end
    end
  end
end
@@ -0,0 +1,62 @@
1
+ require 'gherkin/native'
2
+
3
module Gherkin
  # A boolean expression over tag names. Each string passed to the
  # constructor is a comma-separated list of alternatives (OR); the strings
  # themselves are combined with AND. A leading "~" negates a tag and a
  # trailing ":<n>" attaches a numeric limit to it.
  class TagExpression
    native_impl('gherkin')

    # Hash of tag name (without "~") => Integer limit.
    attr_reader :limits

    # tag_expressions - Array of Strings such as "@foo,~@bar:3".
    def initialize(tag_expressions)
      @ands = []
      @limits = {}
      tag_expressions.each do |expr|
        add(expr.strip.split(/\s*,\s*/))
      end
    end

    def empty?
      @ands.empty?
    end

    # Returns true when +tags+ (an Array of tag names) satisfies the
    # expression, false otherwise. An expression without any tags matches
    # everything.
    #
    # The expression is evaluated directly rather than by generating Ruby
    # source and passing it to Kernel.eval: tag text never becomes code, so a
    # tag containing a quote can neither break evaluation nor inject code.
    def eval(tags)
      return true if @ands.flatten.empty?
      present = {}
      tags.each { |tag| present[tag] = true }
      @ands.all? do |ors|
        ors.any? do |tag|
          if tag =~ /^~(.*)/
            !present[$1]
          else
            present.key?(tag)
          end
        end
      end
    end

    private

    # Records one OR-group: negated tags first, then positive ones.
    def add(tags_with_negation_and_limits)
      negatives, positives = tags_with_negation_and_limits.partition{|tag| tag =~ /^~/}
      @ands << (store_and_extract_limits(negatives, true) + store_and_extract_limits(positives, false))
    end

    # Strips the ":limit" suffix from every tag, remembering the limit in
    # @limits under the tag name without "~". Returns the tags (negation
    # kept). Raises RuntimeError when the same tag is given two different
    # limits.
    def store_and_extract_limits(tags_with_negation_and_limits, negated)
      tags_with_negation = []
      tags_with_negation_and_limits.each do |tag_with_negation_and_limit|
        tag_with_negation, limit = tag_with_negation_and_limit.split(':')
        tags_with_negation << tag_with_negation
        if limit
          tag_without_negation = negated ? tag_with_negation[1..-1] : tag_with_negation
          if @limits[tag_without_negation] && @limits[tag_without_negation] != limit.to_i
            raise "Inconsistent tag limits for #{tag_without_negation}: #{@limits[tag_without_negation]} and #{limit.to_i}"
          end
          @limits[tag_without_negation] = limit.to_i
        end
      end
      tags_with_negation
    end
  end
end
@@ -0,0 +1,3 @@
1
module Gherkin
  # The gem version: the stripped contents of the VERSION file located two
  # directories above this file.
  VERSION = IO.read(File.join(File.dirname(__FILE__), '..', '..', 'VERSION')).strip
end
@@ -0,0 +1 @@
1
+ *.rl
@@ -0,0 +1,439 @@
1
+ #include <assert.h>
2
+ #include <ruby.h>
3
+
4
+ #if defined(_WIN32)
5
+ #include <stddef.h>
6
+ #endif
7
+
8
+ #ifdef HAVE_RUBY_RE_H
9
+ #include <ruby/re.h>
10
+ #else
11
+ #include <re.h>
12
+ #endif
13
+
14
+ #ifdef HAVE_RUBY_ENCODING_H
15
+ #include <ruby/encoding.h>
16
+ #define ENCODED_STR_NEW(ptr, len) \
17
+ rb_enc_str_new(ptr, len, rb_utf8_encoding())
18
+ #else
19
+ #define ENCODED_STR_NEW(ptr, len) \
20
+ rb_str_new(ptr, len)
21
+ #endif
22
+
23
+ #ifndef RSTRING_PTR
24
+ #define RSTRING_PTR(s) (RSTRING(s)->ptr)
25
+ #endif
26
+
27
+ #ifndef RSTRING_LEN
28
+ #define RSTRING_LEN(s) (RSTRING(s)->len)
29
+ #endif
30
+
31
/*
 * Unwraps the C struct held by the Ruby object FROM into the pointer NAME
 * (of type TYPE *) and raises ArgumentError when that pointer is NULL.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and stays correct when used as the body of an unbraced
 * if/else. Callers keep writing DATA_GET(obj, type, var);
 */
#define DATA_GET(FROM, TYPE, NAME) \
  do { \
    Data_Get_Struct(FROM, TYPE, NAME); \
    if ((NAME) == NULL) { \
      rb_raise(rb_eArgError, "NULL found for " # NAME " when it shouldn't be."); \
    } \
  } while (0)
36
+
37
+ typedef struct lexer_state {
38
+ int content_len;
39
+ int line_number;
40
+ int current_line;
41
+ int start_col;
42
+ size_t mark;
43
+ size_t keyword_start;
44
+ size_t keyword_end;
45
+ size_t next_keyword_start;
46
+ size_t content_start;
47
+ size_t content_end;
48
+ size_t query_start;
49
+ size_t last_newline;
50
+ size_t final_newline;
51
+ } lexer_state;
52
+
53
+ static VALUE mGherkin;
54
+ static VALUE mGherkinLexer;
55
+ static VALUE mCLexer;
56
+ static VALUE cI18nLexer;
57
+ static VALUE rb_eGherkinLexingError;
58
+
59
+ #define LEN(AT, P) (P - data - lexer->AT)
60
+ #define MARK(M, P) (lexer->M = (P) - data)
61
+ #define PTR_TO(P) (data + lexer->P)
62
+
63
+ #define STORE_KW_END_CON(EVENT) \
64
+ store_multiline_kw_con(listener, # EVENT, \
65
+ PTR_TO(keyword_start), LEN(keyword_start, PTR_TO(keyword_end - 1)), \
66
+ PTR_TO(content_start), LEN(content_start, PTR_TO(content_end)), \
67
+ lexer->current_line, lexer->start_col); \
68
+ if (lexer->content_end != 0) { \
69
+ p = PTR_TO(content_end - 1); \
70
+ } \
71
+ lexer->content_end = 0
72
+
73
+ #define STORE_ATTR(ATTR) \
74
+ store_attr(listener, # ATTR, \
75
+ PTR_TO(content_start), LEN(content_start, p), \
76
+ lexer->line_number)
77
+
78
+ %%{
79
+ machine lexer;
80
+
81
+ action begin_content {
82
+ MARK(content_start, p);
83
+ lexer->current_line = lexer->line_number;
84
+ lexer->start_col = lexer->content_start - lexer->last_newline - (lexer->keyword_end - lexer->keyword_start) + 2;
85
+ }
86
+
87
+ action begin_pystring_content {
88
+ MARK(content_start, p);
89
+ }
90
+
91
+ action start_pystring {
92
+ lexer->current_line = lexer->line_number;
93
+ lexer->start_col = p - data - lexer->last_newline;
94
+ }
95
+
96
+ action store_pystring_content {
97
+ int len = LEN(content_start, PTR_TO(final_newline));
98
+
99
+ if (len < 0) len = 0;
100
+
101
+ store_pystring_content(listener, lexer->start_col, PTR_TO(content_start), len, lexer->current_line);
102
+ }
103
+
104
+ action store_feature_content {
105
+ STORE_KW_END_CON(feature);
106
+ }
107
+
108
+ action store_background_content {
109
+ STORE_KW_END_CON(background);
110
+ }
111
+
112
+ action store_scenario_content {
113
+ STORE_KW_END_CON(scenario);
114
+ }
115
+
116
+ action store_scenario_outline_content {
117
+ STORE_KW_END_CON(scenario_outline);
118
+ }
119
+
120
+ action store_examples_content {
121
+ STORE_KW_END_CON(examples);
122
+ }
123
+
124
+ action store_step_content {
125
+ store_kw_con(listener, "step",
126
+ PTR_TO(keyword_start), LEN(keyword_start, PTR_TO(keyword_end)),
127
+ PTR_TO(content_start), LEN(content_start, p),
128
+ lexer->current_line);
129
+ }
130
+
131
+ action store_comment_content {
132
+ STORE_ATTR(comment);
133
+ lexer->mark = 0;
134
+ }
135
+
136
+ action store_tag_content {
137
+ STORE_ATTR(tag);
138
+ lexer->mark = 0;
139
+ }
140
+
141
+ action inc_line_number {
142
+ lexer->line_number += 1;
143
+ MARK(final_newline, p);
144
+ }
145
+
146
+ action last_newline {
147
+ MARK(last_newline, p + 1);
148
+ }
149
+
150
+ action start_keyword {
151
+ if (lexer->mark == 0) {
152
+ MARK(mark, p);
153
+ }
154
+ }
155
+
156
+ action end_keyword {
157
+ MARK(keyword_end, p);
158
+ MARK(keyword_start, PTR_TO(mark));
159
+ MARK(content_start, p + 1);
160
+ lexer->mark = 0;
161
+ }
162
+
163
+ action next_keyword_start {
164
+ MARK(content_end, p);
165
+ }
166
+
167
+ action start_row {
168
+ p = p - 1;
169
+ lexer->current_line = lexer->line_number;
170
+ current_row = rb_ary_new();
171
+ }
172
+
173
+ action begin_cell_content {
174
+ MARK(content_start, p);
175
+ }
176
+
177
+ action store_cell_content {
178
+ VALUE re_pipe, re_newline, re_backslash;
179
+ VALUE con = ENCODED_STR_NEW(PTR_TO(content_start), LEN(content_start, p));
180
+ rb_funcall(con, rb_intern("strip!"), 0);
181
+ re_pipe = rb_reg_regcomp(rb_str_new2("\\\\\\|"));
182
+ re_newline = rb_reg_regcomp(rb_str_new2("\\\\n"));
183
+ re_backslash = rb_reg_regcomp(rb_str_new2("\\\\\\\\"));
184
+ rb_funcall(con, rb_intern("gsub!"), 2, re_pipe, rb_str_new2("|"));
185
+ rb_funcall(con, rb_intern("gsub!"), 2, re_newline, rb_str_new2("\n"));
186
+ rb_funcall(con, rb_intern("gsub!"), 2, re_backslash, rb_str_new2("\\"));
187
+
188
+ rb_ary_push(current_row, con);
189
+ }
190
+
191
+ action store_row {
192
+ rb_funcall(listener, rb_intern("row"), 2, current_row, INT2FIX(lexer->current_line));
193
+ }
194
+
195
+ action end_feature {
196
+ int line;
197
+ if (cs < lexer_first_final) {
198
+ size_t count = 0;
199
+ VALUE newstr_val;
200
+ char *newstr;
201
+ int newstr_count = 0;
202
+ size_t len;
203
+ const char *buff;
204
+ if (lexer->last_newline != 0) {
205
+ len = LEN(last_newline, eof);
206
+ buff = PTR_TO(last_newline);
207
+ } else {
208
+ len = strlen(data);
209
+ buff = data;
210
+ }
211
+
212
+ // Allocate as a ruby string so that it gets cleaned up by GC
213
+ newstr_val = rb_str_new(buff, len);
214
+ newstr = RSTRING_PTR(newstr_val);
215
+
216
+
217
+ for (count = 0; count < len; count++) {
218
+ if(buff[count] == 10) {
219
+ newstr[newstr_count] = '\0'; // terminate new string at first newline found
220
+ break;
221
+ } else {
222
+ if (buff[count] == '%') {
223
+ newstr[newstr_count++] = buff[count];
224
+ newstr[newstr_count] = buff[count];
225
+ } else {
226
+ newstr[newstr_count] = buff[count];
227
+ }
228
+ }
229
+ newstr_count++;
230
+ }
231
+
232
+ line = lexer->line_number;
233
+ lexer_init(lexer); // Re-initialize so we can scan again with the same lexer
234
+ raise_lexer_error(newstr, line);
235
+ } else {
236
+ rb_funcall(listener, rb_intern("eof"), 0);
237
+ }
238
+ }
239
+
240
+ include lexer_common "lexer_common.<%= @i18n.underscored_iso_code %>.rl";
241
+
242
+ }%%
243
+
244
+ /** Data **/
245
+ %% write data;
246
+
247
+ static VALUE
248
+ unindent(VALUE con, int start_col)
249
+ {
250
+ VALUE re;
251
+ // Gherkin will crash gracefully if the string representation of start_col pushes the pattern past 32 characters
252
+ char pat[32];
253
+ snprintf(pat, 32, "^[\t ]{0,%d}", start_col);
254
+ re = rb_reg_regcomp(rb_str_new2(pat));
255
+ rb_funcall(con, rb_intern("gsub!"), 2, re, rb_str_new2(""));
256
+
257
+ return Qnil;
258
+
259
+ }
260
+
261
+ static void
262
+ store_kw_con(VALUE listener, const char * event_name,
263
+ const char * keyword_at, size_t keyword_length,
264
+ const char * at, size_t length,
265
+ int current_line)
266
+ {
267
+ VALUE con = Qnil, kw = Qnil;
268
+ kw = ENCODED_STR_NEW(keyword_at, keyword_length);
269
+ con = ENCODED_STR_NEW(at, length);
270
+ rb_funcall(con, rb_intern("strip!"), 0);
271
+ rb_funcall(listener, rb_intern(event_name), 3, kw, con, INT2FIX(current_line));
272
+ }
273
+
274
+ static void
275
+ store_multiline_kw_con(VALUE listener, const char * event_name,
276
+ const char * keyword_at, size_t keyword_length,
277
+ const char * at, size_t length,
278
+ int current_line, int start_col)
279
+ {
280
+ VALUE split;
281
+ VALUE con = Qnil, kw = Qnil, name = Qnil, desc = Qnil;
282
+
283
+ kw = ENCODED_STR_NEW(keyword_at, keyword_length);
284
+ con = ENCODED_STR_NEW(at, length);
285
+
286
+ unindent(con, start_col);
287
+
288
+ split = rb_str_split(con, "\n");
289
+
290
+ name = rb_funcall(split, rb_intern("shift"), 0);
291
+ desc = rb_ary_join(split, rb_str_new2( "\n" ));
292
+
293
+ if( name == Qnil )
294
+ {
295
+ name = rb_str_new2("");
296
+ }
297
+ if( rb_funcall(desc, rb_intern("size"), 0) == 0)
298
+ {
299
+ desc = rb_str_new2("");
300
+ }
301
+ rb_funcall(name, rb_intern("strip!"), 0);
302
+ rb_funcall(desc, rb_intern("rstrip!"), 0);
303
+ rb_funcall(listener, rb_intern(event_name), 4, kw, name, desc, INT2FIX(current_line));
304
+ }
305
+
306
+ static void
307
+ store_attr(VALUE listener, const char * attr_type,
308
+ const char * at, size_t length,
309
+ int line)
310
+ {
311
+ VALUE val = ENCODED_STR_NEW(at, length);
312
+ rb_funcall(listener, rb_intern(attr_type), 2, val, INT2FIX(line));
313
+ }
314
+
315
+ static void
316
+ store_pystring_content(VALUE listener,
317
+ int start_col,
318
+ const char *at, size_t length,
319
+ int current_line)
320
+ {
321
+ VALUE re2;
322
+ VALUE unescape_escaped_quotes;
323
+ VALUE con = ENCODED_STR_NEW(at, length);
324
+
325
+ unindent(con, start_col);
326
+
327
+ re2 = rb_reg_regcomp(rb_str_new2("\r\\Z"));
328
+ unescape_escaped_quotes = rb_reg_regcomp(rb_str_new2("\\\\\"\\\\\"\\\\\""));
329
+ rb_funcall(con, rb_intern("sub!"), 2, re2, rb_str_new2(""));
330
+ rb_funcall(con, rb_intern("gsub!"), 2, unescape_escaped_quotes, rb_str_new2("\"\"\""));
331
+ rb_funcall(listener, rb_intern("py_string"), 2, con, INT2FIX(current_line));
332
+ }
333
+
334
+ static void
335
+ raise_lexer_error(const char * at, int line)
336
+ {
337
+ rb_raise(rb_eGherkinLexingError, "Lexing error on line %d: '%s'. See http://wiki.github.com/aslakhellesoy/gherkin/lexingerror for more information.", line, at);
338
+ }
339
+
340
+ static void lexer_init(lexer_state *lexer) {
341
+ lexer->content_start = 0;
342
+ lexer->content_end = 0;
343
+ lexer->content_len = 0;
344
+ lexer->mark = 0;
345
+ lexer->keyword_start = 0;
346
+ lexer->keyword_end = 0;
347
+ lexer->next_keyword_start = 0;
348
+ lexer->line_number = 1;
349
+ lexer->last_newline = 0;
350
+ lexer->final_newline = 0;
351
+ lexer->start_col = 0;
352
+ }
353
+
354
+ static VALUE CLexer_alloc(VALUE klass)
355
+ {
356
+ VALUE obj;
357
+ lexer_state *lxr = ALLOC(lexer_state);
358
+ lexer_init(lxr);
359
+
360
+ obj = Data_Wrap_Struct(klass, NULL, -1, lxr);
361
+
362
+ return obj;
363
+ }
364
+
365
+ static VALUE CLexer_init(VALUE self, VALUE listener)
366
+ {
367
+ lexer_state *lxr;
368
+ rb_iv_set(self, "@listener", listener);
369
+
370
+ lxr = NULL;
371
+ DATA_GET(self, lexer_state, lxr);
372
+ lexer_init(lxr);
373
+
374
+ return self;
375
+ }
376
+
377
+ static VALUE CLexer_scan(VALUE self, VALUE input)
378
+ {
379
+ VALUE input_copy;
380
+ char *data;
381
+ size_t len;
382
+ VALUE listener = rb_iv_get(self, "@listener");
383
+
384
+ lexer_state *lexer;
385
+ lexer = NULL;
386
+ DATA_GET(self, lexer_state, lexer);
387
+
388
+ input_copy = rb_str_dup(input);
389
+
390
+ rb_str_append(input_copy, rb_str_new2("\n%_FEATURE_END_%"));
391
+ data = RSTRING_PTR(input_copy);
392
+ len = RSTRING_LEN(input_copy);
393
+
394
+ if (len == 0) {
395
+ rb_raise(rb_eGherkinLexingError, "No content to lex.");
396
+ } else {
397
+
398
+ const char *p, *pe, *eof;
399
+ int cs = 0;
400
+
401
+ VALUE current_row = Qnil;
402
+
403
+ p = data;
404
+ pe = data + len;
405
+ eof = pe;
406
+
407
+ assert(*pe == '\0' && "pointer does not end on NULL");
408
+
409
+ %% write init;
410
+ %% write exec;
411
+
412
+ assert(p <= pe && "data overflow after parsing execute");
413
+ assert(lexer->content_start <= len && "content starts after data end");
414
+ assert(lexer->mark < len && "mark is after data end");
415
+
416
+ // Reset lexer by re-initializing the whole thing
417
+ lexer_init(lexer);
418
+
419
+ if (cs == lexer_error) {
420
+ rb_raise(rb_eGherkinLexingError, "Invalid format, lexing fails.");
421
+ } else {
422
+ return Qtrue;
423
+ }
424
+ }
425
+ }
426
+
427
+ void Init_gherkin_lexer_<%= @i18n.underscored_iso_code %>()
428
+ {
429
+ mGherkin = rb_define_module("Gherkin");
430
+ mGherkinLexer = rb_define_module_under(mGherkin, "Lexer");
431
+ rb_eGherkinLexingError = rb_const_get(mGherkinLexer, rb_intern("LexingError"));
432
+
433
+ mCLexer = rb_define_module_under(mGherkin, "CLexer");
434
+ cI18nLexer = rb_define_class_under(mCLexer, "<%= @i18n.underscored_iso_code.capitalize %>", rb_cObject);
435
+ rb_define_alloc_func(cI18nLexer, CLexer_alloc);
436
+ rb_define_method(cI18nLexer, "initialize", CLexer_init, 1);
437
+ rb_define_method(cI18nLexer, "scan", CLexer_scan, 1);
438
+ }
439
+