ripper_ruby_parser 1.7.0 → 1.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +76 -0
  3. data/README.md +6 -4
  4. data/lib/ripper_ruby_parser/commenting_ripper_parser.rb +24 -12
  5. data/lib/ripper_ruby_parser/sexp_handlers.rb +2 -0
  6. data/lib/ripper_ruby_parser/sexp_handlers/assignment.rb +9 -4
  7. data/lib/ripper_ruby_parser/sexp_handlers/blocks.rb +40 -52
  8. data/lib/ripper_ruby_parser/sexp_handlers/conditionals.rb +17 -19
  9. data/lib/ripper_ruby_parser/sexp_handlers/helper_methods.rb +35 -2
  10. data/lib/ripper_ruby_parser/sexp_handlers/literals.rb +15 -242
  11. data/lib/ripper_ruby_parser/sexp_handlers/method_calls.rb +9 -5
  12. data/lib/ripper_ruby_parser/sexp_handlers/methods.rb +22 -17
  13. data/lib/ripper_ruby_parser/sexp_handlers/operators.rb +3 -3
  14. data/lib/ripper_ruby_parser/sexp_handlers/string_literals.rb +256 -0
  15. data/lib/ripper_ruby_parser/sexp_processor.rb +12 -56
  16. data/lib/ripper_ruby_parser/unescape.rb +89 -43
  17. data/lib/ripper_ruby_parser/version.rb +1 -1
  18. metadata +125 -76
  19. data/Rakefile +0 -33
  20. data/test/end_to_end/comments_test.rb +0 -59
  21. data/test/end_to_end/comparison_test.rb +0 -104
  22. data/test/end_to_end/lib_comparison_test.rb +0 -29
  23. data/test/end_to_end/line_numbering_test.rb +0 -31
  24. data/test/end_to_end/samples_comparison_test.rb +0 -13
  25. data/test/end_to_end/test_comparison_test.rb +0 -32
  26. data/test/pt_testcase/pt_test.rb +0 -44
  27. data/test/ripper_ruby_parser/commenting_ripper_parser_test.rb +0 -200
  28. data/test/ripper_ruby_parser/parser_test.rb +0 -553
  29. data/test/ripper_ruby_parser/sexp_handlers/assignment_test.rb +0 -613
  30. data/test/ripper_ruby_parser/sexp_handlers/blocks_test.rb +0 -679
  31. data/test/ripper_ruby_parser/sexp_handlers/conditionals_test.rb +0 -536
  32. data/test/ripper_ruby_parser/sexp_handlers/literals_test.rb +0 -1106
  33. data/test/ripper_ruby_parser/sexp_handlers/loops_test.rb +0 -209
  34. data/test/ripper_ruby_parser/sexp_handlers/method_calls_test.rb +0 -267
  35. data/test/ripper_ruby_parser/sexp_handlers/methods_test.rb +0 -421
  36. data/test/ripper_ruby_parser/sexp_handlers/operators_test.rb +0 -399
  37. data/test/ripper_ruby_parser/sexp_processor_test.rb +0 -303
  38. data/test/ripper_ruby_parser/version_test.rb +0 -7
  39. data/test/samples/assignment.rb +0 -17
  40. data/test/samples/comments.rb +0 -13
  41. data/test/samples/conditionals.rb +0 -23
  42. data/test/samples/lambdas.rb +0 -5
  43. data/test/samples/loops.rb +0 -36
  44. data/test/samples/misc.rb +0 -281
  45. data/test/samples/number.rb +0 -7
  46. data/test/samples/operators.rb +0 -18
  47. data/test/samples/strings.rb +0 -147
  48. data/test/test_helper.rb +0 -107
@@ -0,0 +1,256 @@
1
+ # frozen_string_literal: true
2
+
3
module RipperRubyParser
  module SexpHandlers
    # Sexp handlers for string and stringlike literals
    module StringLiterals
      # Unwraps a :string_literal node and processes the content node inside.
      def process_string_literal(exp)
        _, inner = exp.shift 2
        process(inner)
      end

      # Produces s(:str) when nothing remains after the leading string
      # literals have been merged, and s(:dstr) otherwise.
      def process_string_content(exp)
        _, *raw_parts = shift_all exp
        line, text, tail = extract_string_parts(raw_parts)
        return s(:dstr, text, *tail) unless tail.empty?

        with_line_number(line, s(:str, text))
      end

      alias process_word process_string_content

      # Handles an embedded expression. Processed :str and :dstr results are
      # returned as they are; anything else is wrapped in s(:evstr) inside an
      # empty-headed s(:dstr).
      def process_string_embexpr(exp)
        _, list = exp.shift 2

        embedded = process(list.sexp_body.first)
        kind = embedded.sexp_type

        return embedded if kind == :str || kind == :dstr
        return s(:dstr, "", s(:evstr)) if kind == :void_stmt

        s(:dstr, "", s(:evstr, embedded))
      end

      # Wraps the processed variable node in s(:evstr) inside an
      # empty-headed s(:dstr).
      def process_string_dvar(exp)
        _, list = exp.shift 2
        s(:dstr, "", s(:evstr, process(list)))
      end

      # Combines two processed string nodes into one. Which operand absorbs
      # the other depends on whether the left one is a plain :str.
      def process_string_concat(exp)
        _, lhs, rhs = exp.shift 3

        lhs = process(lhs)
        rhs = process(rhs)

        return merge_left_into_right(lhs, rhs) if lhs.sexp_type == :str

        merge_right_into_left(lhs, rhs)
      end

      # Unwraps an :xstring_literal node and processes the content node inside.
      def process_xstring_literal(exp)
        _, inner = exp.shift 2
        process(inner)
      end

      # Produces s(:xstr) or s(:dxstr), with the line taken from the
      # extracted string parts.
      def process_xstring(exp)
        _, *raw_parts = shift_all exp
        line, text, tail = extract_string_parts(raw_parts)
        node = tail.empty? ? s(:xstr, text) : s(:dxstr, text, *tail)
        node.line = line
        node
      end

      # Turns a regexp literal into s(:lit, Regexp) when the processed content
      # has exactly two elements. Otherwise the processed content node is
      # returned, retyped to :dregx_once for the "o" flag and with the numeric
      # flags appended when they are non-zero.
      def process_regexp_literal(exp)
        _, content, (_, flags,) = exp.shift 3

        content = process(content)
        numflags = character_flags_to_numerical flags

        if content.length == 2
          literal = s(:lit, Regexp.new(content.last, numflags))
          return with_line_number(content.line, literal)
        end

        content.sexp_type = :dregx_once if /o/.match?(flags)
        content << numflags unless numflags.zero?
        content
      end

      # Always produces s(:dregx); process_regexp_literal above is where a
      # two-element result becomes s(:lit).
      def process_regexp(exp)
        _, *raw_parts = shift_all exp
        line, text, tail = extract_string_parts(raw_parts)
        with_line_number(line, s(:dregx, text, *tail))
      end

      def process_symbol_literal(exp)
        _, inner = exp.shift 2
        handle_symbol_content(inner)
      end

      def process_symbol(exp)
        _, inner = exp.shift 2
        handle_symbol_content(inner)
      end

      def process_dyna_symbol(exp)
        _, inner = exp.shift 2
        handle_dyna_symbol_content(inner)
      end

      def process_qsymbols(exp)
        _, *entries = shift_all(exp)
        s(:qsymbols, *entries.map { |entry| handle_symbol_content(entry) })
      end

      def process_symbols(exp)
        _, *entries = shift_all(exp)
        s(:symbols, *entries.map { |entry| handle_dyna_symbol_content(entry) })
      end

      # Unescapes raw string content according to its delimiter, fixes the
      # encoding and wraps the result in a positioned s(:str).
      def process_at_tstring_content(exp)
        _, content, pos, delim = exp.shift 4
        unescaped = handle_string_unescaping(content, delim)
        with_position(pos, s(:str, fix_encoding(unescaped)))
      end

      private

      # Returns [line, leading_string, remaining_parts] for a list of raw
      # string part nodes.
      def extract_string_parts(list)
        return [nil, "", []] if list.empty?

        merged = merge_raw_string_literals(list)
        processed = map_process_list(merged)
        merge_initial_string_literals(unpack_dstr(processed))
      end

      # Joins each run of adjacent :@tstring_content nodes into a single node
      # that reuses the third and fourth elements of the run's first node.
      def merge_raw_string_literals(list)
        runs = list.chunk { |node| node.sexp_type == :@tstring_content }
        runs.flat_map do |raw, nodes|
          next nodes unless raw && nodes.count > 1

          first = nodes.first
          joined = nodes.map { |node| node[1] }.join
          [s(:@tstring_content, joined, first[2], first[3])]
        end
      end

      # Flattens nested :dstr nodes into their parts. A non-empty leading
      # string becomes its own s(:str) part.
      def unpack_dstr(list)
        list.flat_map do |item|
          type, head, *tail = item
          if type != :dstr
            [item]
          elsif head.empty?
            tail
          else
            [s(:str, head), *tail]
          end
        end
      end

      # Shifts leading :str parts off +parts+ and concatenates them.
      # Returns [line of the first such part, combined string, remaining parts].
      def merge_initial_string_literals(parts)
        line = nil
        text = ""
        while parts.first&.sexp_type == :str
          literal = parts.shift
          line ||= literal.line
          text += literal.last
        end

        [line, text, parts]
      end

      # Converts regexp flag characters into the OR-ed Regexp option bits.
      def character_flags_to_numerical(flags)
        [
          [/m/, Regexp::MULTILINE],
          [/x/, Regexp::EXTENDED],
          [/i/, Regexp::IGNORECASE],
          [/n/, Regexp::NOENCODING],
          [/[ues]/, Regexp::FIXEDENCODING]
        ].reduce(0) do |bits, (pattern, bit)|
          pattern.match?(flags) ? bits | bit : bits
        end
      end

      # Static content becomes s(:lit, symbol); interpolated content becomes
      # s(:dsym). Any other processed type yields nil.
      def handle_dyna_symbol_content(node)
        type, *body = *process(node)
        case type
        when :str, :xstr then s(:lit, body.first.to_sym)
        when :dstr, :dxstr then s(:dsym, *body)
        end
      end

      # Builds s(:lit, symbol). :@kw nodes take the symbol and position from
      # extract_node_symbol_with_position; other nodes are processed first.
      def handle_symbol_content(node)
        if node.sexp_type == :@kw
          symbol, position = extract_node_symbol_with_position(node)
          return with_position(position, s(:lit, symbol))
        end

        processed = process(node)
        with_line_number(processed.line, s(:lit, processed.last.to_sym))
      end

      # Prepends the left node's string to the right node's leading string.
      def merge_left_into_right(left, right)
        right[1] = left.last + right[1]
        right
      end

      # Appends the right node (or its unpacked parts) to the left node.
      def merge_right_into_left(left, right)
        return left.push(right) if right.sexp_type == :str

        _, head, *tail = right
        left.push s(:str, head) unless head.empty?
        left.push(*tail)
      end

      INTERPOLATING_HEREDOC = /^<<[-~]?[^-~']/.freeze
      NON_INTERPOLATING_HEREDOC = /^<<[-~]?'/.freeze
      INTERPOLATING_STRINGS = ['"', "`", ':"', /^%Q.$/, /^%.$/].freeze
      NON_INTERPOLATING_STRINGS = ["'", ":'", /^%q.$/].freeze
      INTERPOLATING_WORD_LIST = /^%[WI].$/.freeze
      NON_INTERPOLATING_WORD_LIST = /^%[wi].$/.freeze
      REGEXP_LITERALS = ["/", /^%r.$/].freeze

      # Picks the unescaping routine from the literal's opening delimiter.
      # Content with an unmatched delimiter is returned untouched.
      def handle_string_unescaping(content, delim)
        case delim
        when INTERPOLATING_HEREDOC, *INTERPOLATING_STRINGS then unescape(content)
        when INTERPOLATING_WORD_LIST then unescape_wordlist_word(content)
        when *NON_INTERPOLATING_STRINGS then simple_unescape(content, delim)
        when *REGEXP_LITERALS then unescape_regexp(content)
        when NON_INTERPOLATING_WORD_LIST then simple_unescape_wordlist_word(content, delim)
        else content
        end
      end
    end
  end
end
@@ -11,31 +11,16 @@ module RipperRubyParser
11
11
  class SexpProcessor < ::SexpProcessor
12
12
  include Unescape
13
13
 
14
- attr_reader :filename
15
- attr_reader :extra_compatible
14
+ attr_reader :filename, :extra_compatible
16
15
 
17
16
  def initialize(filename: nil, extra_compatible: nil)
18
17
  super()
19
18
 
20
- @processors[:@int] = :process_at_int
21
- @processors[:@float] = :process_at_float
22
- @processors[:@rational] = :process_at_rational
23
- @processors[:@CHAR] = :process_at_CHAR
24
- @processors[:@label] = :process_at_label
25
-
26
- @processors[:@const] = :process_at_const
27
- @processors[:@ident] = :process_at_ident
28
- @processors[:@cvar] = :process_at_cvar
29
- @processors[:@gvar] = :process_at_gvar
30
- @processors[:@ivar] = :process_at_ivar
31
- @processors[:@kw] = :process_at_kw
32
- @processors[:@op] = :process_at_op
33
- @processors[:@backref] = :process_at_backref
34
-
35
- @processors[:@backtick] = :process_at_backtick
36
- @processors[:@period] = :process_at_period
37
-
38
- @processors[:@tstring_content] = :process_at_tstring_content
19
+ public_methods.each do |name|
20
+ if name =~ /^process_at_(.*)/
21
+ @processors["@#{Regexp.last_match(1)}".to_sym] = name.to_sym
22
+ end
23
+ end
39
24
 
40
25
  @filename = filename
41
26
  @extra_compatible = extra_compatible
@@ -79,16 +64,10 @@ module RipperRubyParser
79
64
 
80
65
  def process_stmts(exp)
81
66
  _, *statements = shift_all(exp)
82
- statements = map_process_list_compact statements
83
- case statements.count
84
- when 0
85
- s(:void_stmt)
86
- when 1
87
- statements.first
88
- else
89
- first = statements.shift
90
- s(:block, *unwrap_block(first), *statements)
91
- end
67
+ statements = map_unwrap_begin_list map_process_list statements
68
+ line = statements.first.line
69
+ statements = reject_void_stmt statements
70
+ wrap_in_block(statements, line)
92
71
  end
93
72
 
94
73
  def process_var_ref(exp)
@@ -137,11 +116,7 @@ module RipperRubyParser
137
116
  def process_paren(exp)
138
117
  _, body = exp.shift 2
139
118
  result = process body
140
- if result.sexp_type == :void_stmt
141
- s(:nil)
142
- else
143
- result
144
- end
119
+ convert_void_stmt_to_nil_symbol result
145
120
  end
146
121
 
147
122
  def process_comment(exp)
@@ -170,25 +145,6 @@ module RipperRubyParser
170
145
  with_position pos, s(:iter, s(:postexe), 0, *body)
171
146
  end
172
147
 
173
- # number literals
174
- def process_at_int(exp)
175
- make_literal(exp) { |val| Integer(val) }
176
- end
177
-
178
- def process_at_float(exp)
179
- make_literal(exp, &:to_f)
180
- end
181
-
182
- def process_at_rational(exp)
183
- make_literal(exp, &:to_r)
184
- end
185
-
186
- # character literals
187
- def process_at_CHAR(exp)
188
- _, val, pos = exp.shift 3
189
- with_position(pos, s(:str, unescape(val[1..-1])))
190
- end
191
-
192
148
  def process_at_label(exp)
193
149
  make_literal(exp) { |val| val.chop.to_sym }
194
150
  end
@@ -265,7 +221,7 @@ module RipperRubyParser
265
221
  def class_or_module_body(exp)
266
222
  body = process(exp)
267
223
 
268
- return body if body.empty?
224
+ return [] if body.sexp_type == :void_stmt
269
225
 
270
226
  unwrap_block body
271
227
  end
@@ -7,19 +7,19 @@ module RipperRubyParser
7
7
  module Unescape
8
8
  ESCAPE_SEQUENCE_REGEXP =
9
9
  /\\(
10
- [0-7]{1,3} | # octal character
11
- x[0-9a-fA-F]{1,2} | # hex byte
12
- u[0-9a-fA-F]+ | # unicode character
13
- u{[0-9a-fA-F]{4}} | # unicode character
14
- M-\\C-. | # meta-ctrl
15
- C-\\M-. | # ctrl-meta
16
- M-\\c. | # meta-ctrl (shorthand)
17
- c\\M-. | # ctrl-meta (shorthand)
18
- C-. | # control (regular)
19
- c. | # control (shorthand)
20
- M-. | # meta
21
- \n | # line continuation
22
- . # single-character
10
+ [0-7]{1,3} | # octal character
11
+ x[0-9a-fA-F]{1,2} | # hex byte
12
+ u[0-9a-fA-F]{4} | # unicode character
13
+ u{[0-9a-fA-F]{4,6}} | # unicode character
14
+ M-\\C-. | # meta-ctrl
15
+ C-\\M-. | # ctrl-meta
16
+ M-\\c. | # meta-ctrl (shorthand)
17
+ c\\M-. | # ctrl-meta (shorthand)
18
+ C-. | # control (regular)
19
+ c. | # control (shorthand)
20
+ M-. | # meta
21
+ \n | # line break
22
+ . # other single character
23
23
  )/x.freeze
24
24
 
25
25
  SINGLE_LETTER_ESCAPES = {
@@ -37,33 +37,49 @@ module RipperRubyParser
37
37
  SINGLE_LETTER_ESCAPES_REGEXP =
38
38
  Regexp.new("^[#{SINGLE_LETTER_ESCAPES.keys.join}]$")
39
39
 
40
- def simple_unescape(string)
41
- string.gsub(/\\(
42
- ' | # single quote
43
- \\ # backslash
44
- )/x) do
45
- Regexp.last_match[1]
46
- end
40
+ DELIMITER_PAIRS = {
41
+ "(" => "()",
42
+ "<" => "<>",
43
+ "[" => "[]",
44
+ "{" => "{}"
45
+ }.freeze
46
+
47
+ def simple_unescape(string, delimiter)
48
+ delimiters = delimiter_regexp_pattern(delimiter)
49
+ string.gsub(/
50
+ \\ # a backslash
51
+ ( # followed by a
52
+ #{delimiters} | # delimiter or
53
+ \\ # backslash
54
+ )/x) do
55
+ Regexp.last_match[1]
56
+ end
47
57
  end
48
58
 
49
- def simple_unescape_wordlist_word(string)
50
- string.gsub(/\\(
51
- ' | # single quote
52
- \\ | # backslash
53
- [ ] | # space
54
- \n # newline
55
- )/x) do
56
- Regexp.last_match[1]
57
- end
59
+ def simple_unescape_wordlist_word(string, delimiter)
60
+ delimiters = delimiter_regexp_pattern(delimiter)
61
+ string.gsub(/
62
+ \\ # a backslash
63
+ ( # followed by a
64
+ #{delimiters} | # delimiter or
65
+ \\ | # backslash or
66
+ [ ] | # space or
67
+ \n # newline
68
+ )
69
+ /x) do
70
+ Regexp.last_match[1]
71
+ end
58
72
  end
59
73
 
60
74
  def unescape(string)
75
+ string = string.dup if string.frozen?
76
+ string.force_encoding("ASCII-8BIT")
61
77
  string.gsub(ESCAPE_SEQUENCE_REGEXP) do
62
78
  bare = Regexp.last_match[1]
63
79
  if bare == "\n"
64
80
  ""
65
81
  else
66
- unescaped_value(bare)
82
+ unescaped_value(bare).force_encoding("ASCII-8BIT")
67
83
  end
68
84
  end
69
85
  end
@@ -90,7 +106,7 @@ module RipperRubyParser
90
106
  when "\n"
91
107
  ""
92
108
  else
93
- '\\\\'
109
+ "\\\\"
94
110
  end
95
111
  end
96
112
  end
@@ -100,26 +116,50 @@ module RipperRubyParser
100
116
  def unescaped_value(bare)
101
117
  case bare
102
118
  when SINGLE_LETTER_ESCAPES_REGEXP
103
- SINGLE_LETTER_ESCAPES[bare]
119
+ SINGLE_LETTER_ESCAPES[bare].dup
104
120
  when /^x/
105
- hex_to_char(bare[1..-1])
106
- when /^u\{/
107
- hex_to_unicode_char(bare[2..-2])
121
+ unescape_hex_char bare
108
122
  when /^u/
109
- hex_to_unicode_char(bare[1..4]) + bare[5..-1]
110
- when /^(c|C-).$/
111
- control(bare[-1].ord).chr
112
- when /^M-.$/
113
- meta(bare[-1].ord).chr
114
- when /^(M-\\C-|C-\\M-|M-\\c|c\\M-).$/
115
- meta(control(bare[-1].ord)).chr
123
+ unescape_unicode_char bare
124
+ when /^(c|C-|M-|M-\\C-|C-\\M-|M-\\c|c\\M-).$/
125
+ unescape_meta_control bare
116
126
  when /^[0-7]+/
117
- bare.to_i(8).chr
127
+ unescape_octal bare
118
128
  else
119
129
  bare
120
130
  end
121
131
  end
122
132
 
133
# Converts a bare hex escape such as "x41" by dropping the leading "x"
# and handing the remaining digits to hex_to_char.
def unescape_hex_char(bare)
  hex_digits = bare.slice(1..-1)
  hex_to_char(hex_digits)
end
136
+
137
# Converts a bare unicode escape, either "uXXXX" or "u{...}", by passing
# its hex digits to hex_to_unicode_char.
def unescape_unicode_char(bare)
  # Braced form: strip "u{" and "}". Plain form: the four digits after "u".
  digits = bare.start_with?("u{") ? bare[2..-2] : bare[1..4]
  hex_to_unicode_char(digits)
end
145
+
146
# Converts a bare control and/or meta escape (e.g. "C-a", "M-a", "M-\\C-a")
# into a single character, based on the ordinal of its final character.
def unescape_meta_control(bare)
  code = bare[-1].ord
  transformed =
    if /^(c|C-).$/.match?(bare)
      control(code)
    elsif /^M-.$/.match?(bare)
      meta(code)
    elsif /^(M-\\C-|C-\\M-|M-\\c|c\\M-).$/.match?(bare)
      meta(control(code))
    end
  transformed.chr
end
158
+
159
# Converts the digits of an octal escape (e.g. "101") into a one-byte string.
#
# Up to three octal digits can encode values as high as 0o777 (511), which
# Integer#chr rejects with RangeError. Only the low eight bits are kept so
# that every escape yields a single byte.
def unescape_octal(bare)
  (bare.to_i(8) & 0xFF).chr
end
162
+
123
163
  def hex_to_unicode_char(str)
124
164
  str.to_i(16).chr(Encoding::UTF_8)
125
165
  end
@@ -135,5 +175,11 @@ module RipperRubyParser
135
175
  def meta(val)
136
176
  val | 0b1000_0000
137
177
  end
178
+
179
# Builds a regexp alternation matching the delimiter characters of a literal.
# Only the last character of +delimiter+ is used. An opening bracket listed
# in DELIMITER_PAIRS expands to both the opening and closing characters.
def delimiter_regexp_pattern(delimiter)
  opener = delimiter[-1]
  DELIMITER_PAIRS
    .fetch(opener, opener)
    .chars
    .map { |char| Regexp.escape(char) }
    .join(" | ")
end
138
184
  end
139
185
  end