ripper_ruby_parser 1.7.0 → 1.9.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (48) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +76 -0
  3. data/README.md +6 -4
  4. data/lib/ripper_ruby_parser/commenting_ripper_parser.rb +24 -12
  5. data/lib/ripper_ruby_parser/sexp_handlers.rb +2 -0
  6. data/lib/ripper_ruby_parser/sexp_handlers/assignment.rb +9 -4
  7. data/lib/ripper_ruby_parser/sexp_handlers/blocks.rb +40 -52
  8. data/lib/ripper_ruby_parser/sexp_handlers/conditionals.rb +17 -19
  9. data/lib/ripper_ruby_parser/sexp_handlers/helper_methods.rb +35 -2
  10. data/lib/ripper_ruby_parser/sexp_handlers/literals.rb +15 -242
  11. data/lib/ripper_ruby_parser/sexp_handlers/method_calls.rb +9 -5
  12. data/lib/ripper_ruby_parser/sexp_handlers/methods.rb +22 -17
  13. data/lib/ripper_ruby_parser/sexp_handlers/operators.rb +3 -3
  14. data/lib/ripper_ruby_parser/sexp_handlers/string_literals.rb +256 -0
  15. data/lib/ripper_ruby_parser/sexp_processor.rb +12 -56
  16. data/lib/ripper_ruby_parser/unescape.rb +89 -43
  17. data/lib/ripper_ruby_parser/version.rb +1 -1
  18. metadata +125 -76
  19. data/Rakefile +0 -33
  20. data/test/end_to_end/comments_test.rb +0 -59
  21. data/test/end_to_end/comparison_test.rb +0 -104
  22. data/test/end_to_end/lib_comparison_test.rb +0 -29
  23. data/test/end_to_end/line_numbering_test.rb +0 -31
  24. data/test/end_to_end/samples_comparison_test.rb +0 -13
  25. data/test/end_to_end/test_comparison_test.rb +0 -32
  26. data/test/pt_testcase/pt_test.rb +0 -44
  27. data/test/ripper_ruby_parser/commenting_ripper_parser_test.rb +0 -200
  28. data/test/ripper_ruby_parser/parser_test.rb +0 -553
  29. data/test/ripper_ruby_parser/sexp_handlers/assignment_test.rb +0 -613
  30. data/test/ripper_ruby_parser/sexp_handlers/blocks_test.rb +0 -679
  31. data/test/ripper_ruby_parser/sexp_handlers/conditionals_test.rb +0 -536
  32. data/test/ripper_ruby_parser/sexp_handlers/literals_test.rb +0 -1106
  33. data/test/ripper_ruby_parser/sexp_handlers/loops_test.rb +0 -209
  34. data/test/ripper_ruby_parser/sexp_handlers/method_calls_test.rb +0 -267
  35. data/test/ripper_ruby_parser/sexp_handlers/methods_test.rb +0 -421
  36. data/test/ripper_ruby_parser/sexp_handlers/operators_test.rb +0 -399
  37. data/test/ripper_ruby_parser/sexp_processor_test.rb +0 -303
  38. data/test/ripper_ruby_parser/version_test.rb +0 -7
  39. data/test/samples/assignment.rb +0 -17
  40. data/test/samples/comments.rb +0 -13
  41. data/test/samples/conditionals.rb +0 -23
  42. data/test/samples/lambdas.rb +0 -5
  43. data/test/samples/loops.rb +0 -36
  44. data/test/samples/misc.rb +0 -281
  45. data/test/samples/number.rb +0 -7
  46. data/test/samples/operators.rb +0 -18
  47. data/test/samples/strings.rb +0 -147
  48. data/test/test_helper.rb +0 -107
@@ -0,0 +1,256 @@
1
+ # frozen_string_literal: true
2
+
3
+ module RipperRubyParser
4
+ module SexpHandlers
5
+ # Sexp handlers for string and stringlike literals
6
+ module StringLiterals
7
+ def process_string_literal(exp)
8
+ _, content = exp.shift 2
9
+ process(content)
10
+ end
11
+
12
+ def process_string_content(exp)
13
+ _, *rest = shift_all exp
14
+ line, string, rest = extract_string_parts(rest)
15
+
16
+ if rest.empty?
17
+ with_line_number(line, s(:str, string))
18
+ else
19
+ s(:dstr, string, *rest)
20
+ end
21
+ end
22
+
23
+ alias process_word process_string_content
24
+
25
+ def process_string_embexpr(exp)
26
+ _, list = exp.shift 2
27
+
28
+ val = process(list.sexp_body.first)
29
+
30
+ case val.sexp_type
31
+ when :str, :dstr
32
+ val
33
+ when :void_stmt
34
+ s(:dstr, "", s(:evstr))
35
+ else
36
+ s(:dstr, "", s(:evstr, val))
37
+ end
38
+ end
39
+
40
+ def process_string_dvar(exp)
41
+ _, list = exp.shift 2
42
+ val = process(list)
43
+ s(:dstr, "", s(:evstr, val))
44
+ end
45
+
46
+ def process_string_concat(exp)
47
+ _, left, right = exp.shift 3
48
+
49
+ left = process(left)
50
+ right = process(right)
51
+
52
+ if left.sexp_type == :str
53
+ merge_left_into_right(left, right)
54
+ else
55
+ merge_right_into_left(left, right)
56
+ end
57
+ end
58
+
59
+ def process_xstring_literal(exp)
60
+ _, content = exp.shift 2
61
+ process(content)
62
+ end
63
+
64
+ def process_xstring(exp)
65
+ _, *rest = shift_all exp
66
+ line, string, rest = extract_string_parts(rest)
67
+ result = if rest.empty?
68
+ s(:xstr, string)
69
+ else
70
+ s(:dxstr, string, *rest)
71
+ end
72
+ result.line = line
73
+ result
74
+ end
75
+
76
+ def process_regexp_literal(exp)
77
+ _, content, (_, flags,) = exp.shift 3
78
+
79
+ content = process(content)
80
+ numflags = character_flags_to_numerical flags
81
+
82
+ if content.length == 2
83
+ return with_line_number(content.line, s(:lit, Regexp.new(content.last, numflags)))
84
+ end
85
+
86
+ content.sexp_type = :dregx_once if /o/.match?(flags)
87
+ content << numflags unless numflags == 0
88
+ content
89
+ end
90
+
91
+ def process_regexp(exp)
92
+ _, *rest = shift_all exp
93
+ line, string, rest = extract_string_parts(rest)
94
+ with_line_number(line, s(:dregx, string, *rest))
95
+ end
96
+
97
+ def process_symbol_literal(exp)
98
+ _, symbol = exp.shift 2
99
+ handle_symbol_content(symbol)
100
+ end
101
+
102
+ def process_symbol(exp)
103
+ _, node = exp.shift 2
104
+ handle_symbol_content(node)
105
+ end
106
+
107
+ def process_dyna_symbol(exp)
108
+ _, node = exp.shift 2
109
+ handle_dyna_symbol_content(node)
110
+ end
111
+
112
+ def process_qsymbols(exp)
113
+ _, *items = shift_all(exp)
114
+ items = items.map { |item| handle_symbol_content(item) }
115
+ s(:qsymbols, *items)
116
+ end
117
+
118
+ def process_symbols(exp)
119
+ _, *items = shift_all(exp)
120
+ items = items.map { |item| handle_dyna_symbol_content(item) }
121
+ s(:symbols, *items)
122
+ end
123
+
124
+ def process_at_tstring_content(exp)
125
+ _, content, pos, delim = exp.shift 4
126
+ string = fix_encoding handle_string_unescaping(content, delim)
127
+ with_position(pos, s(:str, string))
128
+ end
129
+
130
+ private
131
+
132
+ def extract_string_parts(list)
133
+ return nil, "", [] if list.empty?
134
+
135
+ list = merge_raw_string_literals list
136
+ list = map_process_list list
137
+ parts = unpack_dstr list
138
+ merge_initial_string_literals(parts)
139
+ end
140
+
141
+ def merge_raw_string_literals(list)
142
+ chunks = list.chunk { |it| it.sexp_type == :@tstring_content }
143
+ chunks.flat_map do |is_simple, items|
144
+ if is_simple && items.count > 1
145
+ head = items.first
146
+ contents = items.map { |it| it[1] }.join
147
+ [s(:@tstring_content, contents, head[2], head[3])]
148
+ else
149
+ items
150
+ end
151
+ end
152
+ end
153
+
154
+ def unpack_dstr(list)
155
+ list.flat_map do |item|
156
+ type, val, *rest = item
157
+ if type == :dstr
158
+ if val.empty?
159
+ rest
160
+ else
161
+ [s(:str, val), *rest]
162
+ end
163
+ else
164
+ [item]
165
+ end
166
+ end
167
+ end
168
+
169
+ def merge_initial_string_literals(parts)
170
+ string = ""
171
+ while parts.first&.sexp_type == :str
172
+ str = parts.shift
173
+ line ||= str.line
174
+ string += str.last
175
+ end
176
+
177
+ return line, string, parts
178
+ end
179
+
180
+ def character_flags_to_numerical(flags)
181
+ numflags = 0
182
+
183
+ numflags = Regexp::MULTILINE if /m/.match?(flags)
184
+ numflags |= Regexp::EXTENDED if /x/.match?(flags)
185
+ numflags |= Regexp::IGNORECASE if /i/.match?(flags)
186
+
187
+ numflags |= Regexp::NOENCODING if /n/.match?(flags)
188
+ numflags |= Regexp::FIXEDENCODING if /[ues]/.match?(flags)
189
+
190
+ numflags
191
+ end
192
+
193
+ def handle_dyna_symbol_content(node)
194
+ type, *body = *process(node)
195
+ case type
196
+ when :str, :xstr
197
+ s(:lit, body.first.to_sym)
198
+ when :dstr, :dxstr
199
+ s(:dsym, *body)
200
+ end
201
+ end
202
+
203
+ def handle_symbol_content(node)
204
+ if node.sexp_type == :@kw
205
+ symbol, position = extract_node_symbol_with_position(node)
206
+ with_position(position, s(:lit, symbol))
207
+ else
208
+ processed = process(node)
209
+ symbol = processed.last.to_sym
210
+ line = processed.line
211
+ with_line_number(line, s(:lit, symbol))
212
+ end
213
+ end
214
+
215
+ def merge_left_into_right(left, right)
216
+ right[1] = left.last + right[1]
217
+ right
218
+ end
219
+
220
+ def merge_right_into_left(left, right)
221
+ if right.sexp_type == :str
222
+ left.push right
223
+ else
224
+ _, first, *rest = right
225
+ left.push s(:str, first) unless first.empty?
226
+ left.push(*rest)
227
+ end
228
+ end
229
+
230
+ INTERPOLATING_HEREDOC = /^<<[-~]?[^-~']/.freeze
231
+ NON_INTERPOLATING_HEREDOC = /^<<[-~]?'/.freeze
232
+ INTERPOLATING_STRINGS = ['"', "`", ':"', /^%Q.$/, /^%.$/].freeze
233
+ NON_INTERPOLATING_STRINGS = ["'", ":'", /^%q.$/].freeze
234
+ INTERPOLATING_WORD_LIST = /^%[WI].$/.freeze
235
+ NON_INTERPOLATING_WORD_LIST = /^%[wi].$/.freeze
236
+ REGEXP_LITERALS = ["/", /^%r.$/].freeze
237
+
238
+ def handle_string_unescaping(content, delim)
239
+ case delim
240
+ when INTERPOLATING_HEREDOC, *INTERPOLATING_STRINGS
241
+ unescape(content)
242
+ when INTERPOLATING_WORD_LIST
243
+ unescape_wordlist_word(content)
244
+ when *NON_INTERPOLATING_STRINGS
245
+ simple_unescape(content, delim)
246
+ when *REGEXP_LITERALS
247
+ unescape_regexp(content)
248
+ when NON_INTERPOLATING_WORD_LIST
249
+ simple_unescape_wordlist_word(content, delim)
250
+ else
251
+ content
252
+ end
253
+ end
254
+ end
255
+ end
256
+ end
@@ -11,31 +11,16 @@ module RipperRubyParser
11
11
  class SexpProcessor < ::SexpProcessor
12
12
  include Unescape
13
13
 
14
- attr_reader :filename
15
- attr_reader :extra_compatible
14
+ attr_reader :filename, :extra_compatible
16
15
 
17
16
  def initialize(filename: nil, extra_compatible: nil)
18
17
  super()
19
18
 
20
- @processors[:@int] = :process_at_int
21
- @processors[:@float] = :process_at_float
22
- @processors[:@rational] = :process_at_rational
23
- @processors[:@CHAR] = :process_at_CHAR
24
- @processors[:@label] = :process_at_label
25
-
26
- @processors[:@const] = :process_at_const
27
- @processors[:@ident] = :process_at_ident
28
- @processors[:@cvar] = :process_at_cvar
29
- @processors[:@gvar] = :process_at_gvar
30
- @processors[:@ivar] = :process_at_ivar
31
- @processors[:@kw] = :process_at_kw
32
- @processors[:@op] = :process_at_op
33
- @processors[:@backref] = :process_at_backref
34
-
35
- @processors[:@backtick] = :process_at_backtick
36
- @processors[:@period] = :process_at_period
37
-
38
- @processors[:@tstring_content] = :process_at_tstring_content
19
+ public_methods.each do |name|
20
+ if name =~ /^process_at_(.*)/
21
+ @processors["@#{Regexp.last_match(1)}".to_sym] = name.to_sym
22
+ end
23
+ end
39
24
 
40
25
  @filename = filename
41
26
  @extra_compatible = extra_compatible
@@ -79,16 +64,10 @@ module RipperRubyParser
79
64
 
80
65
  def process_stmts(exp)
81
66
  _, *statements = shift_all(exp)
82
- statements = map_process_list_compact statements
83
- case statements.count
84
- when 0
85
- s(:void_stmt)
86
- when 1
87
- statements.first
88
- else
89
- first = statements.shift
90
- s(:block, *unwrap_block(first), *statements)
91
- end
67
+ statements = map_unwrap_begin_list map_process_list statements
68
+ line = statements.first.line
69
+ statements = reject_void_stmt statements
70
+ wrap_in_block(statements, line)
92
71
  end
93
72
 
94
73
  def process_var_ref(exp)
@@ -137,11 +116,7 @@ module RipperRubyParser
137
116
  def process_paren(exp)
138
117
  _, body = exp.shift 2
139
118
  result = process body
140
- if result.sexp_type == :void_stmt
141
- s(:nil)
142
- else
143
- result
144
- end
119
+ convert_void_stmt_to_nil_symbol result
145
120
  end
146
121
 
147
122
  def process_comment(exp)
@@ -170,25 +145,6 @@ module RipperRubyParser
170
145
  with_position pos, s(:iter, s(:postexe), 0, *body)
171
146
  end
172
147
 
173
- # number literals
174
- def process_at_int(exp)
175
- make_literal(exp) { |val| Integer(val) }
176
- end
177
-
178
- def process_at_float(exp)
179
- make_literal(exp, &:to_f)
180
- end
181
-
182
- def process_at_rational(exp)
183
- make_literal(exp, &:to_r)
184
- end
185
-
186
- # character literals
187
- def process_at_CHAR(exp)
188
- _, val, pos = exp.shift 3
189
- with_position(pos, s(:str, unescape(val[1..-1])))
190
- end
191
-
192
148
  def process_at_label(exp)
193
149
  make_literal(exp) { |val| val.chop.to_sym }
194
150
  end
@@ -265,7 +221,7 @@ module RipperRubyParser
265
221
  def class_or_module_body(exp)
266
222
  body = process(exp)
267
223
 
268
- return body if body.empty?
224
+ return [] if body.sexp_type == :void_stmt
269
225
 
270
226
  unwrap_block body
271
227
  end
@@ -7,19 +7,19 @@ module RipperRubyParser
7
7
  module Unescape
8
8
  ESCAPE_SEQUENCE_REGEXP =
9
9
  /\\(
10
- [0-7]{1,3} | # octal character
11
- x[0-9a-fA-F]{1,2} | # hex byte
12
- u[0-9a-fA-F]+ | # unicode character
13
- u{[0-9a-fA-F]{4}} | # unicode character
14
- M-\\C-. | # meta-ctrl
15
- C-\\M-. | # ctrl-meta
16
- M-\\c. | # meta-ctrl (shorthand)
17
- c\\M-. | # ctrl-meta (shorthand)
18
- C-. | # control (regular)
19
- c. | # control (shorthand)
20
- M-. | # meta
21
- \n | # line continuation
22
- . # single-character
10
+ [0-7]{1,3} | # octal character
11
+ x[0-9a-fA-F]{1,2} | # hex byte
12
+ u[0-9a-fA-F]{4} | # unicode character
13
+ u{[0-9a-fA-F]{4,6}} | # unicode character
14
+ M-\\C-. | # meta-ctrl
15
+ C-\\M-. | # ctrl-meta
16
+ M-\\c. | # meta-ctrl (shorthand)
17
+ c\\M-. | # ctrl-meta (shorthand)
18
+ C-. | # control (regular)
19
+ c. | # control (shorthand)
20
+ M-. | # meta
21
+ \n | # line break
22
+ . # other single character
23
23
  )/x.freeze
24
24
 
25
25
  SINGLE_LETTER_ESCAPES = {
@@ -37,33 +37,49 @@ module RipperRubyParser
37
37
  SINGLE_LETTER_ESCAPES_REGEXP =
38
38
  Regexp.new("^[#{SINGLE_LETTER_ESCAPES.keys.join}]$")
39
39
 
40
- def simple_unescape(string)
41
- string.gsub(/\\(
42
- ' | # single quote
43
- \\ # backslash
44
- )/x) do
45
- Regexp.last_match[1]
46
- end
40
+ DELIMITER_PAIRS = {
41
+ "(" => "()",
42
+ "<" => "<>",
43
+ "[" => "[]",
44
+ "{" => "{}"
45
+ }.freeze
46
+
47
+ def simple_unescape(string, delimiter)
48
+ delimiters = delimiter_regexp_pattern(delimiter)
49
+ string.gsub(/
50
+ \\ # a backslash
51
+ ( # followed by a
52
+ #{delimiters} | # delimiter or
53
+ \\ # backslash
54
+ )/x) do
55
+ Regexp.last_match[1]
56
+ end
47
57
  end
48
58
 
49
- def simple_unescape_wordlist_word(string)
50
- string.gsub(/\\(
51
- ' | # single quote
52
- \\ | # backslash
53
- [ ] | # space
54
- \n # newline
55
- )/x) do
56
- Regexp.last_match[1]
57
- end
59
+ def simple_unescape_wordlist_word(string, delimiter)
60
+ delimiters = delimiter_regexp_pattern(delimiter)
61
+ string.gsub(/
62
+ \\ # a backslash
63
+ ( # followed by a
64
+ #{delimiters} | # delimiter or
65
+ \\ | # backslash or
66
+ [ ] | # space or
67
+ \n # newline
68
+ )
69
+ /x) do
70
+ Regexp.last_match[1]
71
+ end
58
72
  end
59
73
 
60
74
  def unescape(string)
75
+ string = string.dup if string.frozen?
76
+ string.force_encoding("ASCII-8BIT")
61
77
  string.gsub(ESCAPE_SEQUENCE_REGEXP) do
62
78
  bare = Regexp.last_match[1]
63
79
  if bare == "\n"
64
80
  ""
65
81
  else
66
- unescaped_value(bare)
82
+ unescaped_value(bare).force_encoding("ASCII-8BIT")
67
83
  end
68
84
  end
69
85
  end
@@ -90,7 +106,7 @@ module RipperRubyParser
90
106
  when "\n"
91
107
  ""
92
108
  else
93
- '\\\\'
109
+ "\\\\"
94
110
  end
95
111
  end
96
112
  end
@@ -100,26 +116,50 @@ module RipperRubyParser
100
116
  def unescaped_value(bare)
101
117
  case bare
102
118
  when SINGLE_LETTER_ESCAPES_REGEXP
103
- SINGLE_LETTER_ESCAPES[bare]
119
+ SINGLE_LETTER_ESCAPES[bare].dup
104
120
  when /^x/
105
- hex_to_char(bare[1..-1])
106
- when /^u\{/
107
- hex_to_unicode_char(bare[2..-2])
121
+ unescape_hex_char bare
108
122
  when /^u/
109
- hex_to_unicode_char(bare[1..4]) + bare[5..-1]
110
- when /^(c|C-).$/
111
- control(bare[-1].ord).chr
112
- when /^M-.$/
113
- meta(bare[-1].ord).chr
114
- when /^(M-\\C-|C-\\M-|M-\\c|c\\M-).$/
115
- meta(control(bare[-1].ord)).chr
123
+ unescape_unicode_char bare
124
+ when /^(c|C-|M-|M-\\C-|C-\\M-|M-\\c|c\\M-).$/
125
+ unescape_meta_control bare
116
126
  when /^[0-7]+/
117
- bare.to_i(8).chr
127
+ unescape_octal bare
118
128
  else
119
129
  bare
120
130
  end
121
131
  end
122
132
 
133
+ def unescape_hex_char(bare)
134
+ hex_to_char(bare[1..-1])
135
+ end
136
+
137
+ def unescape_unicode_char(bare)
138
+ hex_chars = if bare.start_with? "u{"
139
+ bare[2..-2]
140
+ else
141
+ bare[1..4]
142
+ end
143
+ hex_to_unicode_char(hex_chars)
144
+ end
145
+
146
+ def unescape_meta_control(bare)
147
+ base_value = bare[-1].ord
148
+ value = case bare
149
+ when /^(c|C-).$/
150
+ control(base_value)
151
+ when /^M-.$/
152
+ meta(base_value)
153
+ when /^(M-\\C-|C-\\M-|M-\\c|c\\M-).$/
154
+ meta(control(base_value))
155
+ end
156
+ value.chr
157
+ end
158
+
159
+ def unescape_octal(bare)
160
+ bare.to_i(8).chr
161
+ end
162
+
123
163
  def hex_to_unicode_char(str)
124
164
  str.to_i(16).chr(Encoding::UTF_8)
125
165
  end
@@ -135,5 +175,11 @@ module RipperRubyParser
135
175
  def meta(val)
136
176
  val | 0b1000_0000
137
177
  end
178
+
179
+ def delimiter_regexp_pattern(delimiter)
180
+ delimiter = delimiter[-1]
181
+ delimiters = DELIMITER_PAIRS.fetch(delimiter, delimiter)
182
+ delimiters.each_char.map { |it| Regexp.escape it }.join(" | ")
183
+ end
138
184
  end
139
185
  end