sourcify 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (41) hide show
  1. data/.document +5 -0
  2. data/.gitignore +21 -0
  3. data/HISTORY.txt +4 -0
  4. data/LICENSE +20 -0
  5. data/README.rdoc +154 -0
  6. data/Rakefile +114 -0
  7. data/VERSION +1 -0
  8. data/lib/sourcify.rb +12 -0
  9. data/lib/sourcify/proc.rb +53 -0
  10. data/lib/sourcify/proc/counter.rb +41 -0
  11. data/lib/sourcify/proc/lexer.rb +40 -0
  12. data/lib/sourcify/proc/lexer18.rb +224 -0
  13. data/lib/sourcify/proc/lexer19.rb +195 -0
  14. data/lib/sourcify/proc/parser.rb +74 -0
  15. data/sourcify.gemspec +109 -0
  16. data/spec/proc/19x_extras.rb +27 -0
  17. data/spec/proc/readme +5 -0
  18. data/spec/proc/to_sexp_variables_spec.rb +146 -0
  19. data/spec/proc/to_source_from_braced_block_w_nested_braced_block_spec.rb +33 -0
  20. data/spec/proc/to_source_from_braced_block_w_nested_hash_spec.rb +34 -0
  21. data/spec/proc/to_source_from_braced_block_wo_nesting_complication_spec.rb +46 -0
  22. data/spec/proc/to_source_from_do_end_block_w_nested_begin_spec.rb +35 -0
  23. data/spec/proc/to_source_from_do_end_block_w_nested_case_spec.rb +35 -0
  24. data/spec/proc/to_source_from_do_end_block_w_nested_class_spec.rb +89 -0
  25. data/spec/proc/to_source_from_do_end_block_w_nested_do_end_block_spec.rb +33 -0
  26. data/spec/proc/to_source_from_do_end_block_w_nested_for_spec.rb +132 -0
  27. data/spec/proc/to_source_from_do_end_block_w_nested_if_spec.rb +73 -0
  28. data/spec/proc/to_source_from_do_end_block_w_nested_method_spec.rb +33 -0
  29. data/spec/proc/to_source_from_do_end_block_w_nested_module_spec.rb +49 -0
  30. data/spec/proc/to_source_from_do_end_block_w_nested_unless_spec.rb +73 -0
  31. data/spec/proc/to_source_from_do_end_block_w_nested_until_spec.rb +176 -0
  32. data/spec/proc/to_source_from_do_end_block_w_nested_while_spec.rb +176 -0
  33. data/spec/proc/to_source_from_do_end_block_wo_nesting_complication_spec.rb +46 -0
  34. data/spec/proc/to_source_from_multi_blocks_w_many_matches_spec.rb +73 -0
  35. data/spec/proc/to_source_from_multi_blocks_w_single_match_spec.rb +31 -0
  36. data/spec/proc/to_source_from_multi_do_end_blocks_w_single_match_spec.rb +31 -0
  37. data/spec/proc/to_source_magic_file_var_spec.rb +127 -0
  38. data/spec/proc/to_source_magic_line_var_spec.rb +127 -0
  39. data/spec/proc/to_source_variables_spec.rb +29 -0
  40. data/spec/spec_helper.rb +41 -0
  41. metadata +159 -0
@@ -0,0 +1,224 @@
1
+ require 'irb/ruby-lex'
2
+ require 'irb/ruby-token'
3
+
4
+ module Sourcify
5
+ module Proc
6
+
7
+ class Lexer18
8
+
9
+ # Implementation of this class has been inspired by the discussion at
10
+ # http://www.justskins.com/forums/breaking-ruby-code-into-117453.html
11
+
12
+ include Lexer::Commons
13
+
14
+ def initialize(io, file, line)
15
+ @file, @line, @io, @pos = file, line, io, io.pos
16
+ @lex = RubyLex.new
17
+ @lex.set_input(@io)
18
+ @lex.get_readed
19
+ end
20
+
21
# Pulls tokens from @lex one by one, records each as [line_no, char_no, type]
# in @tokens and dispatches to the matching on_<type> handler when one exists.
#
# The type symbol is the token's class name with the RubyToken:: prefix
# removed and lower-cased (e.g. RubyToken::TkDO => :tkdo).
#
# Token types without a handler are skipped. Errors raised inside a handler
# are no longer swallowed: the handler is looked up with respond_to? instead
# of rescuing whatever the call raises.
def lex
  (@tokens = []).extend(Extensions)
  @magic_lines = []

  while (@tk = @lex.token)
    tkc = @tk.class.to_s.sub(/\ARubyToken::/, '').downcase.to_sym
    @tokens << [@tk.line_no, @tk.char_no, tkc]
    # A quoted string seen on the previous token is closed off by this one.
    post_qstring if @qstring_data
    handler = :"on_#{tkc}"
    send(handler) if respond_to?(handler, true)
    @lex.get_readed if tkc == :tknl
  end
end
33
+
34
# Newline handler: once at least one result has been collected, signal
# EndOfLine; otherwise do nothing.
def on_tknl
  return if @results.empty?
  raise EndOfLine
end
37
+
38
# Records where a __LINE__ token sits (its seek offset) together with the
# line number it stands for (token line plus @line).
def on_tk__line__
  seek_pos = @tk.seek
  lineno = @tk.line_no + @line
  @magic_lines.push([seek_pos, lineno])
end
41
+
42
# Remembers the seek offset and line number (token line plus @line) of a
# quoted-string style token; post_qstring consumes this on the next token.
def on_tkdstring
  seek_pos = @tk.seek
  lineno = @tk.line_no + @line
  @qstring_data = [seek_pos, lineno]
end
45
+
46
+ alias_method :on_tkstring, :on_tkdstring # heredoc
47
+ alias_method :on_tkdregexp, :on_tkdstring # regexp
48
+ alias_method :on_tkdxstring, :on_tkdstring # ` command
49
+
50
# Turns the pending @qstring_data into a magic-line entry whose position is
# the range from the string's seek offset to the current token's seek
# offset, then clears the pending data.
def post_qstring
  start_pos, lineno = @qstring_data
  span = Range.new(start_pos, @tk.seek)
  @magic_lines.push([span, lineno])
  @qstring_data = nil
end
55
+
56
# Handles a 'do' token.
#
# The first 'do' marks where the block starts and opens the do/end count.
# A later 'do' opens another level unless a 'for', 'while' or 'until' sits
# on the same line, in which case the 'do' belongs to that keyword and is
# skipped.
def on_tkdo
  unless @do_end_counter.started?
    @do_end_counter.marker = @tk.seek
    return @do_end_counter.increment_start
  end

  loop_keywords = @tokens.same_as_curr_line.keywords(:tkfor, :tkwhile, :tkuntil)
  @do_end_counter.increment_start if loop_keywords.empty?
end
66
+
67
# Handles an 'end' token. When the do/end count is running and this 'end'
# balances it, the block text is grabbed (3 = offset passed for 'end'),
# multiline-ness is recorded and EndOfBlock is raised.
def on_tkend
  return unless @do_end_counter.started?
  return unless @do_end_counter.increment_end.telly?

  @result = grab_result_and_reset_lex(@do_end_counter.marker, 3)
  @is_multiline_block = @tokens.multiline?
  raise EndOfBlock
end
74
+
75
# Pretty straightforward: each of these keywords consumes an 'end' to close
# it, so it opens another level once the do/end count is running.
def on_tkclass
  return unless @do_end_counter.started?
  @do_end_counter.increment_start
end
79
+
80
+ # These work the same as 'class', the exception is 'for', which can have an optional
81
+ # 'do' attached:
82
+ # * for a in [1,2] do ... end
83
+ # * for a in [1,2] \n ... end
84
+ %w{def module begin case for}.each{|kw| alias_method :"on_tk#{kw}", :on_tkclass }
85
+
86
# 'while' has an optional trailing 'do' and can also act as a modifier:
# * while true do ... end   # => 'do' must be on the same line as 'while'
# * while true \n ... end
# * ... while true          # => 'while' is preceded by non-spaces
# A level is opened only when the do/end count is running and the token
# list reports the keyword as being at the start of its line.
def on_tkwhile
  return unless @do_end_counter.started?
  @do_end_counter.increment_start if @tokens.start_of_line?
end
95
+
96
+ # These work exactly the same as 'while'.
97
+ %w{until if unless}.each{|kw| alias_method :"on_tk#{kw}", :on_tkwhile }
98
+
99
# Handles '{'. Ignored while a do/end block is being tracked. Otherwise the
# first brace records the start marker, and every brace opens a level.
def on_tklbrace
  return if @do_end_counter.started?

  @braced_counter.marker = @tk.seek unless @braced_counter.started?
  @braced_counter.increment_start
end
105
+
106
# Handles '}'. When the brace count is running and this brace balances it,
# the block text is grabbed (1 = offset passed for '}'), multiline-ness is
# recorded and EndOfBlock is raised.
def on_tkrbrace
  return unless @braced_counter.started?
  return unless @braced_counter.increment_end.telly?

  @result = grab_result_and_reset_lex(@braced_counter.marker, 1)
  @is_multiline_block = @tokens.multiline?
  raise EndOfBlock
end
113
+
114
+ alias_method :on_tkflbrace, :on_tklbrace
115
+ alias_method :on_tkfrbrace, :on_tkrbrace
116
+
117
# Handles '=>'. If exactly one brace level is open, that level is undone,
# presumably because the brace turned out to open a hash literal rather
# than a block.
def on_tkassoc
  return unless @braced_counter.started?
  @braced_counter.decrement_start if @braced_counter[:start] == 1
end
122
+
123
# Handles '>'. When the two most recent tokens are '=' followed by '>',
# the pair is treated like a '=>' and handed to on_tkassoc.
#
# Array#last(2) is used instead of a [-2 .. -1] slice because the slice is
# nil when fewer than two tokens have been collected, which made the
# comparison raise NoMethodError.
def on_tkgt
  on_tkassoc if @tokens.last(2).map(&:last) == [:tkassign, :tkgt]
end
126
+
127
# Reads the block text out of @io and re-attaches the lexer to the stream.
#
# marker - seek offset (relative to @pos) where the block starts
# offset - number of characters the closing token contributes
#
# Returns the block text after magic-line replacement.
def grab_result_and_reset_lex(marker, offset)
  @io.seek(@pos + marker)
  length = @tk.seek - marker + offset
  raw = @io.read(length)
  code = replace_magic_lines(raw, marker)
  # NOTE(review): the stream is repositioned to @pos + length, not
  # @pos + marker + length -- confirm this is intended before changing it.
  @io.seek(@pos + length)
  @lex.set_input(@io)
  @lex.get_readed
  code
end
136
+
137
# Applies every recorded magic-line entry to +result+ in order.
#
# result - block text as read from the stream
# marker - seek offset of the block start
# offset - running correction for text already shortened/lengthened by
#          earlier replacements (callers normally leave the default)
#
# Each @magic_lines entry is [pos, val]; a Range position goes to
# replace_magic_line_by_range, anything else to replace_magic_line_by_fixnum.
# Dispatching on the type itself rather than on the lower-cased class name
# keeps this working where integers report their class as Integer.
#
# Returns the rewritten text.
def replace_magic_lines(result, marker, offset = 0)
  @magic_lines.inject(result) do |rs, (pos, val)|
    replacer = pos.is_a?(Range) ? :replace_magic_line_by_range : :replace_magic_line_by_fixnum
    n_rs = send(replacer, rs, marker + offset, pos, val)
    # Track how far the text has drifted from the original positions.
    offset = result.length - n_rs.length
    n_rs
  end
end
145
+
146
# Replaces the __LINE__ found (pos - offset) characters into +rs+ with
# the number val - 1.
#
# rs     - text to rewrite
# offset - amount to subtract from +pos+ to get an index into +rs+
# pos    - recorded seek offset of the __LINE__ token
# val    - recorded line value; its predecessor is what gets written
def replace_magic_line_by_fixnum(rs, offset, pos, val)
  lead = pos - offset
  head, tail = rs.match(/^(.{#{lead}})__LINE__(.*)$/m).captures
  [head, val.pred, tail].join
end
150
+
151
# Rewrites interpolated __LINE__ occurrences inside a quoted string.
#
# rs     - text to rewrite
# offset - unused here
# pos    - Range of seek offsets (relative to @pos) covering the string
# lineno - recorded line value of the string's first line
#
# The string is re-read from @io. Strings starting with ', %q or %w are
# returned untouched. Otherwise every "#{__LINE__}" on the i-th line of the
# string becomes "#{<lineno + i - 1>}", except where the "#{" is escaped
# with a backslash.
def replace_magic_line_by_range(rs, offset, pos, lineno)
  @io.seek(@pos + pos.begin)
  subject = @io.read(pos.end - pos.begin)
  return rs if %w{' %q %w}.any? { |opener| subject.start_with?(opener) }

  found = rs.match(/^(.*?)#{Regexp.quote(subject)}(.*)$/m)
  before, after = found[1..2]
  pattern = /(.*?\#\{)__LINE__(\})/
  lines = subject.split("\n")
  lines.each_with_index do |text, idx|
    text.gsub!(pattern) do |hit|
      parts = hit.match(pattern)[1..2]
      if parts[0].end_with?('\\#{')
        hit # escaped interpolation stays as written
      else
        parts.insert(1, (lineno + idx).pred.to_s).join
      end
    end
  end
  before + lines.join("\n") + after
end
164
+
165
+ # Ease working with the hybrid token set collected from RubyLex
166
module Extensions

  # Each entry is [row, col, type], as collected by the lexer.
  ROW, COL, TYP = 0, 1, 2

  # Token types that end a statement within a line. The lexer derives types
  # from the RubyToken class name, so a semicolon presumably arrives as
  # :tksemicolon; :tsemi is still accepted for backward compatibility.
  STATEMENT_BREAKS = [:tsemi, :tksemicolon]

  # Entries preceding the current one that belong to the same line.
  def same_as_curr_line
    same_line(curr_line)
  end

  # True when the first and last entries sit on different rows.
  def multiline?
    self[0][ROW] != self[-1][ROW]
  end

  # Row of the most recent entry.
  def curr_line
    curr[ROW]
  end

  # The most recent entry.
  def curr
    self[-1]
  end

  # Walks backwards from the entry before the current one, collecting
  # entries while they are on +line+. The walk stops at a statement break,
  # and carries over to the previous row when that row's entry is not a
  # newline token. Returns the entries in their original order.
  def same_line(line)
    earlier = self[0..-2].reverse.take_while do |e|
      if STATEMENT_BREAKS.include?(e[TYP])
        false
      elsif e[ROW] == line
        true
      elsif e[ROW] == line.pred && e[TYP] != :tknl
        line -= 1
        true
      end
    end
    earlier.reverse.extend(Extensions)
  end

  # Entries whose type is one of +types+.
  def keywords(*types)
    wanted = [types].flatten
    select { |e| wanted.include?(e[TYP]) }.extend(Extensions)
  end

  # Entries whose type is not one of +types+.
  # NOTE(review): with no types given every entry is rejected, so
  # start_of_line? is always true; callers may rely on that -- confirm
  # before changing.
  def non_spaces(*types)
    unwanted = [types].flatten
    reject { |e| unwanted.empty? or unwanted.include?(e[TYP]) }.extend(Extensions)
  end

  def start_of_line?
    same_as_curr_line.non_spaces.empty?
  end

end
221
+
222
+ end
223
+ end
224
+ end
@@ -0,0 +1,195 @@
1
+ require 'ripper'
2
+
3
+ module Sourcify
4
+ module Proc
5
+ class Lexer19 < Ripper::Lexer
6
+
7
+ include Lexer::Commons
8
+
9
# Newline handler: lets the parent class record the token, then signals
# EndOfLine once at least one result has been collected.
# Returns whatever the parent handler returned.
def on_nl(token)
  rs = super
  raise EndOfLine unless @results.empty?
  rs
end
14
+
15
+ alias_method :on_ignored_nl, :on_nl
16
+
17
# Keyword handler: lets the parent class record the token, then forwards
# the result set to on_kw_<keyword> when such a handler exists.
#
# Keywords without a handler are skipped. Errors raised inside a handler
# are no longer swallowed: the handler is looked up with respond_to?
# instead of rescuing whatever the call raises.
#
# Returns whatever the parent handler returned.
def on_kw(token)
  super.tap do |rs|
    handler = :"on_kw_#{token}"
    send(handler, rs) if respond_to?(handler, true)
  end
end
22
+
23
# Pretty straightforward: each of these keywords consumes an 'end' to close
# it, so it opens another level once the do/end count is running.
def on_kw_class(rs)
  return unless @do_end_counter.started?
  @do_end_counter.increment_start
end
27
+
28
+ # These work the same as 'class', the exception is 'for', which can have an optional
29
+ # 'do' attached:
30
+ # * for a in [1,2] do ... end
31
+ # * for a in [1,2] \n ... end
32
+ %w{def module begin case for}.each{|kw| alias_method :"on_kw_#{kw}", :on_kw_class }
33
+
34
# 'while' has an optional trailing 'do' and can also act as a modifier:
# * while true do ... end   # => 'do' must be on the same line as 'while'
# * while true \n ... end
# * ... while true          # => 'while' is preceded by non-spaces
# A level is opened only when the do/end count is running and the keyword
# is at the start of its line or directly follows an opening parenthesis.
def on_kw_while(rs)
  return unless @do_end_counter.started?
  return unless rs.start_of_line? || rs.within_block?

  @do_end_counter.increment_start
end
43
+
44
+ # These work exactly the same as 'while'.
45
+ %w{until if unless}.each{|kw| alias_method :"on_kw_#{kw}", :on_kw_while }
46
+
47
# Handles 'do'.
#
# The first 'do' marks the current entry as the block start and opens the
# do/end count (extending the result set with Extensions if needed).
# A later 'do' opens another level unless a 'for', 'while' or 'until' sits
# on the same line, in which case the 'do' belongs to that keyword and is
# skipped.
def on_kw_do(rs)
  if @do_end_counter.started?
    attached = rs.same_as_curr_line.keywords(%w{for while until})
    @do_end_counter.increment_start if attached.empty?
  else
    rs.extend(Extensions) unless rs.respond_to?(:curr)
    @do_end_counter.marker = rs.curr
    @do_end_counter.increment_start
  end
end
58
+
59
# Handles 'end'. When the do/end count is running and this 'end' balances
# it, the code from the start marker onwards is captured, multiline-ness
# is recorded and EndOfBlock is raised.
def on_kw_end(rs)
  return unless @do_end_counter.started?
  return unless @do_end_counter.increment_end.telly?

  @result = rs.to_code(@do_end_counter.marker)
  @is_multiline_block = rs.multiline?
  raise EndOfBlock
end
66
+
67
# Handles '{': lets the parent class record the token, then -- unless a
# do/end block is being tracked -- marks the current entry as the block
# start on the first brace and opens a brace level.
# Returns whatever the parent handler returned.
def on_lbrace(token)
  rs = super
  unless @do_end_counter.started?
    rs.extend(Extensions) unless rs.respond_to?(:curr)
    @braced_counter.marker = rs.curr unless @braced_counter.started?
    @braced_counter.increment_start
  end
  rs
end
76
+
77
# Handles '}': lets the parent class record the token; when the brace count
# is running and this brace balances it, the code from the start marker
# onwards is captured, multiline-ness is recorded and EndOfBlock is raised.
# Otherwise returns whatever the parent handler returned.
def on_rbrace(token)
  rs = super
  if @braced_counter.started? && @braced_counter.increment_end.telly?
    @result = rs.to_code(@braced_counter.marker)
    @is_multiline_block = rs.multiline?
    raise EndOfBlock
  end
  rs
end
86
+
87
# Handles the start of an embedded expression: lets the parent class record
# the token and, when the brace count is running, opens another brace level.
# Returns whatever the parent handler returned.
def on_embexpr_beg(token)
  rs = super
  @braced_counter.increment_start if @braced_counter.started?
  rs
end
92
+
93
# Handles operators: lets the parent class record the token. A '=>' seen
# while exactly one brace level is open undoes that level, presumably
# because the brace opened a hash literal rather than a block.
# Returns whatever the parent handler returned.
def on_op(token)
  rs = super
  hash_rocket = @braced_counter.started? && token == '=>'
  @braced_counter.decrement_start if hash_rocket && @braced_counter[:start] == 1
  rs
end
100
+
101
# Handles a label token (e.g. "a:"): lets the parent class record it. A
# label seen while exactly one brace level is open undoes that level,
# presumably because the brace opened a hash literal rather than a block.
# Returns whatever the parent handler returned.
def on_label(token)
  rs = super
  only_one_open = @braced_counter.started? && @braced_counter[:start] == 1
  @braced_counter.decrement_start if only_one_open
  rs
end
108
+
109
+ # Ease working with the result set generated by Ripper
110
module Extensions

  # Each entry is [[row, col], type, value].
  POS, TYP, VAL = 0, 1, 2
  ROW, COL = 0, 1

  # Entries preceding the current one that belong to the same line.
  def same_as_curr_line
    same_line(curr_line)
  end

  # True when the first and last entries sit on different rows.
  def multiline?
    self[0][POS][ROW] != self[-1][POS][ROW]
  end

  # Row of the most recent entry.
  def curr_line
    curr[POS][ROW]
  end

  # The most recent entry.
  def curr
    self[-1]
  end

  # Walks backwards from the entry before the current one, collecting
  # entries while they are on +line+. The walk stops at a semicolon and
  # carries over to the previous row across a backslash-newline.
  # Returns the entries in their original order.
  def same_line(line)
    earlier = self[0..-2].reverse.take_while do |e|
      if e[TYP] == :on_semicolon && e[VAL] == ';'
        false
      elsif e[POS][ROW] == line
        true
      elsif e[TYP] == :on_sp && e[VAL] == "\\\n"
        line -= 1
        true
      end
    end
    earlier.reverse.extend(Extensions)
  end

  # Keyword entries, optionally limited to the given keyword names.
  def keywords(*types)
    wanted = [types].flatten.map(&:to_s)
    select { |e| e[TYP] == :on_kw && (wanted.empty? or wanted.include?(e[VAL])) }.
      extend(Extensions)
  end

  # Entries that are not spaces; with +types+ given, only spaces with one
  # of those values are dropped.
  def non_spaces(*types)
    unwanted = [types].flatten
    reject { |e| e[TYP] == :on_sp && (unwanted.empty? or unwanted.include?(e[VAL])) }.
      extend(Extensions)
  end

  # True when nothing but spaces precedes the current entry on its line.
  def start_of_line?
    same_as_curr_line.non_spaces.empty?
  end

  # True when the last non-space entry before the current one is '('.
  # Returns false, instead of raising, when there is no such entry.
  def within_block?
    prev = same_as_curr_line.non_spaces[-1]
    !prev.nil? && prev[TYP] == :on_lparen
  end

  # Joins the values of all entries from +marker+ onwards into code.
  # Labels ("a:") are written as ":a => " and __LINE__ as the entry's row.
  def to_code(marker)
    heredoc_beg = false # fixing mysteriously missing newline after :on_heredoc_beg
    self[index(marker) .. -1].map do |e|
      if e[TYP] == :on_heredoc_beg
        heredoc_beg = true
        e[VAL]
      elsif heredoc_beg && e[TYP] != :on_nl
        heredoc_beg = false
        "\n" + e[VAL]
      else
        heredoc_beg = false
        if e[TYP] == :on_label
          ':%s => ' % e[VAL][0..-2]
        elsif e[TYP] == :on_kw && e[VAL] == '__LINE__'
          e[POS][ROW]
        else
          e[VAL]
        end
      end
    end.join
  end

end
192
+
193
+ end
194
+ end
195
+ end
@@ -0,0 +1,74 @@
1
+ require 'sourcify/proc/lexer'
2
+ require 'sourcify/proc/counter'
3
+
4
+ module Sourcify
5
+ module Proc
6
+ class Parser
7
+
8
+ RUBY_PARSER = RubyParser.new
9
+ RUBY_2_RUBY = Ruby2Ruby.new
10
+
11
# Captures what is needed from the proc to locate and parse its source:
# its binding, its arity and the file/line reported by source_location.
def initialize(_proc)
  @binding = _proc.binding
  @arity = _proc.arity
  location = _proc.source_location
  @file, @line = location
end
15
+
16
# Returns the proc's code as a string, generated from its sexp.
#
# The processor is handed a deep copy (to_a / Sexp.from_array round trip)
# because sexp processing consumes its input, and the sexp returned by
# #sexp is memoized -- processing it directly would leave later calls to
# #sexp and #source with a gutted tree.
def source
  RUBY_2_RUBY.process(Sexp.from_array(sexp.to_a))
end
19
+
20
# Returns the proc's sexp (memoized): the raw source is parsed, no-arg
# calls that name local variables are rewritten by replace_with_lvars,
# and the resulting array is turned back into a Sexp.
def sexp
  return @sexp if @sexp

  parsed = RUBY_PARSER.parse(raw_source, @file)
  rewritten = replace_with_lvars(parsed.to_a)
  @sexp = Sexp.from_array(rewritten)
end
26
+
27
+ private
28
+
29
# Returns "proc <block>" for the one lexed fragment whose arity matches
# the proc's (memoized).
#
# Every candidate fragment is eval'ed as a proc to read its arity.
# NOTE(review): this evaluates text taken from the proc's source file.
#
# Raises MultipleMatchingProcsPerLineError when more than one fragment
# has the matching arity.
def raw_source
  @raw_source ||= begin
    lexer = Sourcify::Proc::Lexer.new(raw_source_io, @file, @line)
    candidates = lexer.work.select { |frag| eval('proc ' + frag).arity == @arity }
    raise MultipleMatchingProcsPerLineError if candidates.size > 1
    'proc %s' % candidates[0]
  end
end
37
+
38
# Opens @file, skips the lines before @line and returns the remainder of
# the file wrapped in a read-only StringIO.
def raw_source_io
  File.open(@file, 'r') do |handle|
    handle.extend(File::Tail)
    handle.forward(@line.pred)
    remainder = handle.readlines.join
    StringIO.new(remainder, 'r')
  end
end
44
+
45
# Recursively rewrites a sexp (as nested arrays): every no-arg call node
# is replaced by whatever no_arg_method_call_or_lvar returns for it, other
# nested arrays are descended into, and non-array elements are kept.
# Arrays headed by :class, :sclass, :defn or :module are returned as-is.
def replace_with_lvars(array)
  return array if [:class, :sclass, :defn, :module].include?(array[0])

  array.map do |node|
    next node unless node.is_a?(Array)
    no_arg_method_call_or_lvar(node) || replace_with_lvars(node)
  end
end
55
+
56
# For a node that represents a no-arg call, returns [:lvar, name] when the
# name is a local variable in the proc's binding, or the node itself when
# it is not. Returns nil for any other node.
def no_arg_method_call_or_lvar(e)
  return unless represents_no_arg_call?(e)

  var = e[2]
  return [:lvar, var] if has_as_local_var?(var)
  e
end
61
+
62
# True when +e+ has the shape [:call, nil, <Symbol>, [:arglist]], i.e. a
# receiver-less call with an empty argument list.
def represents_no_arg_call?(e)
  return false unless e.size == 4
  return false unless e[0..1] == [:call, nil]
  return false unless e[3] == [:arglist]

  e[2].is_a?(Symbol)
end
66
+
67
# True when +var+ names a local variable visible in the proc's binding.
#
# Names are compared as strings, so the check works whether
# local_variables yields strings or symbols. The previous version chose
# the literal form by looking for '1.9.' in RUBY_VERSION, which picked the
# string form on every other Ruby and therefore never matched where the
# names are symbols. Comparing outside the eval also keeps +var+ out of
# the evaluated code.
def has_as_local_var?(var)
  @binding.eval('local_variables').map(&:to_s).include?(var.to_s)
end
71
+
72
+ end
73
+ end
74
+ end