sourcify 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. data/.document +5 -0
  2. data/.gitignore +21 -0
  3. data/HISTORY.txt +4 -0
  4. data/LICENSE +20 -0
  5. data/README.rdoc +154 -0
  6. data/Rakefile +114 -0
  7. data/VERSION +1 -0
  8. data/lib/sourcify.rb +12 -0
  9. data/lib/sourcify/proc.rb +53 -0
  10. data/lib/sourcify/proc/counter.rb +41 -0
  11. data/lib/sourcify/proc/lexer.rb +40 -0
  12. data/lib/sourcify/proc/lexer18.rb +224 -0
  13. data/lib/sourcify/proc/lexer19.rb +195 -0
  14. data/lib/sourcify/proc/parser.rb +74 -0
  15. data/sourcify.gemspec +109 -0
  16. data/spec/proc/19x_extras.rb +27 -0
  17. data/spec/proc/readme +5 -0
  18. data/spec/proc/to_sexp_variables_spec.rb +146 -0
  19. data/spec/proc/to_source_from_braced_block_w_nested_braced_block_spec.rb +33 -0
  20. data/spec/proc/to_source_from_braced_block_w_nested_hash_spec.rb +34 -0
  21. data/spec/proc/to_source_from_braced_block_wo_nesting_complication_spec.rb +46 -0
  22. data/spec/proc/to_source_from_do_end_block_w_nested_begin_spec.rb +35 -0
  23. data/spec/proc/to_source_from_do_end_block_w_nested_case_spec.rb +35 -0
  24. data/spec/proc/to_source_from_do_end_block_w_nested_class_spec.rb +89 -0
  25. data/spec/proc/to_source_from_do_end_block_w_nested_do_end_block_spec.rb +33 -0
  26. data/spec/proc/to_source_from_do_end_block_w_nested_for_spec.rb +132 -0
  27. data/spec/proc/to_source_from_do_end_block_w_nested_if_spec.rb +73 -0
  28. data/spec/proc/to_source_from_do_end_block_w_nested_method_spec.rb +33 -0
  29. data/spec/proc/to_source_from_do_end_block_w_nested_module_spec.rb +49 -0
  30. data/spec/proc/to_source_from_do_end_block_w_nested_unless_spec.rb +73 -0
  31. data/spec/proc/to_source_from_do_end_block_w_nested_until_spec.rb +176 -0
  32. data/spec/proc/to_source_from_do_end_block_w_nested_while_spec.rb +176 -0
  33. data/spec/proc/to_source_from_do_end_block_wo_nesting_complication_spec.rb +46 -0
  34. data/spec/proc/to_source_from_multi_blocks_w_many_matches_spec.rb +73 -0
  35. data/spec/proc/to_source_from_multi_blocks_w_single_match_spec.rb +31 -0
  36. data/spec/proc/to_source_from_multi_do_end_blocks_w_single_match_spec.rb +31 -0
  37. data/spec/proc/to_source_magic_file_var_spec.rb +127 -0
  38. data/spec/proc/to_source_magic_line_var_spec.rb +127 -0
  39. data/spec/proc/to_source_variables_spec.rb +29 -0
  40. data/spec/spec_helper.rb +41 -0
  41. metadata +159 -0
@@ -0,0 +1,224 @@
1
+ require 'irb/ruby-lex'
2
+ require 'irb/ruby-token'
3
+
4
+ module Sourcify
5
+ module Proc
6
+
7
+ class Lexer18
8
+
9
+ # Implementation of this class has been inspired by the discussion at
10
+ # http://www.justskins.com/forums/breaking-ruby-code-into-117453.html
11
+
12
+ include Lexer::Commons
13
+
14
+ def initialize(io, file, line)
15
+ @file, @line, @io, @pos = file, line, io, io.pos
16
+ @lex = RubyLex.new
17
+ @lex.set_input(@io)
18
+ @lex.get_readed
19
+ end
20
+
21
# Drains tokens from RubyLex. Every token is recorded in @tokens as
# [line_no, char_no, type] and then handed to the matching on_<type>
# handler, if this object has one.
def lex
  @tokens = []
  @tokens.extend(Extensions)
  @magic_lines = []

  while (@tk = @lex.token)
    # e.g. an instance of RubyToken::TkNL yields :tknl
    tkc = @tk.class.to_s.sub(/\ARubyToken::/, '').downcase.to_sym
    @tokens << [@tk.line_no, @tk.char_no, tkc]
    post_qstring if @qstring_data
    begin
      send(:"on_#{tkc}")
    rescue StandardError
      # Equivalent to the `... rescue NoMethodError` modifier form: a rescue
      # modifier catches any StandardError, not just a missing handler.
      # NOTE(review): errors raised inside handlers are silenced here too.
    end
    @lex.get_readed if tkc == :tknl
  end
end
33
+
34
# Newline handler: raises EndOfLine as soon as @results is non-empty.
def on_tknl
  return if @results.empty?
  raise EndOfLine
end
37
+
38
# Records a bare __LINE__ token as [seek position, token line + @line] so it
# can be substituted later by replace_magic_lines.
def on_tk__line__
  position = @tk.seek
  lineno = @tk.line_no + @line
  @magic_lines << [position, lineno]
end
41
+
42
# Remembers where a string-like token starts ([seek, token line + @line]);
# post_qstring turns this into a range once the next token arrives.
def on_tkdstring
  start_seek = @tk.seek
  lineno = @tk.line_no + @line
  @qstring_data = [start_seek, lineno]
end

alias on_tkstring on_tkdstring   # heredoc
alias on_tkdregexp on_tkdstring  # regexp
alias on_tkdxstring on_tkdstring # ` command
49
+
50
# Closes the pending string-like token: stores the span from its start up to
# the current token's seek position (plus its line) in @magic_lines, then
# clears @qstring_data.
def post_qstring
  start_seek, lineno = @qstring_data
  span = Range.new(start_seek, @tk.seek)
  @magic_lines << [span, lineno]
  @qstring_data = nil
end
55
+
56
# Handles 'do'. The first 'do' marks the start of the block being captured;
# any later one is counted as a nested opener unless it belongs to a
# 'for', 'while' or 'until' found on the same line.
def on_tkdo
  counter = @do_end_counter
  unless counter.started?
    counter.marker = @tk.seek
    return counter.increment_start
  end

  # A 'for', 'while' or 'until' may carry an attached 'do'; that 'do' does
  # not need an extra 'end', so it is skipped.
  loop_keywords = @tokens.same_as_curr_line.keywords(:tkfor, :tkwhile, :tkuntil)
  counter.increment_start if loop_keywords.empty?
end
66
+
67
# Handles 'end'. Once the do...end counter is running, each 'end' is counted;
# when the counter reports telly? the block text is grabbed (3 is passed as
# the offset, the length of 'end') and EndOfBlock is raised.
def on_tkend
  return unless @do_end_counter.started?
  return unless @do_end_counter.increment_end.telly?

  @result = grab_result_and_reset_lex(@do_end_counter.marker, 3)
  @is_multiline_block = @tokens.multiline?
  raise EndOfBlock
end
74
+
75
# 'class' always consumes an 'end' to close it, so while a do...end block is
# being tracked it counts as one more opener.
def on_tkclass
  return unless @do_end_counter.started?
  @do_end_counter.increment_start
end

# These are handled the same way as 'class'. 'for' may carry an optional
# attached 'do' (that 'do' is dealt with in on_tkdo):
# * for a in [1,2] do ... end
# * for a in [1,2] \n ... end
alias on_tkdef on_tkclass
alias on_tkmodule on_tkclass
alias on_tkbegin on_tkclass
alias on_tkcase on_tkclass
alias on_tkfor on_tkclass
85
+
86
# 'while' has an optional trailing 'do' and can also act as a modifier:
# * while true do ... end # => 'do' must be on the same line as 'while'
# * while true \n ... end
# * ... while true        # => 'while' is pre-pended with non-spaces
# It is counted as an opener only while a do...end block is being tracked
# and @tokens reports start_of_line?.
def on_tkwhile
  return unless @do_end_counter.started?
  return unless @tokens.start_of_line?
  @do_end_counter.increment_start
end

# These work exactly the same as 'while'.
alias on_tkuntil on_tkwhile
alias on_tkif on_tkwhile
alias on_tkunless on_tkwhile
98
+
99
# Handles '{'. Ignored while a do...end block is being tracked; otherwise the
# first brace sets the marker and every brace is counted as an opener.
def on_tklbrace
  return if @do_end_counter.started?

  @braced_counter.marker = @tk.seek unless @braced_counter.started?
  @braced_counter.increment_start
end
105
+
106
# Handles '}'. Once the brace counter is running, each '}' is counted; when
# the counter reports telly? the block text is grabbed (1 is passed as the
# offset, the length of '}') and EndOfBlock is raised.
def on_tkrbrace
  return unless @braced_counter.started?
  return unless @braced_counter.increment_end.telly?

  @result = grab_result_and_reset_lex(@braced_counter.marker, 1)
  @is_multiline_block = @tokens.multiline?
  raise EndOfBlock
end
113
+
114
# :tkflbrace / :tkfrbrace tokens go through the same handlers as the plain
# brace tokens.
alias on_tkflbrace on_tklbrace
alias on_tkfrbrace on_tkrbrace
116
+
117
# Handles '=>'. If exactly one '{' has been counted so far, that opener is
# taken back (the brace is treated as a hash rather than a block).
def on_tkassoc
  return unless @braced_counter.started?
  return unless @braced_counter[:start] == 1
  @braced_counter.decrement_start
end
122
+
123
# Handles '>'. When the two most recent tokens are :tkassign followed by
# :tkgt (i.e. '=' then '>'), the pair is treated as '=>' and forwarded to
# on_tkassoc.
#
# Uses Array#last(2) so that having fewer than two tokens simply means
# "no match" instead of calling #map on nil.
def on_tkgt
  last_two_types = @tokens.last(2).map { |tok| tok.last }
  on_tkassoc if last_two_types == [:tkassign, :tkgt]
end
126
+
127
# Reads the block text out of @io and re-attaches the lexer to @io.
#
# marker - seek position (relative to @pos) where the block starts.
# offset - number of characters added after the current token's seek
#          position (callers pass 3 for 'end' and 1 for '}').
#
# Returns the text after replace_magic_lines has processed it.
def grab_result_and_reset_lex(marker, offset)
  length = @tk.seek - marker + offset
  @io.seek(@pos + marker)
  raw = @io.read(length)
  result = replace_magic_lines(raw, marker)
  # NOTE(review): this repositions to @pos + length, not @pos + marker + length
  # - kept as is; confirm this is the intended restart position.
  @io.seek(@pos + length)
  @lex.set_input(@io)
  @lex.get_readed
  result
end
136
+
137
# Applies every recorded @magic_lines entry to +result+.
#
# result - the grabbed block text.
# marker - seek position where the block starts.
# offset - running correction, recomputed after every replacement as
#          (original length - current length).
#
# Each entry is [pos, val]; a Range +pos+ goes to
# replace_magic_line_by_range, anything else to
# replace_magic_line_by_fixnum. Dispatching on the value's kind instead of
# its class name keeps this working where integers are Integer rather than
# Fixnum (the class name would otherwise yield an undefined
# replace_magic_line_by_integer).
#
# Returns the text after all replacements.
def replace_magic_lines(result, marker, offset = 0)
  @magic_lines.inject(result) do |rs, (pos, val)|
    meth = pos.is_a?(Range) ? :replace_magic_line_by_range : :replace_magic_line_by_fixnum
    n_rs = send(meth, rs, marker + offset, pos, val)
    offset = result.length - n_rs.length
    n_rs
  end
end
145
+
146
# Replaces the __LINE__ that sits (pos - offset) characters into +rs+ with
# the literal number val - 1.
#
# rs     - text to patch.
# offset - position of the start of +rs+ in the same coordinates as +pos+.
# pos    - recorded seek position of the __LINE__ token.
# val    - recorded line value; its predecessor is what gets written.
#
# Returns the patched text, or +rs+ unchanged when the position lies before
# the text or no __LINE__ is found there (previously this raised
# NoMethodError on the nil match).
def replace_magic_line_by_fixnum(rs, offset, pos, val)
  lead = pos - offset
  return rs if lead < 0

  m = rs.match(/^(.{#{lead}})__LINE__(.*)$/m)
  return rs unless m

  m[1] + val.pred.to_s + m[2]
end
150
+
151
# Replaces interpolated __LINE__ occurrences ("...#{__LINE__}...") inside the
# string-like literal that spans +pos+ in @io.
#
# rs     - text to patch.
# offset - not used by this method.
# pos    - Range of seek positions (relative to @pos) covering the literal.
# lineno - line value recorded for the literal's first line; line i of the
#          literal gets (lineno + i) - 1.
#
# Returns the patched text; +rs+ is returned untouched when the literal
# starts with ', %q or %w.
def replace_magic_line_by_range(rs, offset, pos, lineno)
  @io.seek(@pos + pos.begin)
  subject = @io.read(pos.end - pos.begin)
  return rs if %w{' %q %w}.any? { |q| subject.start_with?(q) }

  head, tail = rs.match(/^(.*?)#{Regexp.quote(subject)}(.*)$/m)[1..2]
  pattern = /(.*?\#\{)__LINE__(\})/
  lines = subject.split("\n")
  lines.each_with_index do |line, i|
    line.gsub!(pattern) do |hit|
      parts = hit.match(pattern)[1..2]
      if parts[0].end_with?('\\#{')
        # the '#' is escaped, so this is literal text: leave it alone
        hit
      else
        parts.insert(1, (lineno + i).pred.to_s).join
      end
    end
  end
  head + lines.join("\n") + tail
end
164
+
165
# Ease working with the hybrid token set collected from RubyLex.
# Meant to be mixed into an Array of [row, col, type] triples.
module Extensions

  ROW, COL, TYP = 0, 1, 2

  # Token types that terminate the "same line" scan. :tksemicolon is the
  # symbol derived from the RubyToken::TkSEMICOLON class name; the historical
  # :tsemi is kept so existing callers are unaffected.
  SEMICOLON_TYPES = [:tsemi, :tksemicolon]

  # Tokens that precede the current (last) token on its line.
  def same_as_curr_line
    same_line(curr_line)
  end

  # True when the first and last token sit on different rows.
  def multiline?
    self[0][ROW] != self[-1][ROW]
  end

  # Row of the current (last) token.
  def curr_line
    curr[ROW]
  end

  # The current (last) token.
  def curr
    self[-1]
  end

  # Walks backwards from the token before the current one and collects the
  # tokens belonging to +line+. The walk stops at a semicolon, and steps onto
  # the previous row when that row's token is not a :tknl.
  def same_line(line)
    (
      # ignore the current node
      self[0..-2].reverse.take_while do |e|
        if SEMICOLON_TYPES.include?(e[TYP])
          false
        elsif e[ROW] == line
          true
        elsif e[ROW] == line.pred && e[TYP] != :tknl
          line -= 1
          true
        end
      end.reverse
    ).extend(Extensions)
  end

  # Tokens whose type is one of +types+.
  def keywords(*types)
    wanted = [types].flatten
    select { |e| wanted.include?(e[TYP]) }.extend(Extensions)
  end

  # Tokens whose type is NOT one of +types+.
  # NOTE: when called without any type every token is rejected, so the
  # result is always empty (start_of_line? relies on this call form).
  def non_spaces(*types)
    unwanted = [types].flatten
    reject { |e| unwanted.empty? or unwanted.include?(e[TYP]) }.extend(Extensions)
  end

  def start_of_line?
    same_as_curr_line.non_spaces.empty?
  end

end
221
+
222
+ end
223
+ end
224
+ end
@@ -0,0 +1,195 @@
1
+ require 'ripper'
2
+
3
+ module Sourcify
4
+ module Proc
5
+ class Lexer19 < Ripper::Lexer
6
+
7
+ include Lexer::Commons
8
+
9
# Newline handler: lets the superclass record the token, then raises
# EndOfLine as soon as @results is non-empty. Returns whatever super returns.
def on_nl(token)
  rs = super
  raise EndOfLine unless @results.empty?
  rs
end

alias on_ignored_nl on_nl
16
+
17
# Keyword handler: lets the superclass record the token, then forwards the
# result to on_kw_<keyword> when such a handler exists. Returns whatever
# super returns.
def on_kw(token)
  rs = super
  begin
    send(:"on_kw_#{token}", rs)
  rescue StandardError
    # Equivalent to the `... rescue NoMethodError` modifier form: a rescue
    # modifier catches any StandardError, not just a missing handler.
    # NOTE(review): errors raised inside handlers are silenced here too.
  end
  rs
end
22
+
23
# 'class' always consumes an 'end' to close it, so while a do...end block is
# being tracked it counts as one more opener.
def on_kw_class(rs)
  return unless @do_end_counter.started?
  @do_end_counter.increment_start
end

# These are handled the same way as 'class'. 'for' may carry an optional
# attached 'do' (that 'do' is dealt with in on_kw_do):
# * for a in [1,2] do ... end
# * for a in [1,2] \n ... end
alias on_kw_def on_kw_class
alias on_kw_module on_kw_class
alias on_kw_begin on_kw_class
alias on_kw_case on_kw_class
alias on_kw_for on_kw_class
33
+
34
# 'while' has an optional trailing 'do' and can also act as a modifier:
# * while true do ... end # => 'do' must be on the same line as 'while'
# * while true \n ... end
# * ... while true        # => 'while' is pre-pended with non-spaces
# It is counted as an opener only while a do...end block is being tracked
# and +rs+ reports start_of_line? or within_block?.
def on_kw_while(rs)
  return unless @do_end_counter.started?
  return unless rs.start_of_line? || rs.within_block?
  @do_end_counter.increment_start
end

# These work exactly the same as 'while'.
alias on_kw_until on_kw_while
alias on_kw_if on_kw_while
alias on_kw_unless on_kw_while
46
+
47
# Handles 'do'. The first 'do' marks the start of the block being captured
# (the marker is the current token entry of +rs+); any later one is counted
# as a nested opener unless it belongs to a 'for', 'while' or 'until' found
# on the same line.
def on_kw_do(rs)
  counter = @do_end_counter
  unless counter.started?
    rs.extend(Extensions) unless rs.respond_to?(:curr)
    counter.marker = rs.curr
    return counter.increment_start
  end

  # A 'for', 'while' or 'until' may carry an attached 'do'; that 'do' does
  # not need an extra 'end', so it is skipped.
  loop_keywords = rs.same_as_curr_line.keywords(%w{for while until})
  counter.increment_start if loop_keywords.empty?
end
58
+
59
# Handles 'end'. Once the do...end counter is running, each 'end' is counted;
# when the counter reports telly? the code from the marker onwards is
# rendered via rs.to_code and EndOfBlock is raised.
def on_kw_end(rs)
  return unless @do_end_counter.started?
  return unless @do_end_counter.increment_end.telly?

  @result = rs.to_code(@do_end_counter.marker)
  @is_multiline_block = rs.multiline?
  raise EndOfBlock
end
66
+
67
# Handles '{'. After the superclass records the token: ignored while a
# do...end block is being tracked; otherwise the first brace sets the marker
# (the current token entry) and every brace is counted as an opener.
# Returns whatever super returns.
def on_lbrace(token)
  rs = super
  return rs if @do_end_counter.started?

  rs.extend(Extensions) unless rs.respond_to?(:curr)
  @braced_counter.marker = rs.curr unless @braced_counter.started?
  @braced_counter.increment_start
  rs
end
76
+
77
# Handles '}'. After the superclass records the token: once the brace
# counter is running each '}' is counted; when the counter reports telly?
# the code from the marker onwards is rendered via rs.to_code and EndOfBlock
# is raised. Otherwise returns whatever super returns.
def on_rbrace(token)
  rs = super
  return rs unless @braced_counter.started?
  return rs unless @braced_counter.increment_end.telly?

  @result = rs.to_code(@braced_counter.marker)
  @is_multiline_block = rs.multiline?
  raise EndOfBlock
end
86
+
87
# Handles the start of an embedded expression: while braces are being
# tracked it is counted as one more opener. Returns whatever super returns.
def on_embexpr_beg(token)
  rs = super
  @braced_counter.increment_start if @braced_counter.started?
  rs
end
92
+
93
# Handles operators. For '=>', if exactly one '{' has been counted so far,
# that opener is taken back (the brace is treated as a hash rather than a
# block). Returns whatever super returns.
def on_op(token)
  rs = super
  hash_rocket = @braced_counter.started? && token == '=>' && @braced_counter[:start] == 1
  @braced_counter.decrement_start if hash_rocket
  rs
end
100
+
101
# Handles labels. If exactly one '{' has been counted so far, that opener is
# taken back (the brace is treated as a hash rather than a block).
# Returns whatever super returns.
def on_label(token)
  rs = super
  only_one_brace = @braced_counter.started? && @braced_counter[:start] == 1
  @braced_counter.decrement_start if only_one_brace
  rs
end
108
+
109
# Ease working with the result set generated by Ripper.
# Meant to be mixed into an Array of [[row, col], type, value] entries.
module Extensions

  POS, TYP, VAL = 0, 1, 2
  ROW, COL = 0, 1

  # Tokens that precede the current (last) token on its line.
  def same_as_curr_line
    same_line(curr_line)
  end

  # True when the first and last token sit on different rows.
  def multiline?
    self[0][POS][ROW] != self[-1][POS][ROW]
  end

  # Row of the current (last) token.
  def curr_line
    curr[POS][ROW]
  end

  # The current (last) token.
  def curr
    self[-1]
  end

  # Walks backwards from the token before the current one and collects the
  # tokens belonging to +line+. The walk stops at a ';', and steps onto the
  # previous row when it meets a backslash-newline space token.
  def same_line(line)
    (
      # ignore the current node
      self[0..-2].reverse.take_while do |e|
        if e[TYP] == :on_semicolon && e[VAL] == ';'
          false
        elsif e[POS][ROW] == line
          true
        elsif e[TYP] == :on_sp && e[VAL] == "\\\n"
          line -= 1
          true
        end
      end.reverse
    ).extend(Extensions)
  end

  # Keyword tokens; limited to the given keyword values when any are passed.
  def keywords(*types)
    wanted = [types].flatten.map(&:to_s)
    select { |e| e[TYP] == :on_kw && (wanted.empty? || wanted.include?(e[VAL])) }.extend(Extensions)
  end

  # Everything except space tokens; when values are passed only space tokens
  # with one of those values are dropped.
  def non_spaces(*types)
    spaces = [types].flatten
    reject { |e| e[TYP] == :on_sp && (spaces.empty? || spaces.include?(e[VAL])) }.extend(Extensions)
  end

  # True when only space tokens precede the current token on its line.
  def start_of_line?
    same_as_curr_line.non_spaces.empty?
  end

  # True when the last non-space token before the current one is '('.
  # Returns false (instead of raising on nil) when nothing precedes it.
  def within_block?
    previous = same_as_curr_line.non_spaces[-1]
    !previous.nil? && previous[TYP] == :on_lparen
  end

  # Joins the token values from +marker+ (an entry of this array) to the end
  # back into source text, with these adjustments:
  # * a newline is re-inserted after a heredoc opener when the next token is
  #   not a newline (fixing mysteriously missing newline after
  #   :on_heredoc_beg),
  # * labels ("a:") are rendered as ":a => ",
  # * the __LINE__ keyword is rendered as its row number.
  def to_code(marker)
    after_heredoc_beg = false
    self[index(marker)..-1].map do |e|
      prev_was_heredoc_beg = after_heredoc_beg
      after_heredoc_beg = (e[TYP] == :on_heredoc_beg)
      if after_heredoc_beg
        e[VAL]
      elsif prev_was_heredoc_beg && e[TYP] != :on_nl
        "\n" + e[VAL]
      elsif e[TYP] == :on_label
        ':%s => ' % e[VAL][0..-2]
      elsif e[TYP] == :on_kw && e[VAL] == '__LINE__'
        e[POS][ROW]
      else
        e[VAL]
      end
    end.join
  end

end
192
+
193
+ end
194
+ end
195
+ end
@@ -0,0 +1,74 @@
1
+ require 'sourcify/proc/lexer'
2
+ require 'sourcify/proc/counter'
3
+
4
+ module Sourcify
5
+ module Proc
6
+ class Parser
7
+
8
+ RUBY_PARSER = RubyParser.new
9
+ RUBY_2_RUBY = Ruby2Ruby.new
10
+
11
# Captures what is needed from +_proc+ to locate and rebuild its source:
# its binding, arity and source location (file and line).
def initialize(_proc)
  @binding = _proc.binding
  @arity = _proc.arity
  @file, @line = _proc.source_location
end
15
+
16
# Returns the proc's source code, produced by running its sexp through the
# shared Ruby2Ruby instance.
def source
  tree = sexp
  RUBY_2_RUBY.process(tree)
end
19
+
20
# Returns the proc as a Sexp (memoized in @sexp): the raw source is parsed
# with the shared RubyParser instance, then passed through
# replace_with_lvars.
def sexp
  return @sexp if @sexp

  raw_sexp = RUBY_PARSER.parse(raw_source, @file)
  @sexp = Sexp.from_array(replace_with_lvars(raw_sexp.to_a))
end
26
+
27
+ private
28
+
29
# Returns "proc <fragment>" for the one lexed fragment whose arity equals
# @arity (memoized in @raw_source).
#
# Raises MultipleMatchingProcsPerLineError when more than one fragment
# matches.
def raw_source
  return @raw_source if @raw_source

  candidates = Sourcify::Proc::Lexer.new(raw_source_io, @file, @line).work
  # NOTE(review): eval runs text taken from the proc's source file in order
  # to read the resulting proc's arity.
  frags = candidates.select { |frag| eval('proc ' + frag).arity == @arity }
  raise MultipleMatchingProcsPerLineError if frags.size > 1
  @raw_source = 'proc %s' % frags[0]
end
37
+
38
# Returns a read-only StringIO holding the content of @file after
# File::Tail#forward has been asked to advance by @line - 1.
def raw_source_io
  File.open(@file, 'r') do |fh|
    fh.extend(File::Tail)
    fh.forward(@line.pred)
    remaining = fh.readlines.join
    StringIO.new(remaining, 'r')
  end
end
44
+
45
# Walks a sexp array recursively, handing every nested array to
# no_arg_method_call_or_lvar and descending into it when that returns
# nothing. Arrays starting with :class, :sclass, :defn or :module are
# returned untouched.
def replace_with_lvars(array)
  return array if [:class, :sclass, :defn, :module].include?(array[0])

  array.map do |e|
    next e unless e.is_a?(Array)
    no_arg_method_call_or_lvar(e) || replace_with_lvars(e)
  end
end
55
+
56
# For a node that represents a receiver-less call without arguments:
# returns [:lvar, name] when the proc's binding has a local variable of that
# name, otherwise the node itself. Returns nil for any other node.
def no_arg_method_call_or_lvar(e)
  return unless represents_no_arg_call?(e)

  var = e[2]
  has_as_local_var?(var) ? [:lvar, var] : e
end
61
+
62
# True when +e+ has the shape [:call, nil, <Symbol>, [:arglist]], i.e. a
# call without receiver and without arguments.
# (The previously assigned but never read local has been dropped.)
def represents_no_arg_call?(e)
  e.size == 4 &&
    e[0..1] == [:call, nil] &&
    e[3] == [:arglist] &&
    e[2].is_a?(Symbol)
end
66
+
67
# True when the proc's binding has a local variable named +var+.
#
# Names are compared as strings, so the result does not depend on whether
# local_variables yields Strings or Symbols; the former check keyed on
# RUBY_VERSION containing '1.9.' and compared a String against Symbols on
# every later Ruby. The name is no longer interpolated into the evaluated
# code either.
def has_as_local_var?(var)
  name = var.to_s
  @binding.eval('local_variables').any? { |lvar| lvar.to_s == name }
end
71
+
72
+ end
73
+ end
74
+ end