comment-ripper 0.0.2 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
@@ -0,0 +1,21 @@
1
+ ## MAC OS
2
+ .DS_Store
3
+
4
+ ## TEXTMATE
5
+ *.tmproj
6
+ tmtags
7
+
8
+ ## EMACS
9
+ *~
10
+ \#*
11
+ .\#*
12
+
13
+ ## VIM
14
+ *.swp
15
+
16
+ ## PROJECT::GENERAL
17
+ coverage
18
+ rdoc
19
+ pkg
20
+
21
+ ## PROJECT::SPECIFIC
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source :gemcutter
2
+
3
+ gem "rspec", :group => :test
data/Rakefile CHANGED
@@ -10,12 +10,7 @@ begin
10
10
  gem.email = "nanodeath@gmail.com"
11
11
  gem.homepage = "http://github.com/nanodeath/comment-ripper"
12
12
  gem.authors = ["Max Aller"]
13
- gem.add_dependency "treetop", ">= 1.4"
14
13
  gem.add_development_dependency "rspec", ">= 1.2.9"
15
-
16
- # This doesn't actually build a native extension, but eh, it works.
17
- gem.extensions << "Rakefile"
18
- gem.files = Dir['lib/grammars/**/*.tt']
19
14
  end
20
15
  Jeweler::GemcutterTasks.new
21
16
  rescue LoadError
@@ -36,20 +31,7 @@ end
36
31
 
37
32
  task :spec => :check_dependencies
38
33
 
39
- task :default do
40
- # Build grammars
41
- generated_directory = "lib/grammars/generated_parsers"
42
- mkdir_p File.join(File.dirname(__FILE__), generated_directory)
43
-
44
- grammars = Dir[File.expand_path(File.dirname(__FILE__) + "/lib/grammars/*.tt")]
45
- puts "Building #{grammars.length} grammar parsers..."
46
- grammars.each do |g|
47
- outfile = File.join(File.dirname(__FILE__), generated_directory, File.basename(g, ".tt") + ".rb")
48
- print "Processing grammar #{File.basename(g)}"
49
- `tt #{g} -o #{outfile}`
50
- puts "...Success!"
51
- end
52
- end
34
+ task :default => :spec
53
35
 
54
36
  require 'rake/rdoctask'
55
37
  Rake::RDocTask.new do |rdoc|
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
@@ -0,0 +1,40 @@
1
+
2
+ *** These modified files were found in JOE when it aborted on Sun Mar 28 22:54:36 2010
3
+ *** JOE was aborted because the terminal closed
4
+
5
+ *** File '(Unnamed)'
6
+ markaby
7
+ Markaby
8
+ slate
9
+ Bad
10
+ Indifferent
11
+ Good
12
+ act[feeling]
13
+ span-6
14
+ span-4
15
+ column
16
+
17
+ *** File '(Unnamed)'
18
+ easy_pool
19
+ @stubs
20
+ reset
21
+ reset
22
+ response
23
+ timed
24
+ primary
25
+ $debug
26
+ OneLine
27
+ file
28
+
29
+ *** File '(Unnamed)'
30
+ rip-comments
31
+ rip-comments
32
+ rip-comments
33
+ rip-comments
34
+ bin/rip-comments
35
+ bin/rip-comments
36
+ bin/rip-comments
37
+ bin/rip-comments
38
+ bin/rip-comments
39
+ rip-comments
40
+ rip-comments
@@ -12,20 +12,20 @@ end
12
12
 
13
13
  Bundler.require
14
14
 
15
+ $:.unshift(File.expand_path(File.join(File.dirname(__FILE__), '..', 'lib')))
16
+
15
17
  require 'optparse'
16
18
 
17
19
  options = {}
18
20
 
19
- $:.unshift(File.expand_path(File.dirname(__FILE__) + "/../lib/grammars/generated_parsers"))
20
-
21
21
  class Loader
22
- def self.load(filename, options={})
22
+ def load(filename, options={})
23
23
  raise "self.load on #{self.class} is abstract"
24
24
  end
25
25
  end
26
26
 
27
27
  class UnsupportedLoader < Loader
28
- def self.load(filename, options={})
28
+ def load(filename, options={})
29
29
  raise "File extension #{File.extname(filename)} is not supported"
30
30
  end
31
31
  end
@@ -35,26 +35,23 @@ class JavascriptLoader < Loader
35
35
  include Singleton
36
36
 
37
37
  def initialize
38
- require 'javascript'
39
- @parser = JavascriptParser.new
38
+ require 'javascript/comment_stripper'
39
+ @parser = CommentRipper::Javascript.new
40
40
  end
41
41
 
42
42
  def load(filename, options={})
43
- require 'javascript'
44
43
  File.open(filename) do |file|
45
44
  input = file.read
46
- output = @parser.parse(input)
47
- if(output.nil?)
48
- $stderr.puts output.failure_reason
49
- elsif(!options.nil? && options.has_key?(:outfile))
45
+ output = @parser.strip(input)
46
+ if(!options.nil? && options.has_key?(:outfile))
50
47
  outfile = options[:outfile]
51
48
  if(outfile != filename)
52
- File.open(outfile, 'w') {|f| f.write(output.value) }
49
+ File.open(outfile, 'w') {|f| f.write(output) }
53
50
  else
54
51
  $stderr.puts("Infile and outfile are the same (#{outfile}), and won't overwrite without force flag.")
55
52
  end
56
53
  else
57
- puts output.value
54
+ puts output
58
55
  end
59
56
  end
60
57
  end
@@ -77,11 +74,13 @@ end
77
74
  optparse.parse!
78
75
 
79
76
  if(options[:input_files].nil? ^ options[:output_files].nil?)
80
- raise "Can't specify input files or output files and not the other"
77
+ raise "Can't specify one of input files or output files and not the other"
81
78
  end
82
79
 
83
80
  if(!options[:input_files].nil?)
84
- raise "Output list must have the same count as input list (#{options[:input_files].length} inputs, #{options[:output_files].length} outputs)" if options[:input_files].length != options[:output_files].length
81
+ if options[:input_files].length != options[:output_files].length
82
+ raise "Output list must have the same count as input list (#{options[:input_files].length} inputs, #{options[:output_files].length} outputs)"
83
+ end
85
84
  until options[:input_files].empty?
86
85
  infile = options[:input_files].shift
87
86
  outfile = options[:output_files].shift
@@ -89,8 +88,12 @@ if(!options[:input_files].nil?)
89
88
  loaders[ext_symbol].instance.load(infile, :outfile => outfile)
90
89
  end
91
90
  elsif(ARGV.length == 1)
92
- ext_symbol = File.extname(ARGV[0]).to_sym
93
- loaders[ext_symbol].load(ARGV[0])
91
+ extension = File.extname(ARGV[0])
92
+ if(extension.nil? || extension.empty?)
93
+ raise "Extension expected for file"
94
+ end
95
+ ext_symbol = extension.to_sym
96
+ loaders[ext_symbol].instance.load(ARGV[0])
94
97
  else
95
98
  $stderr.puts "No file given and no input/output lists given."
96
99
  raise optparse.banner
@@ -12,20 +12,21 @@ end
12
12
 
13
13
  Bundler.require
14
14
 
15
+ $:.unshift(File.expand_path(File.join(File.dirname(__FILE__), '..', 'lib')))
16
+ puts $:.inspect
17
+
15
18
  require 'optparse'
16
19
 
17
20
  options = {}
18
21
 
19
- $:.unshift(File.expand_path(File.dirname(__FILE__) + "/../lib/grammars/generated_parsers"))
20
-
21
22
  class Loader
22
- def self.load(filename, options={})
23
+ def load(filename, options={})
23
24
  raise "self.load on #{self.class} is abstract"
24
25
  end
25
26
  end
26
27
 
27
28
  class UnsupportedLoader < Loader
28
- def self.load(filename, options={})
29
+ def load(filename, options={})
29
30
  raise "File extension #{File.extname(filename)} is not supported"
30
31
  end
31
32
  end
@@ -35,28 +36,23 @@ class JavascriptLoader < Loader
35
36
  include Singleton
36
37
 
37
38
  def initialize
38
- require 'javascript'
39
- @parser = JavascriptParser.new
39
+ require 'javascript/comment_stripper'
40
+ @parser = CommentRipper::Javascript.new
40
41
  end
41
42
 
42
43
  def load(filename, options={})
43
- require 'javascript'
44
44
  File.open(filename) do |file|
45
- #p = JavascriptParser.new
46
45
  input = file.read
47
- #output = p.parse(input)
48
- output = @@parser.parse(input)
49
- if(output.nil?)
50
- $stderr.puts output.failure_reason
51
- elsif(!options.nil? && options.has_key?(:outfile))
46
+ output = @parser.strip(input)
47
+ if(!options.nil? && options.has_key?(:outfile))
52
48
  outfile = options[:outfile]
53
49
  if(outfile != filename)
54
- File.open(outfile, 'w') {|f| f.write(output.value) }
50
+ File.open(outfile, 'w') {|f| f.write(output) }
55
51
  else
56
52
  $stderr.puts("Infile and outfile are the same (#{outfile}), and won't overwrite without force flag.")
57
53
  end
58
54
  else
59
- puts output.value
55
+ puts output
60
56
  end
61
57
  end
62
58
  end
@@ -79,11 +75,13 @@ end
79
75
  optparse.parse!
80
76
 
81
77
  if(options[:input_files].nil? ^ options[:output_files].nil?)
82
- raise "Can't specify input files or output files and not the other"
78
+ raise "Can't specify one of input files or output files and not the other"
83
79
  end
84
80
 
85
81
  if(!options[:input_files].nil?)
86
- raise "Output list must have the same count as input list (#{options[:input_files].length} inputs, #{options[:output_files].length} outputs)" if options[:input_files].length != options[:output_files].length
82
+ if options[:input_files].length != options[:output_files].length
83
+ raise "Output list must have the same count as input list (#{options[:input_files].length} inputs, #{options[:output_files].length} outputs)"
84
+ end
87
85
  until options[:input_files].empty?
88
86
  infile = options[:input_files].shift
89
87
  outfile = options[:output_files].shift
@@ -91,8 +89,12 @@ if(!options[:input_files].nil?)
91
89
  loaders[ext_symbol].instance.load(infile, :outfile => outfile)
92
90
  end
93
91
  elsif(ARGV.length == 1)
94
- ext_symbol = File.extname(ARGV[0]).to_sym
95
- loaders[ext_symbol].load(ARGV[0])
92
+ extension = File.extname(ARGV[0])
93
+ if(extension.nil? || extension.empty?)
94
+ raise "Extension expected for file"
95
+ end
96
+ ext_symbol = extension.to_sym
97
+ loaders[ext_symbol].instance.load(ARGV[0])
96
98
  else
97
99
  $stderr.puts "No file given and no input/output lists given."
98
100
  raise optparse.banner
@@ -0,0 +1,11 @@
1
module CommentRipper
  # Raised when an opening token never finds its closing counterpart.
  #
  # The offending token is kept so the message can report which symbol was
  # left open and where it sits in the source.
  class UnmatchedTokenException < RuntimeError
    # token:: the opening token; it must respond to +symbol+ and to
    #         +position+ (which in turn responds to +line+ and +character+).
    def initialize(token)
      @token = token
    end

    # Human-readable description: the token's symbol followed by its line
    # and character position.
    def to_s
      where = @token.position
      "Unmatched token: #{@token.symbol} at L#{where.line} C#{where.character}"
    end
  end
end
@@ -0,0 +1,25 @@
1
+ require 'javascript/lexer'
2
+ require 'exceptions'
3
+
4
module CommentRipper
  # Strips ordinary single-line and multi-line comments out of JavaScript.
  class Javascript
    # opts:: accepted for forward compatibility; nothing reads it yet.
    def initialize(opts={})
    end

    # Returns the source text with comments removed.
    #
    # tree_or_string:: either raw source (a String, which is first run
    #                  through JavascriptLexer) or an already-lexed list of
    #                  nodes.
    #
    # SingleLineComment and MultiLineComment nodes contribute nothing to the
    # result; every other node contributes its +to_s+.
    def strip(tree_or_string)
      return strip(JavascriptLexer.new(tree_or_string).lex) if tree_or_string.is_a?(String)

      pieces = tree_or_string.map do |node|
        removable = node.is_a?(SingleLineComment) || node.is_a?(MultiLineComment)
        removable ? "" : node.to_s
      end
      pieces.join
    end
  end
end
@@ -0,0 +1,182 @@
1
+ require 'tokens/token'
2
+ require 'tokens/words'
3
+ require 'tokens/whitespace'
4
+ require 'tokens/comments'
5
+ require 'tokens/quotes'
6
+ require 'tokens/special'
7
+
8
module CommentRipper
  # Breaks JavaScript source into a flat list of nodes in three passes,
  # run in order by #lex:
  #
  # 1. lex_1 - string -> stream of primitive tokens, with positions.
  # 2. lex_2 - token stream -> words, comments, quoted strings and regular
  #    expressions, with any remaining tokens passed through untouched.
  # 3. lex_3 - folds whitespace that surrounds a comment into the comment.
  class JavascriptLexer
    # string:: the source text to lex.
    def initialize(string)
      @string = string
    end

    # Runs all three passes over the source given to the constructor and
    # returns the resulting list of nodes.
    def lex
      [:lex_1, :lex_2, :lex_3].inject(@string) { |input, pass| send(pass, input) }
    end

    private

    # Pass 1: scans +string+ left to right and returns an array of tokens.
    #
    # At each position the token classes below are tried in order and the
    # first one whose +match+ accepts the upcoming text wins, so the order
    # of this list is significant. Text that matches no token class becomes
    # a Character. +line+ and +char+ are handed to every token constructor.
    def lex_1(string)
      # Built once up front; the list does not change while scanning.
      basic_lexer_order = [SingleLineCommentStart, PreservedCommentStart, MultiLineCommentStart,
                           MultiLineCommentEnd, Newline, Tab, Space, SingleQuote, DoubleQuote, Slash,
                           EscapeCharacter]
      i = 0
      tokens = []
      line = 1
      char = 1
      until string[i].nil?
        match = nil
        basic_lexer_order.each do |token|
          token_length = token.symbol.length
          if token.match(string[i..(i + token_length - 1)])
            match = token.new(line, char)
            # Step over all but the last character of the symbol; the
            # shared increment at the bottom of the loop covers the last.
            i += token_length - 1
            char += token_length - 1
            break
          end
        end

        if tokens.last.is_a?(EscapeCharacter) && (match.is_a?(Comment) || match.is_a?(Newline))
          # An escape directly before a comment/newline token cancels it:
          # the escape token is dropped and only the first character of the
          # would-be token is emitted, as a plain Character. Both the index
          # and the column are rewound so later positions stay accurate.
          # NOTE(review): +line+ is not advanced for an escaped newline -
          # confirm whether that is intended.
          tokens.pop
          rewind = match.symbol.length - 1
          i -= rewind
          char -= rewind
          tokens << Character.new(line, char, string[i].chr)
        else
          if match.is_a?(Newline)
            line += 1
            char = 0 # becomes 1 after the increment below
          end
          tokens << (match || Character.new(line, char, string[i].chr))
        end
        i += 1
        char += 1
      end
      tokens
    end

    # Pass 2: consumes +stream+ (it is emptied via +shift+) and returns a
    # list in which runs of primitive tokens are grouped into Word,
    # PreservedComment, MultiLineComment, SingleLineComment, QuotedString
    # and RegularExpression nodes. Tokens that start none of these are
    # passed through unchanged.
    #
    # Raises UnmatchedTokenException when a block comment, quoted string or
    # regular expression is still open at the end of the stream.
    def lex_2(stream)
      tree = []
      until stream.empty?
        token = stream.shift
        tree << case token
                when Character              then read_word(token, stream)
                when PreservedCommentStart  then read_block_comment(PreservedComment.new, token, stream)
                when MultiLineCommentStart  then read_block_comment(MultiLineComment.new, token, stream)
                when SingleLineCommentStart then read_line_comment(token, stream)
                # A quoted string ends at the next quote of the same class.
                when QuoteCharacter         then read_delimited(QuotedString.new, token, stream, token.class)
                when Slash                  then read_delimited(RegularExpression.new, token, stream, Slash)
                else token
                end
      end
      tree
    end

    # Collects +first+ and every directly following Character into a Word.
    def read_word(first, stream)
      word = Word.new
      word << first
      word << stream.shift while stream.first.is_a?(Character)
      word
    end

    # Fills +comment+ with +opener+ and everything up to and including the
    # next MultiLineCommentEnd. Raises UnmatchedTokenException for +opener+
    # if the stream runs out first.
    def read_block_comment(comment, opener, stream)
      comment << opener
      loop do
        raise UnmatchedTokenException.new(opener) if stream.first.nil?
        piece = stream.shift
        comment << piece
        break if piece.is_a?(MultiLineCommentEnd)
      end
      comment
    end

    # Collects +opener+ and everything up to (not including) the next
    # Newline, or the end of the stream, into a SingleLineComment.
    def read_line_comment(opener, stream)
      comment = SingleLineComment.new
      comment << opener
      until stream.first.nil? || stream.first.is_a?(Newline)
        comment << stream.shift
      end
      comment
    end

    # Fills +node+ with +opener+ and everything up to and including the next
    # token that is a +closer+. Raises UnmatchedTokenException for +opener+
    # if the stream runs out first.
    def read_delimited(node, opener, stream, closer)
      node << opener
      loop do
        piece = stream.shift
        raise UnmatchedTokenException.new(opener) if piece.nil?
        node << piece
        break if piece.is_a?(closer)
      end
      node
    end

    # Pass 3: finally, do some comment-processing. Mutates and returns
    # +tree+.
    #
    # * Space/Tab nodes directly before a comment are moved to the front of
    #   the comment.
    # * If the comment is followed by a Newline and is either the first
    #   node or preceded by a Newline, that following Newline is appended to
    #   the comment.
    def lex_3(tree)
      i = 0
      until (current = tree[i]).nil?
        if current.is_a?(Comment)
          j = i - 1
          # Stop at the start of the tree: a negative index would wrap
          # around and pull whitespace from the END of the tree.
          while j >= 0 && (tree[j].is_a?(Space) || tree[j].is_a?(Tab))
            current.unshift tree.slice!(j)
            j -= 1
            i -= 1 # the comment itself moved one slot to the left
          end
          if tree[i + 1].is_a?(Newline) && (i < 1 || tree[i - 1].is_a?(Newline))
            current << tree.slice!(i + 1)
          end
        end
        i += 1
      end
      tree
    end
  end
end