comment-ripper 0.0.2 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.gitignore +21 -0
- data/Gemfile +3 -0
- data/Rakefile +1 -19
- data/VERSION +1 -0
- data/bin/DEADJOE +40 -0
- data/bin/rip-comments +20 -17
- data/bin/rip-comments~ +21 -19
- data/lib/exceptions.rb +11 -0
- data/lib/javascript/comment_stripper.rb +25 -0
- data/lib/javascript/lexer.rb +182 -0
- data/lib/tokens/comments.rb +24 -0
- data/lib/tokens/quotes.rb +12 -0
- data/lib/tokens/special.rb +5 -0
- data/lib/tokens/token.rb +80 -0
- data/lib/tokens/whitespace.rb +11 -0
- data/lib/tokens/words.rb +13 -0
- data/spec/comment-ripper_spec.rb +213 -11
- data/spec/sample_data/swfobject_src.expected.js +708 -0
- data/spec/sample_data/swfobject_src.js +777 -0
- data/spec/spec.opts +1 -0
- data/spec/spec_helper.rb +1 -2
- metadata +28 -21
- data/lib/grammars/javascript.tt +0 -35
data/.document
ADDED
data/.gitignore
ADDED
data/Gemfile
ADDED
data/Rakefile
CHANGED
@@ -10,12 +10,7 @@ begin
|
|
10
10
|
gem.email = "nanodeath@gmail.com"
|
11
11
|
gem.homepage = "http://github.com/nanodeath/comment-ripper"
|
12
12
|
gem.authors = ["Max Aller"]
|
13
|
-
gem.add_dependency "treetop", ">= 1.4"
|
14
13
|
gem.add_development_dependency "rspec", ">= 1.2.9"
|
15
|
-
|
16
|
-
# This doesn't actually build a native extension, but eh, it works.
|
17
|
-
gem.extensions << "Rakefile"
|
18
|
-
gem.files = Dir['lib/grammars/**/*.tt']
|
19
14
|
end
|
20
15
|
Jeweler::GemcutterTasks.new
|
21
16
|
rescue LoadError
|
@@ -36,20 +31,7 @@ end
|
|
36
31
|
|
37
32
|
task :spec => :check_dependencies
|
38
33
|
|
39
|
-
task :default
|
40
|
-
# Build grammars
|
41
|
-
generated_directory = "lib/grammars/generated_parsers"
|
42
|
-
mkdir_p File.join(File.dirname(__FILE__), generated_directory)
|
43
|
-
|
44
|
-
grammars = Dir[File.expand_path(File.dirname(__FILE__) + "/lib/grammars/*.tt")]
|
45
|
-
puts "Building #{grammars.length} grammar parsers..."
|
46
|
-
grammars.each do |g|
|
47
|
-
outfile = File.join(File.dirname(__FILE__), generated_directory, File.basename(g, ".tt") + ".rb")
|
48
|
-
print "Processing grammar #{File.basename(g)}"
|
49
|
-
`tt #{g} -o #{outfile}`
|
50
|
-
puts "...Success!"
|
51
|
-
end
|
52
|
-
end
|
34
|
+
task :default => :spec
|
53
35
|
|
54
36
|
require 'rake/rdoctask'
|
55
37
|
Rake::RDocTask.new do |rdoc|
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.1.0
|
data/bin/DEADJOE
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
|
2
|
+
*** These modified files were found in JOE when it aborted on Sun Mar 28 22:54:36 2010
|
3
|
+
*** JOE was aborted because the terminal closed
|
4
|
+
|
5
|
+
*** File '(Unnamed)'
|
6
|
+
markaby
|
7
|
+
Markaby
|
8
|
+
slate
|
9
|
+
Bad
|
10
|
+
Indifferent
|
11
|
+
Good
|
12
|
+
act[feeling]
|
13
|
+
span-6
|
14
|
+
span-4
|
15
|
+
column
|
16
|
+
|
17
|
+
*** File '(Unnamed)'
|
18
|
+
easy_pool
|
19
|
+
@stubs
|
20
|
+
reset
|
21
|
+
reset
|
22
|
+
response
|
23
|
+
timed
|
24
|
+
primary
|
25
|
+
$debug
|
26
|
+
OneLine
|
27
|
+
file
|
28
|
+
|
29
|
+
*** File '(Unnamed)'
|
30
|
+
rip-comments
|
31
|
+
rip-comments
|
32
|
+
rip-comments
|
33
|
+
rip-comments
|
34
|
+
bin/rip-comments
|
35
|
+
bin/rip-comments
|
36
|
+
bin/rip-comments
|
37
|
+
bin/rip-comments
|
38
|
+
bin/rip-comments
|
39
|
+
rip-comments
|
40
|
+
rip-comments
|
data/bin/rip-comments
CHANGED
@@ -12,20 +12,20 @@ end
|
|
12
12
|
|
13
13
|
Bundler.require
|
14
14
|
|
15
|
+
$:.unshift(File.expand_path(File.join(File.dirname(__FILE__), '..', 'lib')))
|
16
|
+
|
15
17
|
require 'optparse'
|
16
18
|
|
17
19
|
options = {}
|
18
20
|
|
19
|
-
$:.unshift(File.expand_path(File.dirname(__FILE__) + "/../lib/grammars/generated_parsers"))
|
20
|
-
|
21
21
|
class Loader
|
22
|
-
def
|
22
|
+
def load(filename, options={})
|
23
23
|
raise "self.load on #{self.class} is abstract"
|
24
24
|
end
|
25
25
|
end
|
26
26
|
|
27
27
|
class UnsupportedLoader < Loader
|
28
|
-
def
|
28
|
+
def load(filename, options={})
|
29
29
|
raise "File extension #{File.extname(filename)} is not supported"
|
30
30
|
end
|
31
31
|
end
|
@@ -35,26 +35,23 @@ class JavascriptLoader < Loader
|
|
35
35
|
include Singleton
|
36
36
|
|
37
37
|
def initialize
|
38
|
-
require 'javascript'
|
39
|
-
@parser =
|
38
|
+
require 'javascript/comment_stripper'
|
39
|
+
@parser = CommentRipper::Javascript.new
|
40
40
|
end
|
41
41
|
|
42
42
|
def load(filename, options={})
|
43
|
-
require 'javascript'
|
44
43
|
File.open(filename) do |file|
|
45
44
|
input = file.read
|
46
|
-
output = @parser.
|
47
|
-
if(
|
48
|
-
$stderr.puts output.failure_reason
|
49
|
-
elsif(!options.nil? && options.has_key?(:outfile))
|
45
|
+
output = @parser.strip(input)
|
46
|
+
if(!options.nil? && options.has_key?(:outfile))
|
50
47
|
outfile = options[:outfile]
|
51
48
|
if(outfile != filename)
|
52
|
-
File.open(outfile, 'w') {|f| f.write(output
|
49
|
+
File.open(outfile, 'w') {|f| f.write(output) }
|
53
50
|
else
|
54
51
|
$stderr.puts("Infile and outfile are the same (#{outfile}), and won't overwrite without force flag.")
|
55
52
|
end
|
56
53
|
else
|
57
|
-
puts output
|
54
|
+
puts output
|
58
55
|
end
|
59
56
|
end
|
60
57
|
end
|
@@ -77,11 +74,13 @@ end
|
|
77
74
|
optparse.parse!
|
78
75
|
|
79
76
|
if(options[:input_files].nil? ^ options[:output_files].nil?)
|
80
|
-
raise "Can't specify input files or output files and not the other"
|
77
|
+
raise "Can't specify one of input files or output files and not the other"
|
81
78
|
end
|
82
79
|
|
83
80
|
if(!options[:input_files].nil?)
|
84
|
-
|
81
|
+
if options[:input_files].length != options[:output_files].length
|
82
|
+
raise "Output list must have the same count as input list (#{options[:input_files].length} inputs, #{options[:output_files].length} outputs)"
|
83
|
+
end
|
85
84
|
until options[:input_files].empty?
|
86
85
|
infile = options[:input_files].shift
|
87
86
|
outfile = options[:output_files].shift
|
@@ -89,8 +88,12 @@ if(!options[:input_files].nil?)
|
|
89
88
|
loaders[ext_symbol].instance.load(infile, :outfile => outfile)
|
90
89
|
end
|
91
90
|
elsif(ARGV.length == 1)
|
92
|
-
|
93
|
-
|
91
|
+
extension = File.extname(ARGV[0])
|
92
|
+
if(extension.nil? || extension.empty?)
|
93
|
+
raise "Extension expected for file"
|
94
|
+
end
|
95
|
+
ext_symbol = extension.to_sym
|
96
|
+
loaders[ext_symbol].instance.load(ARGV[0])
|
94
97
|
else
|
95
98
|
$stderr.puts "No file given and no input/output lists given."
|
96
99
|
raise optparse.banner
|
data/bin/rip-comments~
CHANGED
@@ -12,20 +12,21 @@ end
|
|
12
12
|
|
13
13
|
Bundler.require
|
14
14
|
|
15
|
+
$:.unshift(File.expand_path(File.join(File.dirname(__FILE__), '..', 'lib')))
|
16
|
+
puts $:.inspect
|
17
|
+
|
15
18
|
require 'optparse'
|
16
19
|
|
17
20
|
options = {}
|
18
21
|
|
19
|
-
$:.unshift(File.expand_path(File.dirname(__FILE__) + "/../lib/grammars/generated_parsers"))
|
20
|
-
|
21
22
|
class Loader
|
22
|
-
def
|
23
|
+
def load(filename, options={})
|
23
24
|
raise "self.load on #{self.class} is abstract"
|
24
25
|
end
|
25
26
|
end
|
26
27
|
|
27
28
|
class UnsupportedLoader < Loader
|
28
|
-
def
|
29
|
+
def load(filename, options={})
|
29
30
|
raise "File extension #{File.extname(filename)} is not supported"
|
30
31
|
end
|
31
32
|
end
|
@@ -35,28 +36,23 @@ class JavascriptLoader < Loader
|
|
35
36
|
include Singleton
|
36
37
|
|
37
38
|
def initialize
|
38
|
-
require 'javascript'
|
39
|
-
@parser =
|
39
|
+
require 'javascript/comment_stripper'
|
40
|
+
@parser = CommentRipper::Javascript.new
|
40
41
|
end
|
41
42
|
|
42
43
|
def load(filename, options={})
|
43
|
-
require 'javascript'
|
44
44
|
File.open(filename) do |file|
|
45
|
-
#p = JavascriptParser.new
|
46
45
|
input = file.read
|
47
|
-
|
48
|
-
|
49
|
-
if(output.nil?)
|
50
|
-
$stderr.puts output.failure_reason
|
51
|
-
elsif(!options.nil? && options.has_key?(:outfile))
|
46
|
+
output = @parser.strip(input)
|
47
|
+
if(!options.nil? && options.has_key?(:outfile))
|
52
48
|
outfile = options[:outfile]
|
53
49
|
if(outfile != filename)
|
54
|
-
File.open(outfile, 'w') {|f| f.write(output
|
50
|
+
File.open(outfile, 'w') {|f| f.write(output) }
|
55
51
|
else
|
56
52
|
$stderr.puts("Infile and outfile are the same (#{outfile}), and won't overwrite without force flag.")
|
57
53
|
end
|
58
54
|
else
|
59
|
-
puts output
|
55
|
+
puts output
|
60
56
|
end
|
61
57
|
end
|
62
58
|
end
|
@@ -79,11 +75,13 @@ end
|
|
79
75
|
optparse.parse!
|
80
76
|
|
81
77
|
if(options[:input_files].nil? ^ options[:output_files].nil?)
|
82
|
-
raise "Can't specify input files or output files and not the other"
|
78
|
+
raise "Can't specify one of input files or output files and not the other"
|
83
79
|
end
|
84
80
|
|
85
81
|
if(!options[:input_files].nil?)
|
86
|
-
|
82
|
+
if options[:input_files].length != options[:output_files].length
|
83
|
+
raise "Output list must have the same count as input list (#{options[:input_files].length} inputs, #{options[:output_files].length} outputs)"
|
84
|
+
end
|
87
85
|
until options[:input_files].empty?
|
88
86
|
infile = options[:input_files].shift
|
89
87
|
outfile = options[:output_files].shift
|
@@ -91,8 +89,12 @@ if(!options[:input_files].nil?)
|
|
91
89
|
loaders[ext_symbol].instance.load(infile, :outfile => outfile)
|
92
90
|
end
|
93
91
|
elsif(ARGV.length == 1)
|
94
|
-
|
95
|
-
|
92
|
+
extension = File.extname(ARGV[0])
|
93
|
+
if(extension.nil? || extension.empty?)
|
94
|
+
raise "Extension expected for file"
|
95
|
+
end
|
96
|
+
ext_symbol = extension.to_sym
|
97
|
+
loaders[ext_symbol].instance.load(ARGV[0])
|
96
98
|
else
|
97
99
|
$stderr.puts "No file given and no input/output lists given."
|
98
100
|
raise optparse.banner
|
data/lib/exceptions.rb
ADDED
data/lib/javascript/comment_stripper.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'javascript/lexer'
|
2
|
+
require 'exceptions'
|
3
|
+
|
4
|
+
module CommentRipper
  # Removes single-line and multi-line comment nodes from JavaScript.
  class Javascript
    # opts - accepted for forward compatibility; not read at present.
    def initialize(opts={})
      # nothing here yet
    end

    # Returns the source text with comments removed.
    #
    # tree_or_string - either raw JavaScript source (a String, which is first
    #                  run through JavascriptLexer#lex) or an already lexed
    #                  list of nodes.
    #
    # SingleLineComment and MultiLineComment nodes contribute nothing to the
    # result; every other node contributes its to_s.
    def strip(tree_or_string)
      return strip(JavascriptLexer.new(tree_or_string).lex) if tree_or_string.is_a?(String)

      pieces = []
      tree_or_string.each do |node|
        dropped = SingleLineComment === node || MultiLineComment === node
        pieces << (dropped ? "" : node.to_s)
      end
      pieces.join
    end
  end
end
|
data/lib/javascript/lexer.rb
ADDED
@@ -0,0 +1,182 @@
|
|
1
|
+
require 'tokens/token'
|
2
|
+
require 'tokens/words'
|
3
|
+
require 'tokens/whitespace'
|
4
|
+
require 'tokens/comments'
|
5
|
+
require 'tokens/quotes'
|
6
|
+
require 'tokens/special'
|
7
|
+
|
8
|
+
module CommentRipper
  # Three-pass lexer for JavaScript source.
  #
  #   lex_1: characters  -> flat list of primitive tokens
  #   lex_2: flat tokens -> words, comments, quoted strings and regexes
  #   lex_3: attaches the whitespace surrounding a comment to that comment
  class JavascriptLexer
    # string - the JavaScript source text to lex.
    def initialize(string)
      @string = string
    end

    # Runs the three passes in order, feeding each one the previous result,
    # and returns the output of the last pass.
    def lex
      [:lex_1, :lex_2, :lex_3].inject(@string) { |input, pass| send(pass, input) }
    end

    private

    # Pass 1: walk the source and emit one token per recognised symbol, or a
    # Character for anything that is not recognised. Every token is created
    # with the line/column at which it starts.
    def lex_1(string)
      i = 0
      tokens = []
      line = 1
      char = 1
      # The first class whose symbol matches wins, so the order matters.
      basic_lexer_order = [SingleLineCommentStart, PreservedCommentStart, MultiLineCommentStart,
        MultiLineCommentEnd, Newline, Tab, Space, SingleQuote, DoubleQuote, Slash,
        EscapeCharacter]

      until string[i].nil?
        match = nil
        matched_length = 0
        basic_lexer_order.each do |token|
          token_length = token.symbol.length
          if token.match(string[i, token_length])
            match = token.new(line, char)
            matched_length = token_length
            break
          end
        end

        if tokens.last.is_a?(EscapeCharacter) && (match.is_a?(Comment) || match.is_a?(Newline))
          # An escaped comment marker or newline is not a real token: the
          # escape token is dropped and only the first character of the
          # symbol is emitted, as a plain Character. The remaining characters
          # of the symbol are lexed again on the next iteration.
          tokens.pop
          tokens << Character.new(line, char, string[i].chr)
          i += 1
          if match.is_a?(Newline)
            # The source still moved to a new physical line.
            line += 1
            char = 1
          else
            char += 1
          end
        elsif match.nil?
          tokens << Character.new(line, char, string[i].chr)
          i += 1
          char += 1
        else
          tokens << match
          i += matched_length
          if match.is_a?(Newline)
            line += 1
            char = 1
          else
            char += matched_length
          end
        end
      end
      tokens
    end

    # Pass 2: group the flat token stream into higher-level nodes. The stream
    # is consumed destructively (tokens are shifted off the front).
    #
    # Raises UnmatchedTokenException for an unterminated preserved comment,
    # quoted string or regex, and a RuntimeError for an unterminated
    # multi-line comment.
    #
    # NOTE(review): every Slash is treated as the start of a regex literal, so
    # a division operator will be read as one - confirm whether that is
    # acceptable for the inputs this tool is used on.
    def lex_2(stream)
      tree = []
      until stream.empty?
        token = stream.shift
        case token
        when Character
          word = Word.new
          word << token
          word << stream.shift while stream.first.is_a?(Character)
          tree << word
        when PreservedCommentStart
          comment = PreservedComment.new
          comment << token
          read_block_comment(comment, stream) { raise UnmatchedTokenException.new(token) }
          tree << comment
        when MultiLineCommentStart
          comment = MultiLineComment.new
          comment << token
          # Kept as a plain RuntimeError so existing rescuers keep working.
          read_block_comment(comment, stream) { raise "Unmatched multiline comment" }
          tree << comment
        when SingleLineCommentStart
          comment = SingleLineComment.new
          comment << token
          # The terminating Newline is left in the stream for pass 3.
          comment << stream.shift until stream.first.nil? || stream.first.is_a?(Newline)
          tree << comment
        when QuoteCharacter
          string = QuotedString.new
          string << token
          read_delimited(string, stream, token, token.class)
          tree << string
        when Slash
          regex = RegularExpression.new
          regex << token
          read_delimited(regex, stream, token, Slash)
          tree << regex
        else
          tree << token
        end
      end
      tree
    end

    # Moves tokens from stream into comment up to and including the next
    # MultiLineCommentEnd. Yields when the stream runs out first; the block
    # is expected to raise.
    def read_block_comment(comment, stream)
      loop do
        yield if stream.first.nil?
        closing = stream.first.is_a?(MultiLineCommentEnd)
        comment << stream.shift
        break if closing
      end
    end

    # Moves tokens from stream into literal up to and including the first
    # unescaped token of class closer. A token that directly follows an
    # EscapeCharacter is copied verbatim and never ends the literal, so
    # escaped quotes and slashes stay inside it.
    #
    # Raises UnmatchedTokenException (carrying opener) at end of input.
    def read_delimited(literal, stream, opener, closer)
      loop do
        piece = stream.shift
        raise UnmatchedTokenException.new(opener) if piece.nil?
        literal << piece
        if piece.is_a?(EscapeCharacter)
          escaped = stream.shift
          raise UnmatchedTokenException.new(opener) if escaped.nil?
          literal << escaped
        elsif piece.is_a?(closer)
          break
        end
      end
    end

    # Finally, do some comment-processing: spaces and tabs directly before a
    # comment are moved into the comment, and when the comment is alone on its
    # line (first node, or preceded by a Newline) the Newline after it is
    # moved into the comment as well. The tree is modified in place.
    def lex_3(tree)
      i = 0
      until (current = tree[i]).nil?
        if current.is_a?(Comment)
          # Stop at index 0: a negative index would wrap around to the end of
          # the tree and steal trailing whitespace.
          while i > 0 && (tree[i - 1].is_a?(Space) || tree[i - 1].is_a?(Tab))
            current.unshift(tree.slice!(i - 1))
            i -= 1
          end
          starts_line = i < 1 || tree[i - 1].is_a?(Newline)
          current << tree.slice!(i + 1) if starts_line && tree[i + 1].is_a?(Newline)
        end
        i += 1
      end
      tree
    end
  end
end
|