comment-ripper 0.0.2 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/.gitignore +21 -0
- data/Gemfile +3 -0
- data/Rakefile +1 -19
- data/VERSION +1 -0
- data/bin/DEADJOE +40 -0
- data/bin/rip-comments +20 -17
- data/bin/rip-comments~ +21 -19
- data/lib/exceptions.rb +11 -0
- data/lib/javascript/comment_stripper.rb +25 -0
- data/lib/javascript/lexer.rb +182 -0
- data/lib/tokens/comments.rb +24 -0
- data/lib/tokens/quotes.rb +12 -0
- data/lib/tokens/special.rb +5 -0
- data/lib/tokens/token.rb +80 -0
- data/lib/tokens/whitespace.rb +11 -0
- data/lib/tokens/words.rb +13 -0
- data/spec/comment-ripper_spec.rb +213 -11
- data/spec/sample_data/swfobject_src.expected.js +708 -0
- data/spec/sample_data/swfobject_src.js +777 -0
- data/spec/spec.opts +1 -0
- data/spec/spec_helper.rb +1 -2
- metadata +28 -21
- data/lib/grammars/javascript.tt +0 -35
data/.document
ADDED
data/.gitignore
ADDED
data/Gemfile
ADDED
data/Rakefile
CHANGED
@@ -10,12 +10,7 @@ begin
|
|
10
10
|
gem.email = "nanodeath@gmail.com"
|
11
11
|
gem.homepage = "http://github.com/nanodeath/comment-ripper"
|
12
12
|
gem.authors = ["Max Aller"]
|
13
|
-
gem.add_dependency "treetop", ">= 1.4"
|
14
13
|
gem.add_development_dependency "rspec", ">= 1.2.9"
|
15
|
-
|
16
|
-
# This doesn't actually build a native extension, but eh, it works.
|
17
|
-
gem.extensions << "Rakefile"
|
18
|
-
gem.files = Dir['lib/grammars/**/*.tt']
|
19
14
|
end
|
20
15
|
Jeweler::GemcutterTasks.new
|
21
16
|
rescue LoadError
|
@@ -36,20 +31,7 @@ end
|
|
36
31
|
|
37
32
|
task :spec => :check_dependencies
|
38
33
|
|
39
|
-
task :default
|
40
|
-
# Build grammars
|
41
|
-
generated_directory = "lib/grammars/generated_parsers"
|
42
|
-
mkdir_p File.join(File.dirname(__FILE__), generated_directory)
|
43
|
-
|
44
|
-
grammars = Dir[File.expand_path(File.dirname(__FILE__) + "/lib/grammars/*.tt")]
|
45
|
-
puts "Building #{grammars.length} grammar parsers..."
|
46
|
-
grammars.each do |g|
|
47
|
-
outfile = File.join(File.dirname(__FILE__), generated_directory, File.basename(g, ".tt") + ".rb")
|
48
|
-
print "Processing grammar #{File.basename(g)}"
|
49
|
-
`tt #{g} -o #{outfile}`
|
50
|
-
puts "...Success!"
|
51
|
-
end
|
52
|
-
end
|
34
|
+
task :default => :spec
|
53
35
|
|
54
36
|
require 'rake/rdoctask'
|
55
37
|
Rake::RDocTask.new do |rdoc|
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.1.0
|
data/bin/DEADJOE
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
|
2
|
+
*** These modified files were found in JOE when it aborted on Sun Mar 28 22:54:36 2010
|
3
|
+
*** JOE was aborted because the terminal closed
|
4
|
+
|
5
|
+
*** File '(Unnamed)'
|
6
|
+
markaby
|
7
|
+
Markaby
|
8
|
+
slate
|
9
|
+
Bad
|
10
|
+
Indifferent
|
11
|
+
Good
|
12
|
+
act[feeling]
|
13
|
+
span-6
|
14
|
+
span-4
|
15
|
+
column
|
16
|
+
|
17
|
+
*** File '(Unnamed)'
|
18
|
+
easy_pool
|
19
|
+
@stubs
|
20
|
+
reset
|
21
|
+
reset
|
22
|
+
response
|
23
|
+
timed
|
24
|
+
primary
|
25
|
+
$debug
|
26
|
+
OneLine
|
27
|
+
file
|
28
|
+
|
29
|
+
*** File '(Unnamed)'
|
30
|
+
rip-comments
|
31
|
+
rip-comments
|
32
|
+
rip-comments
|
33
|
+
rip-comments
|
34
|
+
bin/rip-comments
|
35
|
+
bin/rip-comments
|
36
|
+
bin/rip-comments
|
37
|
+
bin/rip-comments
|
38
|
+
bin/rip-comments
|
39
|
+
rip-comments
|
40
|
+
rip-comments
|
data/bin/rip-comments
CHANGED
@@ -12,20 +12,20 @@ end
|
|
12
12
|
|
13
13
|
Bundler.require
|
14
14
|
|
15
|
+
$:.unshift(File.expand_path(File.join(File.dirname(__FILE__), '..', 'lib')))
|
16
|
+
|
15
17
|
require 'optparse'
|
16
18
|
|
17
19
|
options = {}
|
18
20
|
|
19
|
-
$:.unshift(File.expand_path(File.dirname(__FILE__) + "/../lib/grammars/generated_parsers"))
|
20
|
-
|
21
21
|
class Loader
|
22
|
-
def
|
22
|
+
def load(filename, options={})
|
23
23
|
raise "self.load on #{self.class} is abstract"
|
24
24
|
end
|
25
25
|
end
|
26
26
|
|
27
27
|
class UnsupportedLoader < Loader
|
28
|
-
def
|
28
|
+
def load(filename, options={})
|
29
29
|
raise "File extension #{File.extname(filename)} is not supported"
|
30
30
|
end
|
31
31
|
end
|
@@ -35,26 +35,23 @@ class JavascriptLoader < Loader
|
|
35
35
|
include Singleton
|
36
36
|
|
37
37
|
def initialize
|
38
|
-
require 'javascript'
|
39
|
-
@parser =
|
38
|
+
require 'javascript/comment_stripper'
|
39
|
+
@parser = CommentRipper::Javascript.new
|
40
40
|
end
|
41
41
|
|
42
42
|
def load(filename, options={})
|
43
|
-
require 'javascript'
|
44
43
|
File.open(filename) do |file|
|
45
44
|
input = file.read
|
46
|
-
output = @parser.
|
47
|
-
if(
|
48
|
-
$stderr.puts output.failure_reason
|
49
|
-
elsif(!options.nil? && options.has_key?(:outfile))
|
45
|
+
output = @parser.strip(input)
|
46
|
+
if(!options.nil? && options.has_key?(:outfile))
|
50
47
|
outfile = options[:outfile]
|
51
48
|
if(outfile != filename)
|
52
|
-
File.open(outfile, 'w') {|f| f.write(output
|
49
|
+
File.open(outfile, 'w') {|f| f.write(output) }
|
53
50
|
else
|
54
51
|
$stderr.puts("Infile and outfile are the same (#{outfile}), and won't overwrite without force flag.")
|
55
52
|
end
|
56
53
|
else
|
57
|
-
puts output
|
54
|
+
puts output
|
58
55
|
end
|
59
56
|
end
|
60
57
|
end
|
@@ -77,11 +74,13 @@ end
|
|
77
74
|
optparse.parse!
|
78
75
|
|
79
76
|
if(options[:input_files].nil? ^ options[:output_files].nil?)
|
80
|
-
raise "Can't specify input files or output files and not the other"
|
77
|
+
raise "Can't specify one of input files or output files and not the other"
|
81
78
|
end
|
82
79
|
|
83
80
|
if(!options[:input_files].nil?)
|
84
|
-
|
81
|
+
if options[:input_files].length != options[:output_files].length
|
82
|
+
raise "Output list must have the same count as input list (#{options[:input_files].length} inputs, #{options[:output_files].length} outputs)"
|
83
|
+
end
|
85
84
|
until options[:input_files].empty?
|
86
85
|
infile = options[:input_files].shift
|
87
86
|
outfile = options[:output_files].shift
|
@@ -89,8 +88,12 @@ if(!options[:input_files].nil?)
|
|
89
88
|
loaders[ext_symbol].instance.load(infile, :outfile => outfile)
|
90
89
|
end
|
91
90
|
elsif(ARGV.length == 1)
|
92
|
-
|
93
|
-
|
91
|
+
extension = File.extname(ARGV[0])
|
92
|
+
if(extension.nil? || extension.empty?)
|
93
|
+
raise "Extension expected for file"
|
94
|
+
end
|
95
|
+
ext_symbol = extension.to_sym
|
96
|
+
loaders[ext_symbol].instance.load(ARGV[0])
|
94
97
|
else
|
95
98
|
$stderr.puts "No file given and no input/output lists given."
|
96
99
|
raise optparse.banner
|
data/bin/rip-comments~
CHANGED
@@ -12,20 +12,21 @@ end
|
|
12
12
|
|
13
13
|
Bundler.require
|
14
14
|
|
15
|
+
$:.unshift(File.expand_path(File.join(File.dirname(__FILE__), '..', 'lib')))
|
16
|
+
puts $:.inspect
|
17
|
+
|
15
18
|
require 'optparse'
|
16
19
|
|
17
20
|
options = {}
|
18
21
|
|
19
|
-
$:.unshift(File.expand_path(File.dirname(__FILE__) + "/../lib/grammars/generated_parsers"))
|
20
|
-
|
21
22
|
class Loader
|
22
|
-
def
|
23
|
+
def load(filename, options={})
|
23
24
|
raise "self.load on #{self.class} is abstract"
|
24
25
|
end
|
25
26
|
end
|
26
27
|
|
27
28
|
class UnsupportedLoader < Loader
|
28
|
-
def
|
29
|
+
def load(filename, options={})
|
29
30
|
raise "File extension #{File.extname(filename)} is not supported"
|
30
31
|
end
|
31
32
|
end
|
@@ -35,28 +36,23 @@ class JavascriptLoader < Loader
|
|
35
36
|
include Singleton
|
36
37
|
|
37
38
|
def initialize
|
38
|
-
require 'javascript'
|
39
|
-
@parser =
|
39
|
+
require 'javascript/comment_stripper'
|
40
|
+
@parser = CommentRipper::Javascript.new
|
40
41
|
end
|
41
42
|
|
42
43
|
def load(filename, options={})
|
43
|
-
require 'javascript'
|
44
44
|
File.open(filename) do |file|
|
45
|
-
#p = JavascriptParser.new
|
46
45
|
input = file.read
|
47
|
-
|
48
|
-
|
49
|
-
if(output.nil?)
|
50
|
-
$stderr.puts output.failure_reason
|
51
|
-
elsif(!options.nil? && options.has_key?(:outfile))
|
46
|
+
output = @parser.strip(input)
|
47
|
+
if(!options.nil? && options.has_key?(:outfile))
|
52
48
|
outfile = options[:outfile]
|
53
49
|
if(outfile != filename)
|
54
|
-
File.open(outfile, 'w') {|f| f.write(output
|
50
|
+
File.open(outfile, 'w') {|f| f.write(output) }
|
55
51
|
else
|
56
52
|
$stderr.puts("Infile and outfile are the same (#{outfile}), and won't overwrite without force flag.")
|
57
53
|
end
|
58
54
|
else
|
59
|
-
puts output
|
55
|
+
puts output
|
60
56
|
end
|
61
57
|
end
|
62
58
|
end
|
@@ -79,11 +75,13 @@ end
|
|
79
75
|
optparse.parse!
|
80
76
|
|
81
77
|
if(options[:input_files].nil? ^ options[:output_files].nil?)
|
82
|
-
raise "Can't specify input files or output files and not the other"
|
78
|
+
raise "Can't specify one of input files or output files and not the other"
|
83
79
|
end
|
84
80
|
|
85
81
|
if(!options[:input_files].nil?)
|
86
|
-
|
82
|
+
if options[:input_files].length != options[:output_files].length
|
83
|
+
raise "Output list must have the same count as input list (#{options[:input_files].length} inputs, #{options[:output_files].length} outputs)"
|
84
|
+
end
|
87
85
|
until options[:input_files].empty?
|
88
86
|
infile = options[:input_files].shift
|
89
87
|
outfile = options[:output_files].shift
|
@@ -91,8 +89,12 @@ if(!options[:input_files].nil?)
|
|
91
89
|
loaders[ext_symbol].instance.load(infile, :outfile => outfile)
|
92
90
|
end
|
93
91
|
elsif(ARGV.length == 1)
|
94
|
-
|
95
|
-
|
92
|
+
extension = File.extname(ARGV[0])
|
93
|
+
if(extension.nil? || extension.empty?)
|
94
|
+
raise "Extension expected for file"
|
95
|
+
end
|
96
|
+
ext_symbol = extension.to_sym
|
97
|
+
loaders[ext_symbol].instance.load(ARGV[0])
|
96
98
|
else
|
97
99
|
$stderr.puts "No file given and no input/output lists given."
|
98
100
|
raise optparse.banner
|
data/lib/exceptions.rb
ADDED
data/lib/javascript/comment_stripper.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'javascript/lexer'
|
2
|
+
require 'exceptions'
|
3
|
+
|
4
|
+
module CommentRipper
  # Removes ordinary JavaScript comments from either raw source text or an
  # already lexed token tree.
  class Javascript
    # opts is accepted but not read by anything in this class.
    def initialize(opts={})
      # nothing here yet
    end

    # Returns the input as a String with comments removed.
    #
    # tree_or_string - a String of source (run through JavascriptLexer#lex
    #                  first) or a collection of lexed nodes.
    #
    # Only SingleLineComment and MultiLineComment nodes are dropped; every
    # other node, whatever its class, contributes its to_s to the result.
    def strip(tree_or_string)
      return strip(JavascriptLexer.new(tree_or_string).lex) if tree_or_string.is_a?(String)

      surviving = tree_or_string.reject do |node|
        SingleLineComment === node || MultiLineComment === node
      end
      surviving.map { |node| node.to_s }.join
    end
  end
end
|
data/lib/javascript/lexer.rb
ADDED
@@ -0,0 +1,182 @@
|
|
1
|
+
require 'tokens/token'
|
2
|
+
require 'tokens/words'
|
3
|
+
require 'tokens/whitespace'
|
4
|
+
require 'tokens/comments'
|
5
|
+
require 'tokens/quotes'
|
6
|
+
require 'tokens/special'
|
7
|
+
|
8
|
+
module CommentRipper
  # Turns JavaScript source text into a flat tree of nodes (words, comments,
  # quoted strings, regular expressions and leftover single tokens) in three
  # passes: lex_1 -> lex_2 -> lex_3.
  class JavascriptLexer
    # string - the source text to lex.
    def initialize(string)
      @string = string
    end

    # Runs the three passes in order and returns the resulting node array.
    def lex
      lex_3(lex_2(lex_1(@string)))
    end

    private

    # Pass 1: walks the text one position at a time and emits one token per
    # recognised marker, or a Character for anything unrecognised.
    #
    # Returns an Array of tokens.
    def lex_1(string)
      # The first class whose match succeeds wins, so the order is significant.
      # Built once per call instead of once per character.
      basic_lexer_order = [SingleLineCommentStart, PreservedCommentStart, MultiLineCommentStart,
                           MultiLineCommentEnd, Newline, Tab, Space, SingleQuote, DoubleQuote, Slash,
                           EscapeCharacter]
      i = 0
      tokens = []
      line = 1
      char = 1
      until string[i].nil?
        match = nil
        basic_lexer_order.each do |token|
          token_length = token.symbol.length
          if token.match(string[i, token_length])
            match = token.new(line, char)
            # Advance to the marker's last character; the shared +1 below
            # steps past it.
            i += token_length - 1
            char += token_length - 1
            break
          end
        end

        if tokens.last.is_a?(EscapeCharacter) && (match.is_a?(Comment) || match.is_a?(Newline))
          # An escaped comment marker / newline is demoted to a plain
          # character: the escape token is discarded and the index rewinds to
          # the marker's first character so the rest is re-scanned.
          # NOTE(review): `char` is not rewound with `i`, and `line` is not
          # bumped for an escaped newline, so reported positions may drift
          # after this branch -- confirm whether positions matter to callers.
          tokens.pop
          i -= (match.symbol.length - 1)
          tokens << Character.new(line, char, string[i].chr)
        else
          if match.is_a?(Newline)
            line += 1
            char = 0 # becomes 1 after the shared increment below
          end
          tokens << (match.nil? ? Character.new(line, char, string[i].chr) : match)
        end
        i += 1
        char += 1
      end
      tokens
    end

    # Pass 2: groups the flat token stream into composite nodes.
    #
    # stream - Array of tokens from lex_1; it is consumed (emptied) in place.
    #
    # Returns an Array of nodes.
    # Raises UnmatchedTokenException for an unterminated preserved comment,
    # quoted string or regular expression, and a RuntimeError with the message
    # "Unmatched multiline comment" for an unterminated multi-line comment.
    def lex_2(stream)
      tree = []
      until stream.empty?
        token = stream.shift
        case token
        when Character
          word = Word.new
          word << token
          word << stream.shift while stream.first.is_a?(Character)
          tree << word
        when PreservedCommentStart
          comment = PreservedComment.new
          comment << token
          raise UnmatchedTokenException.new(token) unless absorb_block_comment(stream, comment)
          tree << comment
        when MultiLineCommentStart
          comment = MultiLineComment.new
          comment << token
          # NOTE(review): raises a bare string where the sibling branches raise
          # UnmatchedTokenException; kept as-is so existing rescuers still match.
          raise "Unmatched multiline comment" unless absorb_block_comment(stream, comment)
          tree << comment
        when SingleLineCommentStart
          comment = SingleLineComment.new
          comment << token
          # The terminating Newline is left in the stream for lex_3 to place.
          comment << stream.shift until stream.empty? || stream.first.is_a?(Newline)
          tree << comment
        when QuoteCharacter
          literal = QuotedString.new
          literal << token
          # A string closes on a quote of the same class that opened it.
          tree << absorb_literal(stream, literal, token, token.class)
        when Slash
          regex = RegularExpression.new
          regex << token
          tree << absorb_literal(stream, regex, token, Slash)
        else
          tree << token
        end
      end
      tree
    end

    # Moves tokens from stream into comment up to and including the first
    # MultiLineCommentEnd. Returns true when the terminator was found, false
    # when the stream ran out first.
    def absorb_block_comment(stream, comment)
      until stream.empty?
        piece = stream.shift
        comment << piece
        return true if piece.is_a?(MultiLineCommentEnd)
      end
      false
    end

    # Moves tokens from stream into literal up to and including the first
    # unescaped token of closer_class, and returns literal.
    #
    # The token directly after an EscapeCharacter is taken verbatim, so an
    # escaped quote or slash does not end the literal.
    #
    # Raises UnmatchedTokenException (built from opener) when the stream is
    # exhausted before the closing token; nothing is appended in that case.
    def absorb_literal(stream, literal, opener, closer_class)
      loop do
        raise UnmatchedTokenException.new(opener) if stream.empty?
        piece = stream.shift
        literal << piece
        if piece.is_a?(EscapeCharacter)
          raise UnmatchedTokenException.new(opener) if stream.empty?
          literal << stream.shift
        elsif piece.is_a?(closer_class)
          break
        end
      end
      literal
    end

    # Finally, do some comment-processing
    #
    # Pass 3: attaches surrounding whitespace to each Comment node so it
    # travels with the comment. Spaces/tabs immediately before the comment are
    # moved into it; a following Newline is moved into it only when the
    # comment starts the tree or directly follows another Newline.
    #
    # tree - Array of nodes from lex_2; modified in place and returned.
    def lex_3(tree)
      i = 0
      until (current = tree[i]).nil?
        if current.is_a?(Comment)
          # Stop at the front of the tree: a negative index would wrap around
          # and pull whitespace from the END of the tree into this comment.
          while i > 0 && (tree[i - 1].is_a?(Space) || tree[i - 1].is_a?(Tab))
            current.unshift tree.slice!(i - 1)
            i -= 1 # the comment shifted one slot left
          end
          if tree[i + 1].is_a?(Newline) && (i < 1 || tree[i - 1].is_a?(Newline))
            current << tree.slice!(i + 1)
          end
        end
        i += 1
      end
      tree
    end
  end
end
|