RubyGems - comment-ripper - Versions diffs - 0.0.2 → 0.1.0 - Mend

comment-ripper 0.0.2 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24)

data/.document +5 -0
data/.gitignore +21 -0
data/Gemfile +3 -0
data/Rakefile +1 -19
data/VERSION +1 -0
data/bin/DEADJOE +40 -0
data/bin/rip-comments +20 -17
data/bin/rip-comments~ +21 -19
data/lib/exceptions.rb +11 -0
data/lib/javascript/comment_stripper.rb +25 -0
data/lib/javascript/lexer.rb +182 -0
data/lib/tokens/comments.rb +24 -0
data/lib/tokens/quotes.rb +12 -0
data/lib/tokens/special.rb +5 -0
data/lib/tokens/token.rb +80 -0
data/lib/tokens/whitespace.rb +11 -0
data/lib/tokens/words.rb +13 -0
data/spec/comment-ripper_spec.rb +213 -11
data/spec/sample_data/swfobject_src.expected.js +708 -0
data/spec/sample_data/swfobject_src.js +777 -0
data/spec/spec.opts +1 -0
data/spec/spec_helper.rb +1 -2
metadata +28 -21
data/lib/grammars/javascript.tt +0 -35

data/.document ADDED

@@ -0,0 +1,5 @@
+README.rdoc
+lib/**/*.rb
+bin/*
+features/**/*.feature
+LICENSE

data/.gitignore ADDED

@@ -0,0 +1,21 @@
+## MAC OS
+.DS_Store
+## TEXTMATE
+*.tmproj
+tmtags
+## EMACS
+*~
+\#*
+.\#*
+## VIM
+*.swp
+## PROJECT::GENERAL
+coverage
+rdoc
+pkg
+## PROJECT::SPECIFIC

data/Gemfile ADDED

@@ -0,0 +1,3 @@
+source :gemcutter
+gem "rspec", :group => :test

data/Rakefile CHANGED

@@ -10,12 +10,7 @@ begin
     gem.email = "nanodeath@gmail.com"
     gem.homepage = "http://github.com/nanodeath/comment-ripper"
     gem.authors = ["Max Aller"]
-    gem.add_dependency "treetop", ">= 1.4"
     gem.add_development_dependency "rspec", ">= 1.2.9"
-    # This doesn't actually build a native extension, but eh, it works.
-    gem.extensions << "Rakefile"
-    gem.files = Dir['lib/grammars/**/*.tt']
   end
   Jeweler::GemcutterTasks.new
 rescue LoadError
@@ -36,20 +31,7 @@ end
 task :spec => :check_dependencies
-task :default do
-	# Build grammars
-	generated_directory = "lib/grammars/generated_parsers"
-	mkdir_p File.join(File.dirname(__FILE__), generated_directory)
-	grammars = Dir[File.expand_path(File.dirname(__FILE__) + "/lib/grammars/*.tt")]
-	puts "Building #{grammars.length} grammar parsers..."
-	grammars.each do |g|
-		outfile = File.join(File.dirname(__FILE__), generated_directory, File.basename(g, ".tt") + ".rb")
-		print "Processing grammar #{File.basename(g)}"
-		`tt #{g} -o #{outfile}`
-		puts "...Success!"
-	end
-end
+task :default => :spec
 require 'rake/rdoctask'
 Rake::RDocTask.new do |rdoc|

data/VERSION ADDED

@@ -0,0 +1 @@
+0.1.0

data/bin/DEADJOE ADDED

@@ -0,0 +1,40 @@
+*** These modified files were found in JOE when it aborted on Sun Mar 28 22:54:36 2010
+*** JOE was aborted because the terminal closed
+*** File '(Unnamed)'
+markaby
+Markaby
+slate
+Bad
+Indifferent
+Good
+act[feeling]
+span-6
+span-4
+column
+*** File '(Unnamed)'
+easy_pool
+@stubs
+reset
+reset
+response
+timed
+primary
+$debug
+OneLine
+file
+*** File '(Unnamed)'
+rip-comments
+rip-comments
+rip-comments
+rip-comments
+bin/rip-comments
+bin/rip-comments
+bin/rip-comments
+bin/rip-comments
+bin/rip-comments
+rip-comments
+rip-comments

data/bin/rip-comments CHANGED

@@ -12,20 +12,20 @@ end
 Bundler.require
+$:.unshift(File.expand_path(File.join(File.dirname(__FILE__), '..', 'lib')))
 require 'optparse'
 options = {}
-$:.unshift(File.expand_path(File.dirname(__FILE__) + "/../lib/grammars/generated_parsers"))
 class Loader
-	def self.load(filename, options={})
+	def load(filename, options={})
 		raise "self.load on #{self.class} is abstract"
 	end
 end
 class UnsupportedLoader < Loader
-	def self.load(filename, options={})
+	def load(filename, options={})
 		raise "File extension #{File.extname(filename)} is not supported"
 	end
 end
@@ -35,26 +35,23 @@ class JavascriptLoader < Loader
 	include Singleton
 	def initialize
-		require 'javascript'
-		@parser = JavascriptParser.new
+		require 'javascript/comment_stripper'
+		@parser = CommentRipper::Javascript.new
 	end
 	def load(filename, options={})
-		require 'javascript'
 		File.open(filename) do |file|
 			input = file.read
-			output = @parser.parse(input)
-			if(output.nil?)
-				$stderr.puts output.failure_reason
-			elsif(!options.nil? && options.has_key?(:outfile))
+			output = @parser.strip(input)
+			if(!options.nil? && options.has_key?(:outfile))
 				outfile = options[:outfile]
 				if(outfile != filename)
-					File.open(outfile, 'w') {|f| f.write(output.value) }
+					File.open(outfile, 'w') {|f| f.write(output) }
 				else
 					$stderr.puts("Infile and outfile are the same (#{outfile}), and won't overwrite without force flag.")
 				end
 			else
-				puts output.value
+				puts output
 			end
 		end
 	end
@@ -77,11 +74,13 @@ end
 optparse.parse!
 if(options[:input_files].nil? ^ options[:output_files].nil?)
-	raise "Can't specify input files or output files and not the other"
+	raise "Can't specify one of input files or output files and not the other"
 end
 if(!options[:input_files].nil?)
-	raise "Output list must have the same count as input list (#{options[:input_files].length} inputs, #{options[:output_files].length} outputs)" if options[:input_files].length != options[:output_files].length
+	if options[:input_files].length != options[:output_files].length
+		raise "Output list must have the same count as input list (#{options[:input_files].length} inputs, #{options[:output_files].length} outputs)"
+	end
 	until options[:input_files].empty?
 		infile = options[:input_files].shift
 		outfile = options[:output_files].shift
@@ -89,8 +88,12 @@ if(!options[:input_files].nil?)
 		loaders[ext_symbol].instance.load(infile, :outfile => outfile)
 	end
 elsif(ARGV.length == 1)
-	ext_symbol = File.extname(ARGV[0]).to_sym
-	loaders[ext_symbol].load(ARGV[0])
+	extension = File.extname(ARGV[0])
+	if(extension.nil? || extension.empty?)
+		raise "Extension expected for file"
+	end
+	ext_symbol = extension.to_sym
+	loaders[ext_symbol].instance.load(ARGV[0])
 else
 	$stderr.puts "No file given and no input/output lists given."
 	raise optparse.banner

data/bin/rip-comments~ CHANGED

@@ -12,20 +12,21 @@ end
 Bundler.require
+$:.unshift(File.expand_path(File.join(File.dirname(__FILE__), '..', 'lib')))
+puts $:.inspect
 require 'optparse'
 options = {}
-$:.unshift(File.expand_path(File.dirname(__FILE__) + "/../lib/grammars/generated_parsers"))
 class Loader
-	def self.load(filename, options={})
+	def load(filename, options={})
 		raise "self.load on #{self.class} is abstract"
 	end
 end
 class UnsupportedLoader < Loader
-	def self.load(filename, options={})
+	def load(filename, options={})
 		raise "File extension #{File.extname(filename)} is not supported"
 	end
 end
@@ -35,28 +36,23 @@ class JavascriptLoader < Loader
 	include Singleton
 	def initialize
-		require 'javascript'
-		@parser = JavascriptParser.new
+		require 'javascript/comment_stripper'
+		@parser = CommentRipper::Javascript.new
 	end
 	def load(filename, options={})
-		require 'javascript'
 		File.open(filename) do |file|
-			#p = JavascriptParser.new
 			input = file.read
-			#output = p.parse(input)
-			output = @@parser.parse(input)
-			if(output.nil?)
-				$stderr.puts output.failure_reason
-			elsif(!options.nil? && options.has_key?(:outfile))
+			output = @parser.strip(input)
+			if(!options.nil? && options.has_key?(:outfile))
 				outfile = options[:outfile]
 				if(outfile != filename)
-					File.open(outfile, 'w') {|f| f.write(output.value) }
+					File.open(outfile, 'w') {|f| f.write(output) }
 				else
 					$stderr.puts("Infile and outfile are the same (#{outfile}), and won't overwrite without force flag.")
 				end
 			else
-				puts output.value
+				puts output
 			end
 		end
 	end
@@ -79,11 +75,13 @@ end
 optparse.parse!
 if(options[:input_files].nil? ^ options[:output_files].nil?)
-	raise "Can't specify input files or output files and not the other"
+	raise "Can't specify one of input files or output files and not the other"
 end
 if(!options[:input_files].nil?)
-	raise "Output list must have the same count as input list (#{options[:input_files].length} inputs, #{options[:output_files].length} outputs)" if options[:input_files].length != options[:output_files].length
+	if options[:input_files].length != options[:output_files].length
+		raise "Output list must have the same count as input list (#{options[:input_files].length} inputs, #{options[:output_files].length} outputs)"
+	end
 	until options[:input_files].empty?
 		infile = options[:input_files].shift
 		outfile = options[:output_files].shift
@@ -91,8 +89,12 @@ if(!options[:input_files].nil?)
 		loaders[ext_symbol].instance.load(infile, :outfile => outfile)
 	end
 elsif(ARGV.length == 1)
-	ext_symbol = File.extname(ARGV[0]).to_sym
-	loaders[ext_symbol].load(ARGV[0])
+	extension = File.extname(ARGV[0])
+	if(extension.nil? || extension.empty?)
+		raise "Extension expected for file"
+	end
+	ext_symbol = extension.to_sym
+	loaders[ext_symbol].instance.load(ARGV[0])
 else
 	$stderr.puts "No file given and no input/output lists given."
 	raise optparse.banner

data/lib/exceptions.rb ADDED

@@ -0,0 +1,11 @@
+module CommentRipper
+	class UnmatchedTokenException < RuntimeError
+		def initialize(token)
+			@token = token
+		end
+		def to_s
+			"Unmatched token: #{@token.symbol} at L#{@token.position.line} C#{@token.position.character}"
+		end
+	end
+end

data/lib/javascript/comment_stripper.rb ADDED

@@ -0,0 +1,25 @@
+require 'javascript/lexer'
+require 'exceptions'
+module CommentRipper
+	class Javascript
+		def initialize(opts={})
+			# nothing here yet
+		end
+		def strip(tree_or_string)
+			if(tree_or_string.is_a? String)
+				return strip(JavascriptLexer.new(tree_or_string).lex)
+			end
+			tree = tree_or_string
+			tree.map do |t|
+				case t
+				when SingleLineComment, MultiLineComment
+					""
+				else
+					t.to_s
+				end
+			end.join
+		end
+	end
+end

data/lib/javascript/lexer.rb ADDED

@@ -0,0 +1,182 @@
+require 'tokens/token'
+require 'tokens/words'
+require 'tokens/whitespace'
+require 'tokens/comments'
+require 'tokens/quotes'
+require 'tokens/special'
+module CommentRipper
+	class JavascriptLexer
+		def initialize(string)
+			@string = string
+		end
+		def lex
+			ret = @string
+			[:lex_1, :lex_2, :lex_3].each do |l|
+				ret = send(l, ret)
+			end
+			ret
+		end
+		private
+		def lex_1(string)
+			i = 0
+			tokens = []
+			line = 1
+			char = 1
+			until string[i] == nil
+				match = nil
+				basic_lexer_order = [SingleLineCommentStart, PreservedCommentStart, MultiLineCommentStart,
+					MultiLineCommentEnd, Newline, Tab, Space, SingleQuote, DoubleQuote, Slash,
+					EscapeCharacter]
+				basic_lexer_order.each do |token|
+					token_length = token.symbol.length
+					str = string[i..(i+token_length-1)]
+					if(token.match(str))
+						match = token.new(line, char)
+						i += token_length-1
+						char += token_length-1
+						break
+					end
+				end
+				if(tokens.last.is_a?(EscapeCharacter) and (match.is_a? Comment or match.is_a? Newline))
+					tokens.pop
+					i -= (match.symbol.length-1)
+					tokens << Character.new(line, char, string[i].chr)
+				else
+					if(match.is_a? Newline)
+						line += 1
+						char = 0
+					end
+					if !match.nil?
+						tokens << match
+					else
+						tokens << Character.new(line, char, string[i].chr)
+					end
+				end
+				i += 1
+				char += 1
+			end
+			tokens
+		end
+		def lex_2(stream)
+			tree = []
+			until stream.empty?
+				token = stream.shift
+				if(token.is_a? Character)
+					word = Word.new
+					word << token
+					while true
+						if(stream.first.is_a? Character)
+							word << stream.shift
+						else
+							break
+						end
+					end
+					tree << word
+				elsif(token.is_a? PreservedCommentStart)
+					comment = PreservedComment.new
+					comment << token
+					while true
+						peek = stream.first
+						if(stream.first.is_a? MultiLineCommentEnd)
+							comment << stream.shift
+							break
+						elsif(stream.first.nil?)
+							raise UnmatchedTokenException.new(token)
+						else
+							comment << stream.shift
+						end
+					end
+					tree << comment
+				elsif(token.is_a? MultiLineCommentStart)
+					comment = MultiLineComment.new
+					comment << token
+					while true
+						peek = stream.first
+						if(stream.first.is_a? MultiLineCommentEnd)
+							comment << stream.shift
+							break
+						elsif(stream.first.nil?)
+							raise "Unmatched multiline comment"
+						else
+							comment << stream.shift
+						end
+					end
+					tree << comment
+				elsif(token.is_a? SingleLineCommentStart)
+					comment = SingleLineComment.new
+					comment << token
+					while true
+						if(stream.first.is_a?(Newline) || stream.first.nil?)
+							break
+						else
+							comment << stream.shift
+						end
+					end
+					tree << comment
+				elsif(token.is_a? QuoteCharacter)
+					string = QuotedString.new
+					klass = token.class
+					string << token
+					while true
+						next_token = stream.shift
+						string << next_token
+						if(next_token.nil?)
+							raise UnmatchedTokenException.new(token)
+						elsif(next_token.is_a? klass)
+							break
+						end
+					end
+					tree << string
+				elsif(token.is_a? Slash)
+					regex = RegularExpression.new
+					regex << token
+					while true
+						next_token = stream.shift
+						regex << next_token
+						if(next_token.nil?)
+							raise UnmatchedTokenException.new(token)
+						elsif(next_token.is_a? Slash)
+							break
+						end
+					end
+					tree << regex
+				else
+					tree << token
+				end
+			end
+			tree
+		end
+		# Finally, do some comment-processing
+		def lex_3(tree)
+			i = 0
+			while((current = tree[i]) != nil)
+				if(current.is_a?(Comment))
+					j = i - 1
+					until(tree[j].nil?)
+						prev = tree[j]
+						if(prev.is_a? Space or prev.is_a? Tab)
+							current.unshift tree.slice!(j)
+						else
+							break
+						end
+						j -= 1
+						i -= 1
+					end
+					if(tree[i+1].is_a?(Newline))
+						if(i < 1 or tree[i-1].is_a? Newline)
+							current << tree.slice!(i+1)
+						end
+					end
+				end
+				i += 1
+			end
+			tree
+		end
+	end
+end