RubyGems - lyrics - Versions diffs - 0.0.2 - Mend

lyrics 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (50) hide show

data/.document +5 -0
data/.gitignore +21 -0
data/LICENSE +20 -0
data/README.rdoc +17 -0
data/Rakefile +45 -0
data/VERSION +1 -0
data/bin/lyrics +66 -0
data/lib/lyrics.rb +4 -0
data/lib/lyrics/cli/application.rb +99 -0
data/lib/lyrics/cli/optionsparser.rb +228 -0
data/lib/lyrics/cli/pluginadapter.rb +56 -0
data/lib/lyrics/cli/plugins.rb +79 -0
data/lib/lyrics/cli/wikipluginadapter.rb +139 -0
data/lib/lyrics/i18n/README +1 -0
data/lib/lyrics/i18n/en.rb +181 -0
data/lib/lyrics/i18n/es.rb +181 -0
data/lib/lyrics/i18n/i18n.rb +126 -0
data/lib/lyrics/i18n/sk.rb +174 -0
data/lib/lyrics/itrans/COPYRIGHT +31 -0
data/lib/lyrics/itrans/itrans +0 -0
data/lib/lyrics/itrans/itrans.txt +8 -0
data/lib/lyrics/itrans/lyric.txt +23 -0
data/lib/lyrics/itrans/udvng.ifm +206 -0
data/lib/lyrics/lyrics.rb +567 -0
data/lib/lyrics/lyrics_AZLyrics.rb +113 -0
data/lib/lyrics/lyrics_DarkLyrics.rb +124 -0
data/lib/lyrics/lyrics_Giitaayan.rb +124 -0
data/lib/lyrics/lyrics_Jamendo.rb +166 -0
data/lib/lyrics/lyrics_LeosLyrics.rb +142 -0
data/lib/lyrics/lyrics_LoudSongs.rb +135 -0
data/lib/lyrics/lyrics_LyricWiki.rb +328 -0
data/lib/lyrics/lyrics_LyricsDownload.rb +118 -0
data/lib/lyrics/lyrics_LyricsMania.rb +141 -0
data/lib/lyrics/lyrics_Lyriki.rb +286 -0
data/lib/lyrics/lyrics_SeekLyrics.rb +108 -0
data/lib/lyrics/lyrics_Sing365.rb +103 -0
data/lib/lyrics/lyrics_TerraLetras.rb +126 -0
data/lib/lyrics/mediawikilyrics.rb +1417 -0
data/lib/lyrics/utils/formdata.rb +56 -0
data/lib/lyrics/utils/htmlentities.rb +291 -0
data/lib/lyrics/utils/http.rb +198 -0
data/lib/lyrics/utils/itrans.rb +160 -0
data/lib/lyrics/utils/logger.rb +123 -0
data/lib/lyrics/utils/strings.rb +378 -0
data/lib/lyrics/utils/xmlhash.rb +111 -0
data/lyrics.gemspec +98 -0
data/spec/lyrics_spec.rb +7 -0
data/spec/spec.opts +1 -0
data/spec/spec_helper.rb +9 -0
metadata +137 -0

data/lib/lyrics/lyrics_LyricsDownload.rb ADDED

@@ -0,0 +1,118 @@
+# Copyright (C) 2007 by Sergio Pistone
+# sergio_pistone@yahoo.com.ar
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the
+# Free Software Foundation, Inc.,
+# 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+$LOAD_PATH << File.expand_path(File.dirname(__FILE__))
+require "utils/strings"
+require "utils/htmlentities"
+require "lyrics"
+class LyricsDownload < Lyrics
+	@@white_chars = "'\"¿?¡!()[].,;:-/& "
+	def LyricsDownload.site_host()
+		return "www.lyricsdownload.com"
+	end
+	def LyricsDownload.site_name()
+		return "Lyrics Download"
+	end
+	def LyricsDownload.lyrics_test_data()
+		return [
+			Request.new( "Nirvana", "Smells Like Teen Spirit", "Nevermind" ),
+			Request.new( "Radiohead", "Optimistic", "Kid A" ),
+			Request.new( "Massive Attack", "Protection", "Protection" ),
+			Request.new( "Portishead", "Wandering Star", "Dummy" ),
+		]
+	end
+	def cleanup_artist( artist )
+		artist = artist.gsub( /^the /i, "" )
+		Strings.remove_vocal_accents!( artist )
+		artist.gsub!( "&", "and" )
+		artist.tr!( @@white_chars, " " )
+		artist.strip!()
+		artist.tr!( " ", "-" )
+		return artist
+	end
+	def cleanup_title( title )
+		title = Strings.remove_vocal_accents( title )
+		title.gsub!( "&", "and" )
+		title.tr!( @@white_chars, " " )
+		title.strip!()
+		title.tr!( " ", "-" )
+		return title
+	end
+	def build_lyrics_fetch_data( request )
+		artist = cleanup_title( request.artist )
+		title = cleanup_title( request.title )
+		return FetchPageData.new( "http://#{site_host()}/#{artist}-#{title}-lyrics.html" )
+	end
+	def parse_lyrics( response, page_body )
+		page_body = Strings.latin12utf8( page_body )
+		page_body.tr_s!( " \n\r\t", " " )
+		page_body.tr_s!( "", "'" )
+# 		page_body.tr_s!( "‘", "'" )
+		if (md = /<title>([^<]+) - ([^<]+) LYRICS ?<\/title>/i.match( page_body ))
+			response.artist, response.title = md[1].strip(), md[2].strip()
+		end
+		return if ! page_body.gsub!( /^.*<div class="KonaBody" ><div id="div_customCSS">/, "" )
+		return if ! page_body.gsub!( /<\/div> ?<\/div>.*$/, "" )
+		page_body.gsub!( /\ ?<br ?\/?> ?/i, "\n" )
+		page_body.strip!()
+		response.lyrics = page_body
+	end
+	def build_suggestions_fetch_data( request )
+		artist = cleanup_artist( request.artist )
+		return FetchPageData.new( "http://#{site_host()}/#{artist}-lyrics.html" )
+	end
+	def parse_suggestions( request, page_body, page_url )
+		page_body = Strings.latin12utf8( page_body )
+		page_body.tr_s!( " \n\r\t", " " )
+		page_body.tr_s!( "", "'" )
+		suggestions = []
+		return suggestions if ! page_body.sub!( /^.*Lyrics list aplhabetically:<\/font><\/td>/, "" )
+		return suggestions if ! page_body.sub!( /<\/ul> ?<\/td> ?<\/tr> ?<\/table> ?<center><div>.*$/, "" )
+		page_body.split( "</li>" ).each() do |entry|
+			if (md = /<a class="txt_1" href="([^"]+)"><font size=2>([^<]+) Lyrics<\/font><\/a>/.match( entry ))
+				suggestions << Suggestion.new( request.artist, md[2], "http://#{site_host()}/#{md[1]}" )
+			end
+		end
+		return suggestions
+	end
+end

data/lib/lyrics/lyrics_LyricsMania.rb ADDED

@@ -0,0 +1,141 @@
+# Copyright (C) 2007-2008 by
+# Davide Lo Re <boyska@gmail.com>
+# Sergio Pistone <sergio_pistone@yahoo.com.ar>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the
+# Free Software Foundation, Inc.,
+# 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+$LOAD_PATH << File.expand_path(File.dirname(__FILE__))
+require "utils/strings"
+require "lyrics"
+require "cgi"
+class LyricsMania < Lyrics
+	def LyricsMania.site_host()
+		return "www.lyricsmania.com"
+	end
+	def LyricsMania.site_name()
+		return "LyricsMania"
+	end
+	def LyricsMania.lyrics_test_data()
+		return [
+			Request.new( "Nirvana", "Lounge Act", "Nevermind" ),
+			Request.new( "Radiohead", "Idioteque", "Kid A" ),
+			Request.new( "Pearl Jam", "Porch", "Ten" ),
+			Request.new( "The Smashing Pumpkins", "Mayonaise", "Siamese Dream" ),
+		]
+	end
+	def LyricsMania.build_song_add_url( request )
+		return "http://#{site_host()}/add.html"
+	end
+	def LyricsMania.build_google_feeling_lucky_url( artist, title=nil )
+		query =  Strings.google_search_quote( artist )
+		query << " " << Strings.google_search_quote( title + " lyrics" ) if title
+		return Strings.build_google_feeling_lucky_url( query, site_host() )
+	end
+	def build_lyrics_fetch_data( request )
+		return FetchPageData.new( build_google_feeling_lucky_url( request.artist, request.title ) )
+	end
+	def lyrics_page_valid?( request, page_body, page_url )
+		md = /<title>([^<]+) Lyrics<\/title>/i.match( page_body )
+		return false if ! md
+		page_title = Strings.normalize( md[1] )
+		return	page_title.index( Strings.normalize( request.artist ) ) &&
+				page_title.index( Strings.normalize( request.title ) )
+	end
+	def parse_lyrics( response, page_body )
+		page_body = Strings.latin12utf8( page_body )
+		page_body.tr_s!( " \n\r\t", " " )
+		return if ! page_body.sub!( /^.* lyrics<\/h3>/, "" ) # metadata
+		metadata = {}
+		["artist", "album"].each() do |key|
+			if (md =/#{key}: <b><a href=[^>]+>([^<]+)<\/a><\/b>/i.match( page_body ))
+				metadata[key.downcase()] = md[1].strip().sub( /\ *lyrics$/, "" )
+			end
+		end
+		["year", "title"].each() do |key|
+			if (md =/#{key}: ([^<]+)<(br|\/td)>/i.match( page_body ))
+				metadata[key.downcase()] = md[1].strip()
+			end
+		end
+		response.artist = metadata["artist"] if metadata.include?( "artist" )
+		response.title = metadata["title"] if metadata.include?( "title" )
+		response.album = metadata["album"] if metadata.include?( "album" )
+		response.year = metadata["year"] if metadata.include?( "year" )
+		md = /<\/span> ?<\/center>(.*)<center> ?<span style/.match( page_body )
+		return if ! md
+		page_body = md[1]
+		page_body.sub!( /&#91;.+ Lyrics on http:\/\/#{site_host()}\/ &#93;/, "" )
+		page_body.sub!( /^.*<\/a>/, "" ) # additional (optional) crap at the beginning
+		page_body.gsub!( /<u>&lt;a[^<]+&lt;\/a&gt;<\/u>/, "" ) # yet more crap
+		page_body.gsub!( /\ ?<br ?\/?> ?/i, "\n" )
+		page_body.sub!( /^\ ?<strong>Lyrics to [^<]+<\/strong> :<\/?br> */i, "" )
+		page_body.strip!()
+		response.lyrics = page_body
+	end
+	def build_suggestions_fetch_data( request )
+		return FetchPageData.new( build_google_feeling_lucky_url( request.artist ) )
+	end
+	def suggestions_page_valid?( request, page_body, page_url )
+		md = /<title>([^<]+) Lyrics<\/title>/i.match( page_body )
+		return md ? Strings.normalize( md[1] ).index( Strings.normalize( request.artist ) ) : nil
+	end
+	# returns an array of maps with following keys: url, artist, title
+	def parse_suggestions( request, page_body, page_url )
+		page_body = Strings.latin12utf8( page_body )
+		page_body.tr_s!( " \n\r\t", " " )
+		suggestions = []
+		# remove table with other artists at the bottom
+		return suggestions if ! page_body.sub!( /(.*)<table.*/, "\\1" )
+		md = /<table width=100%>(.*)<\/table>/.match( page_body )
+		return suggestions if ! md
+		md[1].split( /<a href=/ ).each() do |entry|
+			if (md = /"(\/lyrics\/[^"]+)" title="[^"]+"> ?([^>]+) lyrics<\/a><br>/.match( entry ))
+				suggestions << Suggestion.new( request.artist, md[2], "http://#{site_host()}#{md[1]}" )
+			end
+		end
+		return suggestions
+	end
+end

data/lib/lyrics/lyrics_Lyriki.rb ADDED

@@ -0,0 +1,286 @@
+# Copyright (C) 2006-2008 by Sergio Pistone
+# sergio_pistone@yahoo.com.ar
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the
+# Free Software Foundation, Inc.,
+# 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+$LOAD_PATH << File.expand_path(File.dirname(__FILE__))
+require "utils/strings"
+require "utils/http"
+require "mediawikilyrics"
+require "uri"
+require "cgi"
+class Lyriki < MediaWikiLyrics
+	def Lyriki.site_host()
+		return "www.lyriki.com"
+	end
+	def Lyriki.site_name()
+		return "Lyriki"
+	end
+	def Lyriki.control_page()
+		return "Lyriki:Wiki-Lyrics"
+	end
+	def parse_lyrics( response, page_body )
+		custom_data = {}
+		if (md = /\{\{\s*[Ss]ong\s*\|.*$/m.match( page_body ))
+			template_data = parse_template( md[0] )
+			template_data["params"].each() do |key, value|
+				custom_data[key.to_s()] = value
+				if value.is_a?( String )
+					value.gsub!( /<br ?\/?>/i, "; " ) if key != "song"
+					value.gsub!( /<\/?[^>]+\/?>/, " " )
+					value.tr_s!( " \n\r\t", " " )
+					value.strip!()
+				end
+			end
+			response.title = custom_data["song"] if custom_data.include?( "song" )
+			if custom_data["artist"].is_a?( String )
+				response.artist = custom_data["artist"]
+			elsif custom_data["artists"].is_a?( Array )
+				artist = ""
+				custom_data["artists"].each() do |token|
+					if token.is_a?( String )
+						artist << token.gsub( /\[\[|\]\]/, "" )
+					elsif token.is_a?( Hash ) && token["name"].downcase() == "song artist" && token["params"][1]
+						artist << token["params"][1]
+					end
+				end
+				artist.gsub!( /\s*<br *\/?>\s*/i, " " )
+				response.artist = artist
+			elsif custom_data["artists"].is_a?( String ) # this case is DEPRECATED by Lyriki guides
+				response.artist = custom_data["artists"].gsub( /\[\[|\]\]/, "" )
+			end
+			if custom_data["album"].is_a?( String )
+				response.album, response.year = custom_data["album"], custom_data["year"]
+			elsif custom_data["albums"].is_a?( Array )
+				custom_data["albums"].each() do |token|
+					if token.is_a?( String )
+						if (md = /([^:]+):(.+) \(([0-9]{4,4})\)/.match( token ))
+							response.album, response.year = md[2], md[3]
+							break
+						end
+					elsif token.is_a?( Hash )
+						if token["name"].downcase() == "song album" && token["params"][2] && token["params"][3]
+							response.album, response.year = token["params"][2], token["params"][3]
+							break
+						end
+					end
+				end
+			end
+		end
+		custom_data["reviewed"] = (/\{\{[Aa]utoGenerated\}\}/.match( page_body ) == nil)
+		if (md = /<lyrics>(.*)<\/lyrics>/im.match( page_body ))
+	 		page_body = md[1]
+			page_body.gsub!( /[ \t]*[\r\n][ \t]*/m, "\n" )
+		else
+			page_body.gsub!( /\{\{.*\}\}\n?/, "" )
+			page_body.gsub!( /\[\[Category:.*\]\]\n?/, "" )
+			page_body.gsub!( /\ *== *(External *Links|Links) *==.*$/im, "" )
+			page_body = page_body.split( "\n" ).collect() do |line|
+				if line.index( /\s/ ) == 0
+					"\n" + line
+				else
+					line
+				end
+			end.join( "" )
+			page_body.gsub!( /\s*<br ?\/?>\s*/i, "\n" )
+		end
+		response.lyrics = page_body if ! Strings.empty?( page_body )
+		response.custom_data = custom_data
+	end
+	def Lyriki.build_tracks( album_data )
+		ret = ""
+		album_data.tracks.each() do |track|
+			track_length = track.length > 0 ?
+				"|#{track.length / 60}:#{track.length % 60 < 10 ? "0#{track.length % 60}" : track.length % 60}" :
+				""
+			track_artist = cleanup_title_token( track.artist )
+			track_title  = cleanup_title_token( track.title )
+			if album_data.various_artists?
+				ret += "# {{song link va|#{track_artist}|#{track_title}#{track_length}}}\n"
+			else
+				ret += "# {{song link|#{track_artist}|#{track_title}#{track_length}}}\n"
+			end
+		end
+		return ret
+	end
+	def Lyriki.build_album_page( reviewed, artist, album, year, month, day, tracks, album_art )
+		raise ArgumentError if Strings.empty?( artist ) || Strings.empty?( album ) || Strings.empty?( tracks )
+		s_name = get_sort_name( album )
+		s_letter = get_sort_letter( album )
+		album_art = nil if ! year || album_art == build_album_art_name( artist, album, year, "jpg", false )
+		contents = \
+		"#{reviewed ? "" : "{{autoGenerated}}\n"}" \
+		"{{Album\n" \
+		"| album    = #{album}\n" \
+		"| artist   = #{artist}\n" \
+		"| released = #{build_date( year, month, day )}\n" \
+		"#{album_art ? "| image    = #{album_art}\n" : ""}" \
+		"| tracks   =\n"
+		return \
+		"#{contents}" \
+		"#{tracks.strip()}\n" \
+		"}}\n" \
+		"\n" \
+		"{{C:Album|#{s_letter}|#{s_name}}}"
+	end
+	def Lyriki.build_song_page( reviewed, artist, title, album, year, credits, lyricist, lyrics )
+		raise ArgumentError if artist == nil || title == nil
+		s_name = get_sort_name( title )
+		s_letter = get_sort_letter( title )
+		year = year.to_i() <= 1900 ? "" : year.to_s()
+		song_page = reviewed ? "": "{{autoGenerated}}\n"
+		if (md = /^([^\s].*)\s+feat\.\s+([^\s].*)$/i.match( artist.strip() ))
+			artist, fartist = md[1].strip(), md[2]
+			song_page <<
+			"{{Song\n" \
+			"| song     = #{title}\n" \
+			"| artists  = {{song artist|#{artist}}}<br />feat. {{song artist|#{fartist}}}\n" \
+			"| albums   = {{song album|#{artist}|#{album}|#{year}}}\n" \
+			"| credits  = #{credits.to_s().split( "; " ).join( "<br />" )}\n" \
+			"| lyricist = #{lyricist.to_s().split( "; " ).join( "<br />" )}\n" \
+			"}}\n" \
+		else
+			song_page <<
+			"{{Song\n" \
+			"| song     = #{title}\n" \
+			"| artist   = #{artist}\n" \
+			"| album    = #{album}\n" \
+			"| year     = #{year}\n" \
+			"| credits  = #{credits.to_s().split( "; " ).join( "<br />" )}\n" \
+			"| lyricist = #{lyricist.to_s().split( "; " ).join( "<br />" )}\n" \
+			"}}\n" \
+		end
+		return song_page <<
+		"\n" \
+		"<lyrics>#{Strings.empty?( lyrics ) ? "<tt>(Instrumental)</tt>" : lyrics}</lyrics>\n" \
+		"\n" \
+		"{{C:Song|#{s_letter}|#{s_name}}}"
+	end
+	def Lyriki.build_album_art_name( artist, album, year, extension="jpg", cleanup=true )
+		if cleanup
+			artist = cleanup_title_token( artist )
+			album = cleanup_title_token( album )
+		end
+		album_art_name = "AlbumArt-#{artist}-#{album}_(#{year})#{Strings.empty?( extension ) ? "" : ".#{extension.strip()}"}".gsub( " ", "_" )
+		return Strings.remove_invalid_filename_chars( album_art_name )
+	end
+	def Lyriki.build_album_art_description( artist, album, year, cleanup=true )
+		if cleanup
+			artist = cleanup_title_token( artist )
+			album = cleanup_title_token( album )
+		end
+		return "#{artist}:#{album} (#{year})"
+	end
+	def Lyriki.find_album_art_name( artist, album, year )
+		normalized_artist = cleanup_title_token( artist )
+		Strings.remove_invalid_filename_chars!( normalized_artist )
+		Strings.normalize!( normalized_artist )
+		normalized_artist.gsub!( " ", "" )
+		normalized_album = cleanup_title_token( album )
+		Strings.remove_invalid_filename_chars!( normalized_album )
+		Strings.normalize!( normalized_album )
+		normalized_album.gsub!( " ", "" )
+		year = year.to_s().strip()
+		artist = cleanup_title_token( artist )
+		Strings.remove_invalid_filename_chars!( artist )
+		search_url = "http://#{site_host()}/index.php?ns6=1&search=#{CGI.escape( artist )}&searchx=Search&limit=500"
+		response, search_url = HTTP.fetch_page_get( search_url )
+		return nil if response == nil || response.body() == nil
+		candidates = []
+		parse_search_results( response.body(), true ).each() do |result|
+			next if result[@@SEARCH_RESULT_TITLE].index( "Image:" ) != 0
+			normalized_title = Strings.normalize( result[@@SEARCH_RESULT_TITLE] )
+			normalized_title.gsub!( " ", "" )
+			matches = 0
+			idx1 = normalized_title.index( "albumart" )
+			matches += 1 if idx1
+			idx1 = idx1 ? idx1 + "albumart".size : 0
+			idx2 = normalized_title.index( normalized_artist, idx1 )
+			matches += 4 if idx2
+			idx2 = idx2 ? idx2 + normalized_artist.size : idx1
+			idx3 = normalized_title.index( normalized_album, idx2 )
+			next if idx3 == nil
+			idx3 = idx3 ? idx3 + normalized_album.size : idx2
+			idx3 = normalized_title.index( year, idx3 )
+			matches += 2 if idx3
+			candidates.insert( -1, [ matches, result[@@SEARCH_RESULT_TITLE] ] )
+		end
+		if candidates.size > 0
+			candidates.sort!() { |x,y| y[0] <=> x[0] }
+			return URI.decode( candidates[0][1].slice( "Image:".size..-1 ).gsub( " ", "_" ) )
+		else
+			return nil
+		end
+	end
+	def Lyriki.cleanup_title_token!( title, downcase=false )
+		title.gsub!( /\[[^\]\[]*\]/, "" )
+		title.gsub!( /[\[|\]].*$/, "" )
+		title.gsub!( /`|´|’/, "'" )
+		title.gsub!( /''|«|»/, "\"" )
+		title.squeeze!( " " )
+		title.strip!()
+		title.gsub!( "+", "and" )
+		Strings.titlecase!( title, true, downcase )
+		return title
+	end
+end