lyrics 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.gitignore +21 -0
- data/LICENSE +20 -0
- data/README.rdoc +17 -0
- data/Rakefile +45 -0
- data/VERSION +1 -0
- data/bin/lyrics +66 -0
- data/lib/lyrics.rb +4 -0
- data/lib/lyrics/cli/application.rb +99 -0
- data/lib/lyrics/cli/optionsparser.rb +228 -0
- data/lib/lyrics/cli/pluginadapter.rb +56 -0
- data/lib/lyrics/cli/plugins.rb +79 -0
- data/lib/lyrics/cli/wikipluginadapter.rb +139 -0
- data/lib/lyrics/i18n/README +1 -0
- data/lib/lyrics/i18n/en.rb +181 -0
- data/lib/lyrics/i18n/es.rb +181 -0
- data/lib/lyrics/i18n/i18n.rb +126 -0
- data/lib/lyrics/i18n/sk.rb +174 -0
- data/lib/lyrics/itrans/COPYRIGHT +31 -0
- data/lib/lyrics/itrans/itrans +0 -0
- data/lib/lyrics/itrans/itrans.txt +8 -0
- data/lib/lyrics/itrans/lyric.txt +23 -0
- data/lib/lyrics/itrans/udvng.ifm +206 -0
- data/lib/lyrics/lyrics.rb +567 -0
- data/lib/lyrics/lyrics_AZLyrics.rb +113 -0
- data/lib/lyrics/lyrics_DarkLyrics.rb +124 -0
- data/lib/lyrics/lyrics_Giitaayan.rb +124 -0
- data/lib/lyrics/lyrics_Jamendo.rb +166 -0
- data/lib/lyrics/lyrics_LeosLyrics.rb +142 -0
- data/lib/lyrics/lyrics_LoudSongs.rb +135 -0
- data/lib/lyrics/lyrics_LyricWiki.rb +328 -0
- data/lib/lyrics/lyrics_LyricsDownload.rb +118 -0
- data/lib/lyrics/lyrics_LyricsMania.rb +141 -0
- data/lib/lyrics/lyrics_Lyriki.rb +286 -0
- data/lib/lyrics/lyrics_SeekLyrics.rb +108 -0
- data/lib/lyrics/lyrics_Sing365.rb +103 -0
- data/lib/lyrics/lyrics_TerraLetras.rb +126 -0
- data/lib/lyrics/mediawikilyrics.rb +1417 -0
- data/lib/lyrics/utils/formdata.rb +56 -0
- data/lib/lyrics/utils/htmlentities.rb +291 -0
- data/lib/lyrics/utils/http.rb +198 -0
- data/lib/lyrics/utils/itrans.rb +160 -0
- data/lib/lyrics/utils/logger.rb +123 -0
- data/lib/lyrics/utils/strings.rb +378 -0
- data/lib/lyrics/utils/xmlhash.rb +111 -0
- data/lyrics.gemspec +98 -0
- data/spec/lyrics_spec.rb +7 -0
- data/spec/spec.opts +1 -0
- data/spec/spec_helper.rb +9 -0
- metadata +137 -0
@@ -0,0 +1,160 @@
|
|
1
|
+
# Copyright (C) 2006-2008 by Sergio Pistone
|
2
|
+
# sergio_pistone@yahoo.com.ar
|
3
|
+
#
|
4
|
+
# This program is free software; you can redistribute it and/or modify
|
5
|
+
# it under the terms of the GNU General Public License as published by
|
6
|
+
# the Free Software Foundation; either version 2 of the License, or
|
7
|
+
# (at your option) any later version.
|
8
|
+
#
|
9
|
+
# This program is distributed in the hope that it will be useful,
|
10
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
11
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
12
|
+
# GNU General Public License for more details.
|
13
|
+
#
|
14
|
+
# You should have received a copy of the GNU General Public License
|
15
|
+
# along with this program; if not, write to the
|
16
|
+
# Free Software Foundation, Inc.,
|
17
|
+
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
18
|
+
|
19
|
+
require File.expand_path( File.dirname( __FILE__ ) + "/strings" )
|
20
|
+
|
21
|
+
# Conversion helpers between ITRANS (an ASCII transliteration scheme) and
# Devanagari text.
#
# ITRANS -> Devanagari is delegated to the external `itrans` executable found
# in @@itrans_dir; Devanagari -> ITRANS is done in pure Ruby with the lookup
# tables defined at the bottom of this module.
module ITRANS

  @@itrans_dir = File.dirname( File.expand_path( __FILE__ ) ) + "/../itrans"
  @@null_dev = "/dev/null"

  # Returns a canonical ITRANS spelling of +text+ by converting it to
  # Devanagari and back again.
  def ITRANS.normalize( text )
    return ITRANS.from_devanagari!( ITRANS.to_devanagari( text ) )
  end

  # In-place variant of ITRANS.to_devanagari; returns +text+.
  def ITRANS.to_devanagari!( text )
    text.replace( to_devanagari( text ) )
  end

  # Converts ITRANS encoded +text+ to Devanagari by piping it through the
  # external `itrans -U` command and returns the stripped output.
  #
  # Relies on Strings.shell_quote (required at the top of this file) to embed
  # the input in the shell command line.
  def ITRANS.to_devanagari( text )
    input = "#indianifm=udvng.ifm\n #indian\n#{text}\n#endindian"
    # The command runs from inside the itrans directory (presumably so the
    # relative udvng.ifm reference resolves). The block form of Dir.chdir
    # restores the previous working directory even if the command raises.
    trans = Dir.chdir( @@itrans_dir ) do
      `echo #{Strings.shell_quote( input )} | #{@@itrans_dir}/itrans -U 2>#{@@null_dev}`
    end
    trans.gsub!( /%[^\n]*/, "" ) # TODO search line
    trans.strip!()
    return trans
  end

  # Transliterates the Devanagari characters of +text+ to ITRANS in place and
  # returns +text+.
  def ITRANS.from_devanagari!( text )
    @@devanagari2itrans.each() do |devana, itrans|
      text.gsub!( devana, itrans )
    end
    # Longest keys first: consonant+nukta sequences must be consumed before
    # the bare consonant they start with, otherwise they can never match.
    consonants = @@devanagari2itrans_consonants.sort_by() do |devana, _itrans|
      -devana.unpack( "U*" ).size()
    end
    consonants.each() do |devana, itrans|
      # is the only symbol in the 'word' --> add an 'a' at the end:
      text.gsub!( /(^|[ "\.:;\(\[])#{devana}([,;:?!\)\]\s]|$)/, "\\1#{itrans}a\\2" )
      # is not followed by a vowel --> add an 'a' at the end:
      text.gsub!( /#{devana}([^aeiouAEIOU,;:?!\)\]\s])/, "#{itrans}a\\1" )
      text.gsub!( devana, itrans )
    end
    return text
  end

  # Non-destructive variant of ITRANS.from_devanagari!; works on a copy.
  def ITRANS.from_devanagari( text )
    return ITRANS.from_devanagari!( String.new( text ) )
  end

  # Builds a UTF-8 string from one or more Unicode code points.
  def ITRANS.unicode( *codepoints )
    codepoints.pack( "U*" )
  end

  # Vowels, vowel signs and irregular conjuncts: replaced verbatim.
  @@devanagari2itrans = {
    ITRANS.unicode( 0x0901 ) => "",

    # vowels:
    ITRANS.unicode( 0x0905 ) => "a",
    ITRANS.unicode( 0x0906 ) => "aa", # /A
    ITRANS.unicode( 0x093E ) => "aa", # /A
    ITRANS.unicode( 0x0907 ) => "i",
    ITRANS.unicode( 0x093F ) => "i",
    ITRANS.unicode( 0x0908 ) => "ii", # /I
    ITRANS.unicode( 0x0940 ) => "ii", # /I
    ITRANS.unicode( 0x0909 ) => "u",
    ITRANS.unicode( 0x0941 ) => "u",
    ITRANS.unicode( 0x090A ) => "uu", # /U
    ITRANS.unicode( 0x0942 ) => "uu", # /U
    ITRANS.unicode( 0x090B ) => "RRi", # R^i
    ITRANS.unicode( 0x0943 ) => "RRi", # R^i
    ITRANS.unicode( 0x090C ) => "LLi", # L^i
    ITRANS.unicode( 0x0944 ) => "LLi", # L^i
    ITRANS.unicode( 0x090F ) => "e",
    ITRANS.unicode( 0x0947 ) => "e",
    ITRANS.unicode( 0x0910 ) => "ai",
    ITRANS.unicode( 0x0948 ) => "ai",
    ITRANS.unicode( 0x0913 ) => "o",
    ITRANS.unicode( 0x094B ) => "o",
    ITRANS.unicode( 0x0914 ) => "au",
    ITRANS.unicode( 0x094C ) => "au",
    # itrans irregular (consonant + virama + consonant):
    ITRANS.unicode( 0x0915, 0x094D, 0x0937 ) => "kSh", # x / kS
    ITRANS.unicode( 0x0924, 0x094D, 0x0930 ) => "tr",
    ITRANS.unicode( 0x091C, 0x094D, 0x091E ) => "j~n", # GY / dny
    ITRANS.unicode( 0x0936, 0x094D, 0x0930 ) => "shr",
  }

  # Consonants: an inherent 'a' is appended where no vowel follows.
  @@devanagari2itrans_consonants = {
    # gutturals:
    ITRANS.unicode( 0x0915 ) => "k",
    ITRANS.unicode( 0x0916 ) => "kh",
    # ITRANS.unicode( 0x0916 ) => ".Nkh",
    ITRANS.unicode( 0x0917 ) => "g",
    ITRANS.unicode( 0x0918 ) => "gh",
    ITRANS.unicode( 0x0919 ) => "~N", # was keyed 0x0918, which overwrote "gh"
    # palatals:
    ITRANS.unicode( 0x091A ) => "ch",
    ITRANS.unicode( 0x091B ) => "Ch",
    ITRANS.unicode( 0x091C ) => "j",
    ITRANS.unicode( 0x091D ) => "jh",
    ITRANS.unicode( 0x091E ) => "~n", # JN
    # retroflexes:
    ITRANS.unicode( 0x091F ) => "T",
    ITRANS.unicode( 0x0920 ) => "Th",
    ITRANS.unicode( 0x0921 ) => "D",
    ITRANS.unicode( 0x0922 ) => "Dh",
    # ITRANS.unicode( 0x0922 ) => ".Dh", # Rh (valid?)
    ITRANS.unicode( 0x0923 ) => "N",
    # dentals:
    ITRANS.unicode( 0x0924 ) => "t",
    ITRANS.unicode( 0x0925 ) => "th",
    ITRANS.unicode( 0x0926 ) => "d",
    ITRANS.unicode( 0x0927 ) => "dh",
    ITRANS.unicode( 0x0928 ) => "n",
    # labials:
    ITRANS.unicode( 0x092A ) => "p",
    ITRANS.unicode( 0x092B ) => "ph",
    ITRANS.unicode( 0x092C ) => "b",
    ITRANS.unicode( 0x092D ) => "bh",
    ITRANS.unicode( 0x092E ) => "m",
    # semi-vowels:
    ITRANS.unicode( 0x092F ) => "y",
    ITRANS.unicode( 0x0930 ) => "r",
    ITRANS.unicode( 0x0932 ) => "l",
    ITRANS.unicode( 0x0935 ) => "v", # w
    # sibilants:
    ITRANS.unicode( 0x0936 ) => "sh",
    ITRANS.unicode( 0x0937 ) => "Sh", # shh
    ITRANS.unicode( 0x0938 ) => "s",
    # miscellaneous:
    ITRANS.unicode( 0x0939 ) => "h",
    ITRANS.unicode( 0x0902 ) => ".n", # M / .m
    ITRANS.unicode( 0x0903 ) => "H", # .h
    ITRANS.unicode( 0x0950 ) => "OM", # AUM
    # other consonants, listed both as base consonant + nukta (U+093C) and
    # as the precomposed code point so either spelling is recognised:
    ITRANS.unicode( 0x0915, 0x093C ) => "q",
    ITRANS.unicode( 0x0958 ) => "q",
    ITRANS.unicode( 0x0916, 0x093C ) => "Kh",
    ITRANS.unicode( 0x0959 ) => "Kh",
    ITRANS.unicode( 0x0917, 0x093C ) => "G",
    ITRANS.unicode( 0x095A ) => "G",
    ITRANS.unicode( 0x091C, 0x093C ) => "z",
    ITRANS.unicode( 0x095B ) => "z",
    ITRANS.unicode( 0x092B, 0x093C ) => "f",
    ITRANS.unicode( 0x095E ) => "f",
    ITRANS.unicode( 0x0921, 0x093C ) => ".D", # R
    ITRANS.unicode( 0x095C ) => ".D", # R (valid?)
    ITRANS.unicode( 0x0922, 0x093C ) => ".Dh", # Rh
    ITRANS.unicode( 0x095D ) => ".Dh", # Rh
  }

end
|
@@ -0,0 +1,123 @@
|
|
1
|
+
# Copyright (C) 2006-2008 by Sergio Pistone
|
2
|
+
# sergio_pistone@yahoo.com.ar
|
3
|
+
#
|
4
|
+
# This program is free software; you can redistribute it and/or modify
|
5
|
+
# it under the terms of the GNU General Public License as published by
|
6
|
+
# the Free Software Foundation; either version 2 of the License, or
|
7
|
+
# (at your option) any later version.
|
8
|
+
#
|
9
|
+
# This program is distributed in the hope that it will be useful,
|
10
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
11
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
12
|
+
# GNU General Public License for more details.
|
13
|
+
#
|
14
|
+
# You should have received a copy of the GNU General Public License
|
15
|
+
# along with this program; if not, write to the
|
16
|
+
# Free Software Foundation, Inc.,
|
17
|
+
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
18
|
+
|
19
|
+
# Small append-only file logger with optional indentation ("tabulation") of
# the logged messages.
#
# Every operation opens and closes the log file itself, so no file handle is
# kept between calls.
class Logger

  # file_path::         path of the log file
  # truncate_to_lines:: when >= 0, the existing file is immediately cut down
  #                     to its last +truncate_to_lines+ lines (see #truncate)
  def initialize( file_path, truncate_to_lines=-1 )
    @file_path = file_path
    @tabulation = nil
    @tabulation_base = " ".freeze()
    @tabulation_level = 0
    @skip_first_line_tabulation = false
    truncate( truncate_to_lines ) if truncate_to_lines >= 0
  end

  def finalize() # TODO revise implementation
  end

  def get_file_path()
    return @file_path
  end

  # Switches to another log file. The previous log file is deleted (unless
  # the old path is a directory or does not exist).
  def set_file_path( file_path )
    if @file_path != file_path
      File.delete( @file_path ) if File.exist?( @file_path ) && ! File.directory?( @file_path )
      @file_path = file_path.clone().freeze()
    end
  end

  # Keeps only the last +max_lines+ lines of the log file. The file is
  # created empty when it does not exist yet.
  def truncate( max_lines )
    lines = begin
      File.open( @file_path, File::RDONLY ) { |file| file.read().split( "\n" ) }
    rescue Errno::ENOENT
      File.open( @file_path, File::CREAT|File::WRONLY ) { }
      []
    end
    return nil if lines.size() <= max_lines

    kept = max_lines > 0 ? lines.last( max_lines ) : []
    File.open( @file_path, File::CREAT|File::TRUNC|File::WRONLY ) do |file|
      kept.each() do |line|
        file.write( line )
        file.write( "\n" )
      end
    end
    return nil
  end

  # Empties the log file, creating it if needed.
  def reset()
    # WRONLY added: truncating through a read-only descriptor is not portable
    File.open( @file_path, File::CREAT|File::TRUNC|File::WRONLY ) { }
  end

  # Appends +msg+ followed by +new_lines+ line breaks. When a tabulation is
  # active every line of +msg+ is prefixed with it; the prefix of the first
  # line is skipped if the previous call ended without a line break.
  def log( msg, new_lines=1 )
    msg = msg.to_s()
    File.open( @file_path, File::CREAT|File::APPEND|File::WRONLY ) do |output|
      if @tabulation
        output.write( @tabulation ) if ! @skip_first_line_tabulation
        output.write( msg.gsub( "\n", "\n#{@tabulation}" ) )
        @skip_first_line_tabulation = new_lines <= 0
      else
        output.write( msg )
      end
      new_lines.times() { output.write( "\n" ) }
    end
    return nil
  end

  def get_tabulation_base()
    return @tabulation_base
  end

  # Changes the string repeated once per tabulation level.
  def set_tabulation_base( tabulation_base )
    if @tabulation_base != tabulation_base
      # The original stored the new base in @tabulation_level and then read
      # an undefined local `level`, so this method always raised NameError.
      @tabulation_base = tabulation_base.clone().freeze()
      rebuild_tabulation()
    end
  end

  def get_tabulation_level()
    return @tabulation_level
  end

  # Sets the indentation depth; levels <= 0 disable indentation.
  def set_tabulation_level( level )
    if @tabulation_level != level
      @tabulation_level = level
      rebuild_tabulation()
    end
  end

  def increase_tabulation_level()
    set_tabulation_level( @tabulation_level + 1 )
  end

  def decrease_tabulation_level()
    set_tabulation_level( @tabulation_level - 1 )
  end

  private

  # Recomputes the cached prefix from the current base and level.
  def rebuild_tabulation()
    @tabulation = @tabulation_level > 0 ? @tabulation_base * @tabulation_level : nil
  end

end
|
@@ -0,0 +1,378 @@
|
|
1
|
+
# Copyright (C) 2006-2008 by Sergio Pistone
|
2
|
+
# sergio_pistone@yahoo.com.ar
|
3
|
+
#
|
4
|
+
# This program is free software; you can redistribute it and/or modify
|
5
|
+
# it under the terms of the GNU General Public License as published by
|
6
|
+
# the Free Software Foundation; either version 2 of the License, or
|
7
|
+
# (at your option) any later version.
|
8
|
+
#
|
9
|
+
# This program is distributed in the hope that it will be useful,
|
10
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
11
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
12
|
+
# GNU General Public License for more details.
|
13
|
+
#
|
14
|
+
# You should have received a copy of the GNU General Public License
|
15
|
+
# along with this program; if not, write to the
|
16
|
+
# Free Software Foundation, Inc.,
|
17
|
+
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
18
|
+
|
19
|
+
require File.expand_path( File.dirname( __FILE__ ) + "/htmlentities" )
|
20
|
+
|
21
|
+
require "cgi"
|
22
|
+
|
23
|
+
# $KCODE only has an effect on Ruby 1.8; later versions ignore the assignment
# (some of them with a warning), so it is only performed where it matters.
$KCODE = "u" if RUBY_VERSION < "1.9" # unicode support
|
24
|
+
|
25
|
+
# Collection of stateless string helpers: shell/SQL quoting, case handling
# for ASCII + Latin-1 letters, and normalisation of artist/title/lyrics text.
#
# Methods ending in "!" modify their argument in place and return it; the
# plain variants return a new string and leave the argument untouched.
module Strings

  @@word_separators = " \t\n()[],.;:-¿?¡!\"/\\"

  # Accented vowels and the plain vowel each group collapses to.
  @@accent_folds = [
    [ /[áàäâåã]/, "a" ], [ /[ÁÀÄÂÅÃ]/, "A" ],
    [ /[éèëê]/, "e" ], [ /[ÉÈËÊ]/, "E" ],
    [ /[íìïî]/, "i" ], [ /[ÍÌÏÎ]/, "I" ],
    [ /[óòöô]/, "o" ], [ /[ÓÒÖÔ]/, "O" ],
    [ /[úùüû]/, "u" ], [ /[ÚÙÜÛ]/, "U" ],
  ]

  # Trailing "feat."/"ft."/"featuring" credit, optionally inside brackets.
  # Capture 2 is the name of the featured artist.
  @@featuring_pattern = /[ \(\[](ft\.|ft |feat\.|feat |featuring ) *([^\)\]]+)[\)\]]? *$/i

  # Characters normalised to a plain apostrophe / double quote by
  # cleanup_lyrics.
  # NOTE(review): in the version under review both patterns ended in
  # alternatives with no visible character; they are assumed to have been the
  # C1 controls U+0091..U+0094 (Windows-1252 curly quotes read as Latin-1) --
  # confirm against the upstream file. U+201C was added next to U+201D.
  @@single_quote_like = Regexp.union(
    "`", "´", "‘", "’", [0x0091].pack( "U" ), [0x0092].pack( "U" )
  )
  @@double_quote_like = Regexp.union(
    "''", "«", "»", "„", "“", "”", [0x0093].pack( "U" ), [0x0094].pack( "U" )
  )

  # True when +text+ (converted with to_s) is empty or only whitespace.
  def Strings.empty?( text )
    return text.to_s().strip().empty?
  end

  # Wraps +text+ in double quotes for use on a shell command line. Every
  # character that stays special inside double quotes (\ " ` $) is escaped;
  # "$" was previously left alone, which let the shell expand variables and
  # command substitutions found in the text.
  def Strings.shell_quote( text )
    return "\"" + text.gsub( /[\\"`$]/ ) { |char| "\\" + char } + "\""
  end

  # Reverses shell_quote. Text not starting with a double quote is assumed
  # to be single-quoted and only loses its first and last character.
  def Strings.shell_unquote( text )
    if text.slice( 0, 1 ) == "\""
      # single pass, so an escaped backslash can't be mistaken for an escape
      return text.slice( 1..-2 ).gsub( /\\([\\"`$])/ ) { $1 }
    else # if text.slice( 0, 1 ) == "'"
      return text.slice( 1..-2 )
    end
  end

  # Backslash-escapes \ " ` ' $ and spaces for use in an unquoted shell word.
  def Strings.shell_escape( text )
    return text.gsub( /[\\"`'$ ]/ ) { |char| "\\" + char }
  end

  # Reverses shell_escape.
  def Strings.shell_unescape( text )
    return text.gsub( /\\([\\"`'$ ])/ ) { $1 }
  end

  # Returns +text+ as a single-quoted SQL string literal.
  def Strings.sql_quote( text )
    return "'" + Strings.sql_escape( text ) + "'"
  end

  # Reverses sql_quote (drops the first and last character, then unescapes).
  def Strings.sql_unquote( text )
    return Strings.sql_unescape( text.slice( 1..-2 ) )
  end

  def Strings.sql_escape( text )
    return text.gsub( "'", "''" )
  end

  def Strings.sql_unescape( text )
    return text.gsub( "''", "'" )
  end

  # Returns +length+ random characters taken from a-z and 0-9.
  def Strings.random_token( length=10 )
    chars = ( "a".."z" ).to_a() + ( "0".."9" ).to_a()
    token = ""
    # rand( n ) already excludes n; the former "size-1" never produced "9"
    length.times() { token << chars[rand( chars.size() )] }
    return token
  end

  def Strings.remove_invalid_filename_chars( filename )
    return Strings.remove_invalid_filename_chars!( String.new( filename ) )
  end

  # Deletes * ? : | / \ < > from +filename+ in place.
  def Strings.remove_invalid_filename_chars!( filename )
    # A regexp is used instead of tr: in the tr set "\\<" the backslash only
    # escaped "<", so backslashes themselves were never removed.
    filename.gsub!( /[*?:|\/\\<>]/, "" )
    return filename
  end

  def Strings.remove_vocal_accents( text )
    return Strings.remove_vocal_accents!( String.new( text ) )
  end

  # Replaces accented vowels with their unaccented counterpart, in place.
  def Strings.remove_vocal_accents!( text )
    @@accent_folds.each() { |pattern, plain| text.gsub!( pattern, plain ) }
    return text
  end

  # Prepares +text+ as a quoted Google search term: double quotes and a
  # leading article "the" are removed. Returns "" for blank results.
  def Strings.google_search_quote( text )
    text = text.gsub( "\"", "" )
    # \b keeps words that merely start with "the" ("Them", "Therapy?") intact
    text.gsub!( /^\ *the\b\ */i, "" )
    return Strings.empty?( text ) ? "" : "\"#{text}\""
  end

  # Builds an "I'm Feeling Lucky" Google URL for +query+, optionally
  # restricted to +site+.
  def Strings.build_google_feeling_lucky_url( query, site=nil )
    url = "http://www.google.com/search?q=#{CGI.escape( query )}"
    url += "+site%3A#{CGI.escape( site.to_s() )}" if site
    return url + "&btnI"
  end

  # Lowercases ASCII and Latin-1 letters of +text+ (UTF-8). Falls back to
  # String#downcase when the text cannot be decoded as UTF-8.
  def Strings.downcase( text )
    begin
      codes = text.to_s().unpack( "U*" ).collect() do |c|
        # A-Z and U+00C0..U+00DE, except U+00D7 (multiplication sign)
        if ( c >= 65 && c <= 90 ) || ( c >= 192 && c <= 222 && c != 215 )
          c + 32
        else
          c
        end
      end
      return codes.pack( "U*" )
    rescue StandardError # fallback to normal operation on error
      return text.downcase()
    end
  end

  def Strings.downcase!( text )
    return text.replace( Strings.downcase( text ) )
  end

  # Uppercases ASCII and Latin-1 letters of +text+ (UTF-8). Falls back to
  # String#upcase when the text cannot be decoded as UTF-8.
  def Strings.upcase( text )
    begin
      codes = text.to_s().unpack( "U*" ).collect() do |c|
        # a-z and U+00E0..U+00FE, except U+00F7 (division sign)
        if ( c >= 97 && c <= 122 ) || ( c >= 224 && c <= 254 && c != 247 )
          c - 32
        else
          c
        end
      end
      return codes.pack( "U*" )
    rescue StandardError # fallback to normal operation on error
      return text.upcase()
    end
  end

  def Strings.upcase!( text )
    return text.replace( Strings.upcase( text ) )
  end

  # Uppercases the first alphanumeric character of +text+ and returns the
  # result as a new string.
  #
  # downcase::   lowercase the whole text first
  # first_only:: only act when the very first character of a line is
  #              alphanumeric, instead of searching for the first one
  def Strings.capitalize( text, downcase=false, first_only=false )
    # dup: to_s returns the receiver for strings and sub! below would
    # otherwise modify the caller's object
    text = downcase ? Strings.downcase( text ) : text.to_s().dup()
    pattern = first_only ? /^([0-9a-zA-ZÀ-Þà-öø-þ])/ : /([0-9a-zA-ZÀ-Þà-öø-þ])/
    text.sub!( pattern ) { |char| Strings.upcase( char ) }
    return text
  end

  def Strings.capitalize!( text, downcase=false, first_only=false )
    return text.replace( Strings.capitalize( text, downcase, first_only ) )
  end

  # Uppercases the first character of every word of +text+.
  #
  # correct_case:: write articles, conjunctions and short prepositions that
  #                are not the first word in lowercase
  # downcase::     lowercase the whole text first
  def Strings.titlecase( text, correct_case=true, downcase=false )
    text = Strings.capitalize( text, downcase )
    word_start = true
    codes = text.unpack( "U*" ).collect() do |c|
      # always build a UTF-8 character: Integer#chr returns a binary string
      # for 128..255, which cannot be compared with the UTF-8 separators
      chr = [c].pack( "U*" )
      if @@word_separators.include?( chr )
        word_start = true
      elsif word_start
        word_start = false
        c = Strings.upcase( chr ).unpack( "U*" )[0]
      end
      c
    end
    text = codes.pack( "U*" )
    if correct_case
      lc_words = [
        "the", "a", "an", # articles
        "and", "but", "or", "nor", # conjunctions
        "'n'", "'n", "n'", # and contractions
        "as", "at", "by", "for", "in", "of", "on", "to", # short prepositions
        #"from", "into", "onto", "with", "over" # not so short prepositions
        "feat", "vs", # special words
      ]
      lc_words.each() do |lc_word|
        # "-" sits at the end of the class so it is a literal; it used to
        # form the range "."-"?" which also matched digits, "<", "=" and ">"
        text.gsub!( /\ #{Regexp.escape( lc_word )}([ ,;:\.?!\"\/\\\)-])/i, " #{lc_word}\\1" )
      end
    end
    return text
  end

  def Strings.titlecase!( text, correct_case=true, downcase=false )
    return text.replace( Strings.titlecase( text, correct_case, downcase ) )
  end

  # Reduces +token+ to a comparison-friendly form: lowercase, punctuation
  # collapsed to single spaces, quote variants unified, "&"/"+"/"'n'" spelled
  # "and", and a leading "the " or trailing ", the" removed.
  def Strings.normalize( token )
    token = Strings.downcase( token )
    token.tr_s!( " \n\r\t.;:()[]", " " )
    token.strip!()
    token.gsub!( /`|´|’/, "'" )
    token.gsub!( /''|«|»/, "\"" )
    token.gsub!( /[&+]/, "and" )
    token.gsub!( /\ ('n'|'n|n') /, " and " )
    token.gsub!( /^the /, "" )
    token.gsub!( /, the$/, "" )
    return token
  end

  def Strings.normalize!( token )
    return token.replace( Strings.normalize( token ) )
  end

  # Decodes HTML entities in place: in +var+ itself when it is a String, in
  # the values (recursively) when it is a Hash. Other objects are returned
  # unchanged.
  def Strings.decode_htmlentities!( var )
    if var.is_a?( String )
      HTMLEntities.decode!( var )
    elsif var.is_a?( Hash )
      var.each() { |key, value| decode_htmlentities!( value ) }
    end
    return var
  end

  # Non-destructive variant of decode_htmlentities!; hashes are rebuilt with
  # decoded values.
  def Strings.decode_htmlentities( var )
    if var.is_a?( String )
      return HTMLEntities.decode( var )
    elsif var.is_a?( Hash )
      ret = {}
      var.each() do |key, value|
        ret[key] = decode_htmlentities( value )
      end
      return ret
    else
      return var
    end
  end

  # Tidies up lyrics text line by line: decodes HTML entities, normalises
  # whitespace, quotes and punctuation spacing, capitalises each line and
  # collapses runs of blank lines. The punctuation steps depend on running
  # in exactly this order.
  def Strings.cleanup_lyrics( lyrics )

    lyrics = HTMLEntities.decode( lyrics )

    prev_line = ""
    lines = []

    lyrics.split( /\r\n|\n|\r/ ).each() do |line|

      # remove unnecessary spaces
      line.tr_s!( "\t ", " " )
      line.strip!()

      # quotes and double quotes
      line.gsub!( @@single_quote_like, "'" )
      line.gsub!( @@double_quote_like, "\"" )

      # suspension points
      line.gsub!( /…+/, "..." )
      line.gsub!( /[,;]?\.{2,}/, "..." )

      # add space after "?", "!", ",", ";", ":", ".", ")" and "]" if not present
      line.gsub!( /([^\.]?[\?!,;:\.\)\]])([^ "'<])/, "\\1 \\2" )

      # remove spaces after "¿", "¡", "(" and "["
      line.gsub!( /([¿¡\(\[]) /, "\\1" )

      # remove spaces before "?", "!", ",", ";", ":", ".", ")" and "]"
      line.gsub!( /\ ([\?!,;:\.\)\]])/, "\\1" )

      # remove space after ... at the beginning of sentence
      line.gsub!( /^\.\.\. /, "..." )

      # remove single points at end of sentence
      line.gsub!( /([^\.])\.$/, "\\1" )

      # remove commas and semicolons at end of sentence
      line.gsub!( /[,;]$/, "" )

      # fix english I pronoun capitalization
      line.gsub!( /([ "'\(\[])i([\ '",;:\.\?!\]\)]|$)/, "\\1I\\2" )

      # remove spaces after " or ' at the start of a sentence or before them at the end
      line.sub!( /^(["']) /, "\\1" )
      line.sub!( /\ (["'])$/, "\\1" )

      # capitalize first alphabet character of the line
      Strings.capitalize!( line )

      # no more than one empty line at a time
      if ! line.empty? || ! prev_line.empty?
        lines << line
        prev_line = line
      end
    end

    # drop a trailing empty line
    lines.pop() if ! lines.empty? && lines.last().empty?

    return lines.join( "\n" )
  end

  def Strings.cleanup_lyrics!( lyrics )
    return lyrics.replace( Strings.cleanup_lyrics( lyrics ) )
  end

  # Returns the stripped +artist+ with a normalised " feat. X" suffix. The
  # featured artist is taken from +title+ when the title carries the credit,
  # otherwise a credit at the end of +artist+ itself is rewritten.
  def Strings.cleanup_artist( artist, title )
    artist = artist.strip()
    if artist != ""
      if (md = @@featuring_pattern.match( title.to_s() ))
        # strip: the capture swallows trailing blanks of unbracketed credits
        artist << " feat. " << md[2].strip()
      else
        artist.gsub!( @@featuring_pattern, " feat. \\2" )
      end
    end
    return artist
  end

  # Returns +title+ without a trailing featuring credit.
  def Strings.cleanup_title( title )
    title = title.gsub( @@featuring_pattern, "" )
    title.strip!()
    return title
  end

  # Re-encodes UTF-8 +text+ as one byte per character. Returns +text+
  # unchanged (with a warning on stderr) when it can't be converted.
  def Strings.utf82latin1( text )
    begin
      return text.unpack( "U*" ).pack( "C*" )
    rescue StandardError
      $stderr << "warning: conversion from UTF-8 to Latin1 failed\n"
      return text
    end
  end

  # Re-encodes Latin-1 +text+ as UTF-8. Returns +text+ unchanged (with a
  # warning on stderr) when it can't be converted.
  def Strings.latin12utf8( text )
    begin
      return text.unpack( "C*" ).pack( "U*" )
    rescue StandardError
      $stderr << "warning: conversion from Latin1 to UTF-8 failed\n"
      return text
    end
  end

  # Obfuscates +text+ (not encryption): two rounds of reversing the code
  # points, adding position + 1 to each and joining them with ":".
  def Strings.scramble( text )
    text = text.to_s()
    2.times() do
      chars = text.unpack( "U*" ).reverse()
      chars.size.times() { |idx| chars[idx] = (chars[idx] + idx + 1) }
      text = chars.collect() { |c| c.to_s }.join( ":" )
    end
    return text
  end

  def Strings.scramble!( text )
    return text.replace( Strings.scramble( text ) )
  end

  # Reverses scramble.
  def Strings.descramble( text )
    text = text.to_s()
    2.times() do
      chars = text.split( ":" ).collect() { |c| c.to_i }
      chars.size.times() { |idx| chars[idx] = (chars[idx] - idx - 1) }
      text = chars.reverse().pack( "U*" )
    end
    return text
  end

  def Strings.descramble!( text )
    return text.replace( Strings.descramble( text ) )
  end

end
|
378
|
+
|