lyrics 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/.gitignore +21 -0
- data/LICENSE +20 -0
- data/README.rdoc +17 -0
- data/Rakefile +45 -0
- data/VERSION +1 -0
- data/bin/lyrics +66 -0
- data/lib/lyrics.rb +4 -0
- data/lib/lyrics/cli/application.rb +99 -0
- data/lib/lyrics/cli/optionsparser.rb +228 -0
- data/lib/lyrics/cli/pluginadapter.rb +56 -0
- data/lib/lyrics/cli/plugins.rb +79 -0
- data/lib/lyrics/cli/wikipluginadapter.rb +139 -0
- data/lib/lyrics/i18n/README +1 -0
- data/lib/lyrics/i18n/en.rb +181 -0
- data/lib/lyrics/i18n/es.rb +181 -0
- data/lib/lyrics/i18n/i18n.rb +126 -0
- data/lib/lyrics/i18n/sk.rb +174 -0
- data/lib/lyrics/itrans/COPYRIGHT +31 -0
- data/lib/lyrics/itrans/itrans +0 -0
- data/lib/lyrics/itrans/itrans.txt +8 -0
- data/lib/lyrics/itrans/lyric.txt +23 -0
- data/lib/lyrics/itrans/udvng.ifm +206 -0
- data/lib/lyrics/lyrics.rb +567 -0
- data/lib/lyrics/lyrics_AZLyrics.rb +113 -0
- data/lib/lyrics/lyrics_DarkLyrics.rb +124 -0
- data/lib/lyrics/lyrics_Giitaayan.rb +124 -0
- data/lib/lyrics/lyrics_Jamendo.rb +166 -0
- data/lib/lyrics/lyrics_LeosLyrics.rb +142 -0
- data/lib/lyrics/lyrics_LoudSongs.rb +135 -0
- data/lib/lyrics/lyrics_LyricWiki.rb +328 -0
- data/lib/lyrics/lyrics_LyricsDownload.rb +118 -0
- data/lib/lyrics/lyrics_LyricsMania.rb +141 -0
- data/lib/lyrics/lyrics_Lyriki.rb +286 -0
- data/lib/lyrics/lyrics_SeekLyrics.rb +108 -0
- data/lib/lyrics/lyrics_Sing365.rb +103 -0
- data/lib/lyrics/lyrics_TerraLetras.rb +126 -0
- data/lib/lyrics/mediawikilyrics.rb +1417 -0
- data/lib/lyrics/utils/formdata.rb +56 -0
- data/lib/lyrics/utils/htmlentities.rb +291 -0
- data/lib/lyrics/utils/http.rb +198 -0
- data/lib/lyrics/utils/itrans.rb +160 -0
- data/lib/lyrics/utils/logger.rb +123 -0
- data/lib/lyrics/utils/strings.rb +378 -0
- data/lib/lyrics/utils/xmlhash.rb +111 -0
- data/lyrics.gemspec +98 -0
- data/spec/lyrics_spec.rb +7 -0
- data/spec/spec.opts +1 -0
- data/spec/spec_helper.rb +9 -0
- metadata +137 -0
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
# Copyright (C) 2006-2008 by Sergio Pistone
|
|
2
|
+
# sergio_pistone@yahoo.com.ar
|
|
3
|
+
#
|
|
4
|
+
# This program is free software; you can redistribute it and/or modify
|
|
5
|
+
# it under the terms of the GNU General Public License as published by
|
|
6
|
+
# the Free Software Foundation; either version 2 of the License, or
|
|
7
|
+
# (at your option) any later version.
|
|
8
|
+
#
|
|
9
|
+
# This program is distributed in the hope that it will be useful,
|
|
10
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
11
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
12
|
+
# GNU General Public License for more details.
|
|
13
|
+
#
|
|
14
|
+
# You should have received a copy of the GNU General Public License
|
|
15
|
+
# along with this program; if not, write to the
|
|
16
|
+
# Free Software Foundation, Inc.,
|
|
17
|
+
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
|
18
|
+
|
|
19
|
+
require File.expand_path( File.dirname( __FILE__ ) + "/strings" )
|
|
20
|
+
|
|
21
|
+
# Conversions between ITRANS-romanized text and Devanagari.
#
# Romanized -> Devanagari is delegated to the +itrans+ executable found in
# @@itrans_dir; Devanagari -> romanized is done here with lookup tables.
module ITRANS

  @@itrans_dir = File.dirname( File.expand_path(__FILE__) ) + "/../itrans"
  @@null_dev = "/dev/null"

  # Round-trips +text+ through Devanagari and back to get a canonical
  # ITRANS spelling. Returns a new String.
  def ITRANS.normalize( text )
    return ITRANS.from_devanagari!( ITRANS.to_devanagari( text ) )
  end

  # In-place variant of ITRANS.to_devanagari. Returns +text+.
  def ITRANS.to_devanagari!( text )
    text.replace( to_devanagari( text ) )
  end

  # Runs the itrans executable (with udvng.ifm, UTF-8 output) on +text+ and
  # returns its output with '%' comments removed and surrounding whitespace
  # stripped.
  def ITRANS.to_devanagari( text )
    input = "#indianifm=udvng.ifm\n #indian\n#{text}\n#endindian"
    # Single-quote the payload for the shell: inside '...' nothing is
    # expanded, so "$(...)", "$VAR" or backticks in the text cannot run.
    # An embedded ' is written as '\'' (close, escaped quote, reopen).
    quoted = "'" + input.gsub( "'" ) { "'\\''" } + "'"
    # Block form restores the previous working directory even if the
    # command raises.
    trans = Dir.chdir( @@itrans_dir ) do
      `echo #{quoted} | #{@@itrans_dir}/itrans -U 2>#{@@null_dev}`
    end
    trans.gsub!( /%[^\n]*/, "" ) # TODO search line
    trans.strip!()
    return trans
  end

  # Replaces Devanagari characters in +text+ (in place) with their ITRANS
  # romanization and returns +text+.
  #
  # Vowels, vowel signs and the irregular conjuncts are substituted first.
  # Consonants are then substituted, appending the inherent 'a' when the
  # consonant stands alone in a word or is followed by something other than
  # a vowel or punctuation.
  def ITRANS.from_devanagari!( text )
    @@devanagari2itrans.each() do |devana, itrans|
      text.gsub!( devana, itrans )
    end
    longest_first( @@devanagari2itrans_consonants ).each() do |devana, itrans|
      # is the only symbol in the 'word' --> add an 'a' at the end:
      text.gsub!( /(^|[ ""\.:;\(\[])#{devana}([,;:?!\)\]\s]|$)/, "\\1#{itrans}a\\2" )
      # is not followed by a vocal --> add an 'a' at the end:
      text.gsub!( /#{devana}([^aeiouAEIOU,;:?!\)\]\s])/, "#{itrans}a\\1" )
      text.gsub!( devana, itrans )
    end
    return text
  end

  # Non-destructive variant of ITRANS.from_devanagari!.
  def ITRANS.from_devanagari( text )
    return ITRANS.from_devanagari!( String.new( text ) )
  end

  # Returns the UTF-8 encoded String for a single Unicode +codepoint+.
  def ITRANS.unicode( codepoint )
    [codepoint].pack( "U*" )
  end

  # Returns the [key, value] pairs of +table+ with multi-codepoint keys
  # first, keeping the table's own order among keys of equal length. This
  # lets a base+nukta sequence match before its base consonant is rewritten.
  def ITRANS.longest_first( table )
    pairs = table.to_a()
    return pairs.sort_by() { |pair| [-pair[0].unpack( "U*" ).size(), pairs.index( pair )] }
  end
  private_class_method :longest_first

  @@devanagari2itrans = {
    ITRANS.unicode( 0x0901 ) => "",

    # vowels:
    ITRANS.unicode( 0x0905 ) => "a",
    ITRANS.unicode( 0x0906 ) => "aa", # /A
    ITRANS.unicode( 0x093E ) => "aa", # /A
    ITRANS.unicode( 0x0907 ) => "i",
    ITRANS.unicode( 0x093F ) => "i",
    ITRANS.unicode( 0x0908 ) => "ii", # /I
    ITRANS.unicode( 0x0940 ) => "ii", # /I
    ITRANS.unicode( 0x0909 ) => "u",
    ITRANS.unicode( 0x0941 ) => "u",
    ITRANS.unicode( 0x090A ) => "uu", # /U
    ITRANS.unicode( 0x0942 ) => "uu", # /U
    ITRANS.unicode( 0x090B ) => "RRi", # R^i
    ITRANS.unicode( 0x0943 ) => "RRi", # R^i
    ITRANS.unicode( 0x090C ) => "LLi", # L^i
    ITRANS.unicode( 0x0944 ) => "LLi", # L^i
    ITRANS.unicode( 0x090F ) => "e",
    ITRANS.unicode( 0x0947 ) => "e",
    ITRANS.unicode( 0x0910 ) => "ai",
    ITRANS.unicode( 0x0948 ) => "ai",
    ITRANS.unicode( 0x0913 ) => "o",
    ITRANS.unicode( 0x094B ) => "o",
    ITRANS.unicode( 0x0914 ) => "au",
    ITRANS.unicode( 0x094C ) => "au",
    # itrans irregular
    "क्ष"=> "kSh", # x / kS
    "त्र"=> "tr",
    "ज्ञ"=> "j~n", # GY / dny
    "श्र"=> "shr",
  }

  @@devanagari2itrans_consonants = {
    # gutturals:
    ITRANS.unicode( 0x0915 ) => "k",
    ITRANS.unicode( 0x0916 ) => "kh",
    # ITRANS.unicode( 0x0916 ) => ".Nkh",
    ITRANS.unicode( 0x0917 ) => "g",
    ITRANS.unicode( 0x0918 ) => "gh",
    # NGA is U+0919; it was previously keyed as U+0918, which silently
    # overrode the "gh" entry above.
    ITRANS.unicode( 0x0919 ) => "~N",
    # palatals:
    ITRANS.unicode( 0x091A ) => "ch",
    ITRANS.unicode( 0x091B ) => "Ch",
    ITRANS.unicode( 0x091C ) => "j",
    ITRANS.unicode( 0x091D ) => "jh",
    ITRANS.unicode( 0x091E ) => "~n", # JN
    # retroflexes:
    ITRANS.unicode( 0x091F ) => "T",
    ITRANS.unicode( 0x0920 ) => "Th",
    ITRANS.unicode( 0x0921 ) => "D",
    ITRANS.unicode( 0x0922 ) => "Dh",
    # ITRANS.unicode( 0x0922 ) => ".Dh", # Rh (valid?)
    ITRANS.unicode( 0x0923 ) => "N",
    # dentals:
    ITRANS.unicode( 0x0924 ) => "t",
    ITRANS.unicode( 0x0925 ) => "th",
    ITRANS.unicode( 0x0926 ) => "d",
    ITRANS.unicode( 0x0927 ) => "dh",
    ITRANS.unicode( 0x0928 ) => "n",
    # labials:
    ITRANS.unicode( 0x092A ) => "p",
    ITRANS.unicode( 0x092B ) => "ph",
    ITRANS.unicode( 0x092C ) => "b",
    ITRANS.unicode( 0x092D ) => "bh",
    ITRANS.unicode( 0x092E ) => "m",
    # semi-vowels:
    ITRANS.unicode( 0x092F ) => "y",
    ITRANS.unicode( 0x0930 ) => "r",
    ITRANS.unicode( 0x0932 ) => "l",
    ITRANS.unicode( 0x0935 ) => "v", # w
    # sibilants:
    ITRANS.unicode( 0x0936 ) => "sh",
    ITRANS.unicode( 0x0937 ) => "Sh", # shh
    ITRANS.unicode( 0x0938 ) => "s",
    # miscellaneous:
    ITRANS.unicode( 0x0939 ) => "h",
    ITRANS.unicode( 0x0902 ) => ".n", # M / .m
    ITRANS.unicode( 0x0903 ) => "H", # .h
    ITRANS.unicode( 0x0950 ) => "OM", # AUM
    # other consonants:
    "क़" => "q",
    ITRANS.unicode( 0x0958 ) => "q",
    "ख़" => "Kh",
    "ग़" => "G",
    "ज़" => "z",
    ITRANS.unicode( 0x095B ) => "z",
    "फ़" => "f",
    "ड़" => ".D", # R
    ITRANS.unicode( 0x095C ) => ".D", # R (valid?)
    "ढ़" => ".Dh", # Rh
  }

end
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
# Copyright (C) 2006-2008 by Sergio Pistone
|
|
2
|
+
# sergio_pistone@yahoo.com.ar
|
|
3
|
+
#
|
|
4
|
+
# This program is free software; you can redistribute it and/or modify
|
|
5
|
+
# it under the terms of the GNU General Public License as published by
|
|
6
|
+
# the Free Software Foundation; either version 2 of the License, or
|
|
7
|
+
# (at your option) any later version.
|
|
8
|
+
#
|
|
9
|
+
# This program is distributed in the hope that it will be useful,
|
|
10
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
11
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
12
|
+
# GNU General Public License for more details.
|
|
13
|
+
#
|
|
14
|
+
# You should have received a copy of the GNU General Public License
|
|
15
|
+
# along with this program; if not, write to the
|
|
16
|
+
# Free Software Foundation, Inc.,
|
|
17
|
+
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
|
18
|
+
|
|
19
|
+
# Minimal file-backed logger. Every call opens the log file, appends and
# closes it again. Messages can be indented by a "tabulation" string built
# from @tabulation_base repeated @tabulation_level times.
class Logger

  # file_path::         path of the log file.
  # truncate_to_lines:: when >= 0, the existing file is cut down to its
  #                     last +truncate_to_lines+ lines (see #truncate).
  def initialize( file_path, truncate_to_lines=-1 )
    @file_path = file_path
    @tabulation = nil
    @tabulation_base = " ".freeze()
    @tabulation_level = 0
    @skip_first_line_tabulation = false
    truncate( truncate_to_lines ) if truncate_to_lines >= 0
  end

  def finalize() # TODO revise implementation
  end

  def get_file_path()
    return @file_path
  end

  # Switches to a new log file. The previous log file is deleted if it
  # exists and is not a directory.
  def set_file_path( file_path )
    if @file_path != file_path
      File.delete( @file_path ) if File.exist?( @file_path ) && ! File.directory?( @file_path )
      @file_path = file_path.clone().freeze()
    end
  end

  # Keeps only the last +max_lines+ lines of the log file. The file is
  # created (empty) when it does not exist.
  def truncate( max_lines )
    # CREAT without TRUNC: creates a missing file, never clobbers an
    # existing one. The block form closes the handle even on errors.
    lines = File.open( @file_path, File::CREAT|File::RDONLY ) { |file| file.read().split( "\n" ) }
    if lines.size() > max_lines
      kept = lines.last( [max_lines, 0].max() )
      File.open( @file_path, File::CREAT|File::TRUNC|File::WRONLY ) do |file|
        kept.each() do |line|
          file.write( line )
          file.write( "\n" )
        end
      end
    end
  end

  # Empties the log file, creating it if needed.
  def reset()
    File.open( @file_path, File::CREAT|File::TRUNC|File::WRONLY ) { }
  end

  # Appends +msg+ followed by +new_lines+ line breaks. With a tabulation
  # set, every line of +msg+ is prefixed with it; the prefix of the first
  # line is skipped when the previous call ended without a line break.
  def log( msg, new_lines=1 )
    File.open( @file_path, File::CREAT|File::APPEND|File::WRONLY ) do |output|
      if @tabulation
        output.write( @tabulation ) if ! @skip_first_line_tabulation
        output.write( msg.gsub( "\n", "\n#{@tabulation}" ) )
        @skip_first_line_tabulation = new_lines <= 0
      else
        output.write( msg )
      end
      new_lines.times() { output.write( "\n" ) }
    end
    return nil
  end

  def get_tabulation_base()
    return @tabulation_base
  end

  # Sets the string repeated once per tabulation level and rebuilds the
  # current tabulation from it.
  def set_tabulation_base( tabulation_base )
    if @tabulation_base != tabulation_base
      @tabulation_base = tabulation_base.clone().freeze()
      rebuild_tabulation()
    end
  end

  def get_tabulation_level()
    return @tabulation_level
  end

  # Sets the indentation depth; a level <= 0 disables tabulation.
  def set_tabulation_level( level )
    if @tabulation_level != level
      @tabulation_level = level
      rebuild_tabulation()
    end
  end

  def increase_tabulation_level()
    set_tabulation_level( @tabulation_level + 1 )
  end

  def decrease_tabulation_level()
    set_tabulation_level( @tabulation_level - 1 )
  end

  private

  # Recomputes @tabulation from the current base and level.
  def rebuild_tabulation()
    @tabulation = @tabulation_level <= 0 ? nil : @tabulation_base * @tabulation_level
  end

end
|
|
@@ -0,0 +1,378 @@
|
|
|
1
|
+
# Copyright (C) 2006-2008 by Sergio Pistone
|
|
2
|
+
# sergio_pistone@yahoo.com.ar
|
|
3
|
+
#
|
|
4
|
+
# This program is free software; you can redistribute it and/or modify
|
|
5
|
+
# it under the terms of the GNU General Public License as published by
|
|
6
|
+
# the Free Software Foundation; either version 2 of the License, or
|
|
7
|
+
# (at your option) any later version.
|
|
8
|
+
#
|
|
9
|
+
# This program is distributed in the hope that it will be useful,
|
|
10
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
11
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
12
|
+
# GNU General Public License for more details.
|
|
13
|
+
#
|
|
14
|
+
# You should have received a copy of the GNU General Public License
|
|
15
|
+
# along with this program; if not, write to the
|
|
16
|
+
# Free Software Foundation, Inc.,
|
|
17
|
+
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
|
18
|
+
|
|
19
|
+
require File.expand_path( File.dirname( __FILE__ ) + "/htmlentities" )
|
|
20
|
+
|
|
21
|
+
require "cgi"
|
|
22
|
+
|
|
23
|
+
# unicode support: $KCODE only has an effect on Ruby 1.8. Rubies that define
# Encoding (1.9+) ignore the assignment and print a warning, so skip it there.
$KCODE = "u" unless defined?( Encoding )
|
|
24
|
+
|
|
25
|
+
# String helpers: shell/SQL quoting, Latin-1 aware case conversion, title
# casing, token normalization, HTML entity decoding (via HTMLEntities),
# lyrics/artist/title cleanup and a simple reversible scrambling.
module Strings

  @@word_separators = " \t\n()[],.;:-¿?¡!\"/\\"

  # Characters Strings.capitalize may upcase: ASCII digits/letters plus the
  # Latin-1 letters. The multiplication and division signs are left out.
  @@capitalizable = "0-9a-zA-ZàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ"

  # Trailing "ft./feat./featuring <names>" marker, optionally bracketed.
  @@featuring = /[ \(\[](ft\.|ft |feat\.|feat |featuring ) *([^\)\]]+)[\)\]]? *$/i

  # Quote-like characters folded to ' and " by Strings.cleanup_lyrics.
  # NOTE(review): the previous patterns ended in alternatives that render
  # as empty (which would match between every character). They presumably
  # held the C1-range quotes U+0091..U+0094 -- confirm against upstream.
  @@single_quote_like = Regexp.union( "`", "´", "‘", "’", [0x91].pack( "U" ), [0x92].pack( "U" ) )
  @@double_quote_like = Regexp.union( "''", "«", "»", "„", "“", "”", [0x93].pack( "U" ), [0x94].pack( "U" ) )

  # True when +text+ (converted with to_s) is empty or only whitespace.
  def Strings.empty?( text )
    return text.to_s().strip().empty?
  end

  # Wraps +text+ in double quotes for a POSIX shell, backslash-escaping the
  # four characters that stay special inside double quotes: \ " ` $
  def Strings.shell_quote( text )
    return "\"" + text.gsub( /[\\"`$]/ ) { |chr| "\\" + chr } + "\""
  end

  # Inverse of Strings.shell_quote for double-quoted text. Any other input
  # just loses its first and last character.
  def Strings.shell_unquote( text )
    inner = text.slice( 1..-2 )
    if text.slice( 0, 1 ) == "\""
      return inner.gsub( /\\([\\"`$])/ ) { $1 }
    else # if text.slice( 0, 1 ) == "'"
      return inner
    end
  end

  # Backslash-escapes backslash, double quote, backtick, single quote and
  # space. Other shell metacharacters are passed through unchanged.
  def Strings.shell_escape( text )
    return text.gsub( /[\\"`' ]/ ) { |chr| "\\" + chr }
  end

  # Removes the backslash in front of space, single quote, backtick and
  # double quote. Escaped backslashes are left as they are.
  def Strings.shell_unescape( text )
    return text.gsub( /\\([ '`"])/ ) { $1 }
  end

  # Returns +text+ as a single-quoted SQL string literal.
  def Strings.sql_quote( text )
    return "'" + Strings.sql_escape( text ) + "'"
  end

  # Drops the first and last character and un-doubles single quotes.
  def Strings.sql_unquote( text )
    return Strings.sql_unescape( text.slice( 1..-2 ) )
  end

  # Doubles every single quote.
  def Strings.sql_escape( text )
    return text.gsub( "'", "''" )
  end

  # Collapses every doubled single quote into one.
  def Strings.sql_unescape( text )
    return text.gsub( "''", "'" )
  end

  # Returns +length+ random characters drawn from a-z and 0-9.
  def Strings.random_token( length=10 )
    chars = ( "a".."z" ).to_a() + ( "0".."9" ).to_a()
    token = ""
    # rand( n ) yields 0...n, so every entry (including the last) can be drawn
    length.times() { token << chars[rand( chars.size() )] }
    return token
  end

  # Non-destructive variant of Strings.remove_invalid_filename_chars!.
  def Strings.remove_invalid_filename_chars( filename )
    return Strings.remove_invalid_filename_chars!( String.new( filename ) )
  end

  # Deletes * ? : | / \ < > from +filename+ in place and returns it.
  def Strings.remove_invalid_filename_chars!( filename )
    # A regexp is used because in a tr-style character list a backslash
    # escapes the next character instead of standing for itself.
    filename.gsub!( /[*?:|\/\\<>]/, "" )
    return filename
  end

  # Non-destructive variant of Strings.remove_vocal_accents!.
  def Strings.remove_vocal_accents( text )
    return Strings.remove_vocal_accents!( String.new( text ) )
  end

  # Replaces accented Latin vowels in +text+ (in place) with the plain
  # vowel and returns +text+.
  def Strings.remove_vocal_accents!( text )
    [
      [/[áàäâåã]/, "a"], [/[ÁÀÄÂÅÃ]/, "A"],
      [/[éèëê]/, "e"], [/[ÉÈËÊ]/, "E"],
      [/[íìïî]/, "i"], [/[ÍÌÏÎ]/, "I"],
      [/[óòöô]/, "o"], [/[ÓÒÖÔ]/, "O"],
      [/[úùüû]/, "u"], [/[ÚÙÜÛ]/, "U"],
    ].each() { |accented, plain| text.gsub!( accented, plain ) }
    return text
  end

  # Returns +text+ without double quotes and without a leading article
  # "the", wrapped in double quotes; "" when nothing is left.
  def Strings.google_search_quote( text )
    text = text.gsub( "\"", "" )
    # only strip "the" as a whole word, not the start of e.g. "Therapy"
    text.gsub!( /^\ *the(\ +|$)/i, "" )
    return Strings.empty?( text) ? "" : "\"#{text}\""
  end

  # Builds a Google "I'm Feeling Lucky" URL for +query+, optionally
  # restricted to +site+ (appended as given, not URL-escaped).
  def Strings.build_google_feeling_lucky_url( query, site=nil )
    url = "http://www.google.com/search?q=#{CGI.escape( query )}"
    url += "+site%3A#{site}" if site
    return url + "&btnI"
  end

  # Lowercases ASCII and Latin-1 letters of +text+ (to_s is applied).
  # Falls back to String#downcase when +text+ cannot be unpacked as UTF-8.
  def Strings.downcase( text )
    begin
      codes = text.to_s().unpack( "U*" ).collect() do |c|
        if c >= 65 && c <= 90 # A-Z
          c + 32
        elsif c >= 192 && c <= 222 && c != 215 # Latin-1 capitals, skipping the multiplication sign
          c + 32
        else
          c
        end
      end
      return codes.pack( "U*" )
    rescue StandardError # fallback to normal operation on error
      return text.to_s().downcase()
    end
  end

  def Strings.downcase!( text )
    return text.replace( Strings.downcase( text ) )
  end

  # Uppercases ASCII and Latin-1 letters of +text+ (to_s is applied).
  # Falls back to String#upcase when +text+ cannot be unpacked as UTF-8.
  def Strings.upcase( text )
    begin
      codes = text.to_s().unpack( "U*" ).collect() do |c|
        if c >= 97 && c <= 122 # a-z
          c - 32
        elsif c >= 224 && c <= 254 && c != 247 # Latin-1 smalls, skipping the division sign
          c - 32
        else
          c
        end
      end
      return codes.pack( "U*" )
    rescue StandardError # fallback to normal operation on error
      return text.to_s().upcase()
    end
  end

  def Strings.upcase!( text )
    return text.replace( Strings.upcase( text ) )
  end

  # Returns a copy of +text+ with one character upcased: the first
  # letter/digit found, or -- with +first_only+ -- the character at the
  # start of a line, if it is a letter/digit. With +downcase+ the text is
  # lowercased first. The argument itself is never modified.
  def Strings.capitalize( text, downcase=false, first_only=false )
    text = downcase ? Strings.downcase( text ) : text.to_s()
    pattern = first_only ? /^([#{@@capitalizable}])/ : /([#{@@capitalizable}])/
    return text.sub( pattern ) { |c| Strings.upcase( c ) }
  end

  def Strings.capitalize!( text, downcase=false, first_only=false )
    return text.replace( Strings.capitalize( text, downcase, first_only ) )
  end

  # Upcases the first character of every word (words are delimited by
  # @@word_separators). With +correct_case+, articles, conjunctions, short
  # prepositions and "feat"/"vs" are lowercased again when they follow a
  # space and precede a separator.
  def Strings.titlecase( text, correct_case=true, downcase=false )
    text = Strings.capitalize( text, downcase )
    word_start = true
    codes = text.unpack( "U*" ).collect() do |c|
      # always build a UTF-8 string so it is comparable with the separators
      chr = [c].pack( "U*" )
      separator = @@word_separators.include?( chr )
      if word_start && ! separator
        word_start = false
        c = Strings.upcase( chr ).unpack( "U*" )[0]
      elsif ! word_start && separator
        word_start = true
      end
      c
    end
    text = codes.pack( "U*" )
    if correct_case
      lc_words = [
        "the", "a", "an", # articles
        "and", "but", "or", "nor", # conjunctions
        "'n'", "'n", "n'", # and contractions
        "as", "at", "by", "for", "in", "of", "on", "to", # short prepositions
        #"from", "into", "onto", "with", "over" # not so short prepositions
        "feat", "vs", # special words
      ]
      lc_words.each() do |lc_word|
        # "\-" keeps the hyphen literal instead of forming a ".-?" range
        text.gsub!( /\ #{lc_word}([ ,;:\.\-?!\"\/\\\)])/i, " #{lc_word}\\1" )
      end
    end
    return text
  end

  def Strings.titlecase!( text, correct_case=true, downcase=false )
    return text.replace( Strings.titlecase( text, correct_case, downcase ) )
  end

  # Returns a lowercased comparison form of +token+: whitespace, dots,
  # colons, semicolons and brackets collapse to single spaces, quote
  # variants are unified, "&", "+" and "'n'" become "and", and a leading
  # "the " / trailing ", the" is removed.
  def Strings.normalize( token )
    token = Strings.downcase( token )
    token.tr_s!( " \n\r\t.;:()[]", " " )
    token.strip!()
    token.gsub!( /`|´|’/, "'" )
    token.gsub!( /''|«|»/, "\"" )
    token.gsub!( /[&+]/, "and" )
    token.gsub!( /\ ('n'|'n|n') /, " and " )
    token.gsub!( /^the /, "" )
    token.gsub!( /, the$/, "" )
    return token
  end

  def Strings.normalize!( token )
    return token.replace( Strings.normalize( token ) )
  end

  # Decodes HTML entities in place: in a String directly, in a Hash
  # recursively on its values. Other objects are returned untouched.
  def Strings.decode_htmlentities!( var )
    if var.is_a?( String )
      HTMLEntities.decode!( var )
    elsif var.is_a?( Hash )
      var.each() { |key, value| decode_htmlentities!( value ) }
    end
    return var
  end

  # Non-destructive variant: returns a decoded String, or a new Hash with
  # decoded values; other objects are returned as they are.
  def Strings.decode_htmlentities( var )
    if var.is_a?( String )
      return HTMLEntities.decode( var )
    elsif var.is_a?( Hash )
      ret = {}
      var.each() do |key, value|
        ret[key] = decode_htmlentities( value )
      end
      return ret
    else
      return var
    end
  end

  # Returns a tidied copy of +lyrics+: HTML entities decoded, whitespace
  # and punctuation spacing normalized line by line, quote variants
  # unified, each line capitalized, and runs of blank lines reduced to one
  # (none at the end).
  def Strings.cleanup_lyrics( lyrics )

    lyrics = HTMLEntities.decode( lyrics )

    prev_line = ""
    lines = []

    lyrics.split( /\r\n|\n|\r/ ).each do |line|

      # remove unnecesary spaces
      line.tr_s!( "\t ", " " )
      line.strip!()

      # quotes and double quotes
      line.gsub!( @@single_quote_like, "'" )
      line.gsub!( @@double_quote_like, "\"" )

      # suspensive points
      line.gsub!( /…+/, "..." )
      line.gsub!( /[,;]?\.{2,}/, "..." )

      # add space after "?", "!", ",", ";", ":", ".", ")" and "]" if not present
      line.gsub!( /([^\.]?[\?!,;:\.\)\]])([^ "'<])/, "\\1 \\2" )

      # remove spaces after "¿", "¡", "(" and "["
      line.gsub!( /([¿¡\(\[]) /, "\\1" )

      # remove spaces before "?", "!", ",", ";", ":", ".", ")" and "]"
      line.gsub!( /\ ([\?!,;:\.\)\]])/, "\\1" )

      # remove space after ... at the beginning of sentence
      line.gsub!( /^\.\.\. /, "..." )

      # remove single points at end of sentence
      line.gsub!( /([^\.])\.$/, "\\1" )

      # remove commas and semicolons at end of sentence
      line.gsub!( /[,;]$/, "" )

      # fix english I pronoun capitalization
      line.gsub!( /([ "'\(\[])i([\ '",;:\.\?!\]\)]|$)/, "\\1I\\2" )

      # remove spaces after " or ' at the beginning of the sentence, or before them at the end
      line.sub!( /^(["']) /, "\\1" )
      line.sub!( /\ (["'])$/, "\\1" )

      # capitalize first alphabetic character of the line
      Strings.capitalize!( line )

      # no more than one empty line at a time
      if ! line.empty? || ! prev_line.empty?
        lines << line
        prev_line = line
      end
    end

    lines.pop() if ! lines.empty? && lines.last().empty?

    return lines.join( "\n" )
  end

  def Strings.cleanup_lyrics!( lyrics )
    return lyrics.replace( Strings.cleanup_lyrics( lyrics ) )
  end

  # Returns the stripped +artist+ with featured artists written as
  # " feat. <names>": taken from a featuring marker at the end of +title+
  # when there is one, otherwise the artist's own marker is rewritten.
  # A blank artist is returned as "".
  def Strings.cleanup_artist( artist, title )
    artist = artist.strip()
    if artist != ""
      if (md = @@featuring.match( title.to_s() ))
        artist << " feat. " << md[2]
      else
        artist.gsub!( @@featuring, " feat. \\2" )
      end
    end
    return artist
  end

  # Returns +title+ without a trailing featuring marker, stripped.
  def Strings.cleanup_title( title )
    title = title.gsub( @@featuring, "" )
    title.strip!()
    return title
  end

  # Repacks the code points of UTF-8 +text+ as single bytes. On failure a
  # warning goes to $stderr and +text+ is returned unchanged.
  def Strings.utf82latin1( text )
    begin
      return text.unpack( "U*" ).pack( "C*" )
    rescue StandardError
      $stderr << "warning: conversion from UTF-8 to Latin1 failed\n"
      return text
    end
  end

  # Repacks the bytes of +text+ as UTF-8 code points. On failure a warning
  # goes to $stderr and +text+ is returned unchanged.
  def Strings.latin12utf8( text )
    begin
      return text.unpack( "C*" ).pack( "U*" )
    rescue StandardError
      $stderr << "warning: conversion from Latin1 to UTF-8 failed\n"
      return text
    end
  end

  # Obfuscates +text+: twice over, the code points are reversed, offset by
  # their 1-based position and joined with ":". See Strings.descramble.
  def Strings.scramble( text )
    text = text.to_s()
    2.times() do
      chars = text.unpack( "U*" ).reverse()
      chars.size.times() { |idx| chars[idx] = (chars[idx] + idx + 1) }
      text = chars.collect() { |c| c.to_s }.join( ":" )
    end
    return text
  end

  def Strings.scramble!( text )
    return text.replace( Strings.scramble( text ) )
  end

  # Reverses Strings.scramble.
  def Strings.descramble( text )
    text = text.to_s()
    2.times() do
      chars = text.split( ":" ).collect() { |c| c.to_i }
      chars.size.times() { |idx| chars[idx] = (chars[idx] - idx - 1) }
      text = chars.reverse().pack( "U*" )
    end
    return text
  end

  def Strings.descramble!( text )
    return text.replace( Strings.descramble( text ) )
  end

end
|
|
378
|
+
|