lyrics 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (50) hide show
  1. data/.document +5 -0
  2. data/.gitignore +21 -0
  3. data/LICENSE +20 -0
  4. data/README.rdoc +17 -0
  5. data/Rakefile +45 -0
  6. data/VERSION +1 -0
  7. data/bin/lyrics +66 -0
  8. data/lib/lyrics.rb +4 -0
  9. data/lib/lyrics/cli/application.rb +99 -0
  10. data/lib/lyrics/cli/optionsparser.rb +228 -0
  11. data/lib/lyrics/cli/pluginadapter.rb +56 -0
  12. data/lib/lyrics/cli/plugins.rb +79 -0
  13. data/lib/lyrics/cli/wikipluginadapter.rb +139 -0
  14. data/lib/lyrics/i18n/README +1 -0
  15. data/lib/lyrics/i18n/en.rb +181 -0
  16. data/lib/lyrics/i18n/es.rb +181 -0
  17. data/lib/lyrics/i18n/i18n.rb +126 -0
  18. data/lib/lyrics/i18n/sk.rb +174 -0
  19. data/lib/lyrics/itrans/COPYRIGHT +31 -0
  20. data/lib/lyrics/itrans/itrans +0 -0
  21. data/lib/lyrics/itrans/itrans.txt +8 -0
  22. data/lib/lyrics/itrans/lyric.txt +23 -0
  23. data/lib/lyrics/itrans/udvng.ifm +206 -0
  24. data/lib/lyrics/lyrics.rb +567 -0
  25. data/lib/lyrics/lyrics_AZLyrics.rb +113 -0
  26. data/lib/lyrics/lyrics_DarkLyrics.rb +124 -0
  27. data/lib/lyrics/lyrics_Giitaayan.rb +124 -0
  28. data/lib/lyrics/lyrics_Jamendo.rb +166 -0
  29. data/lib/lyrics/lyrics_LeosLyrics.rb +142 -0
  30. data/lib/lyrics/lyrics_LoudSongs.rb +135 -0
  31. data/lib/lyrics/lyrics_LyricWiki.rb +328 -0
  32. data/lib/lyrics/lyrics_LyricsDownload.rb +118 -0
  33. data/lib/lyrics/lyrics_LyricsMania.rb +141 -0
  34. data/lib/lyrics/lyrics_Lyriki.rb +286 -0
  35. data/lib/lyrics/lyrics_SeekLyrics.rb +108 -0
  36. data/lib/lyrics/lyrics_Sing365.rb +103 -0
  37. data/lib/lyrics/lyrics_TerraLetras.rb +126 -0
  38. data/lib/lyrics/mediawikilyrics.rb +1417 -0
  39. data/lib/lyrics/utils/formdata.rb +56 -0
  40. data/lib/lyrics/utils/htmlentities.rb +291 -0
  41. data/lib/lyrics/utils/http.rb +198 -0
  42. data/lib/lyrics/utils/itrans.rb +160 -0
  43. data/lib/lyrics/utils/logger.rb +123 -0
  44. data/lib/lyrics/utils/strings.rb +378 -0
  45. data/lib/lyrics/utils/xmlhash.rb +111 -0
  46. data/lyrics.gemspec +98 -0
  47. data/spec/lyrics_spec.rb +7 -0
  48. data/spec/spec.opts +1 -0
  49. data/spec/spec_helper.rb +9 -0
  50. metadata +137 -0
@@ -0,0 +1,160 @@
1
+ # Copyright (C) 2006-2008 by Sergio Pistone
2
+ # sergio_pistone@yahoo.com.ar
3
+ #
4
+ # This program is free software; you can redistribute it and/or modify
5
+ # it under the terms of the GNU General Public License as published by
6
+ # the Free Software Foundation; either version 2 of the License, or
7
+ # (at your option) any later version.
8
+ #
9
+ # This program is distributed in the hope that it will be useful,
10
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
+ # GNU General Public License for more details.
13
+ #
14
+ # You should have received a copy of the GNU General Public License
15
+ # along with this program; if not, write to the
16
+ # Free Software Foundation, Inc.,
17
+ # 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18
+
19
+ require File.expand_path( File.dirname( __FILE__ ) + "/strings" )
20
+
21
# Conversion helpers between ITRANS romanized text and Devanagari.
#
# ITRANS -> Devanagari is delegated to the external +itrans+ program located
# in @@itrans_dir; Devanagari -> ITRANS is done in Ruby with the lookup
# tables defined at the bottom of this module.
module ITRANS

  # Directory containing the +itrans+ executable (and, presumably, the
  # udvng.ifm font map referenced by the generated input -- TODO confirm).
  @@itrans_dir = File.dirname( File.expand_path( __FILE__ ) ) + "/../itrans"
  @@null_dev = "/dev/null"

  # Returns +text+ converted to Devanagari and back, i.e. a canonical ITRANS
  # spelling of +text+.
  def ITRANS.normalize( text )
    return ITRANS.from_devanagari!( ITRANS.to_devanagari( text ) )
  end

  # In-place variant of ITRANS.to_devanagari; returns +text+.
  def ITRANS.to_devanagari!( text )
    text.replace( ITRANS.to_devanagari( text ) )
  end

  # Runs +text+ through the external +itrans+ program (option -U) and returns
  # its output with "%..." comment remainders removed and surrounding
  # whitespace stripped.
  #
  # The command is executed with @@itrans_dir as working directory; the block
  # form of Dir.chdir guarantees the previous directory is restored even if
  # running the command raises.
  def ITRANS.to_devanagari( text )
    input = "#indianifm=udvng.ifm\n #indian\n#{text}\n#endindian"
    binary = @@itrans_dir + "/itrans"
    # Both the input and the binary path are single-quoted, so the shell
    # performs no expansion ($var, $(cmd), `cmd`, backslashes) on them.
    # printf '%s\n' is used rather than echo because some echo
    # implementations interpret backslash sequences in their argument.
    command = "printf '%s\\n' #{shell_single_quote( input )} | " +
              "#{shell_single_quote( binary )} -U 2>#{@@null_dev}"
    trans = Dir.chdir( @@itrans_dir ) { `#{command}` }
    return trans.gsub( /%[^\n]*/, "" ).strip() # TODO search line
  end

  # Replaces, in place, every Devanagari sequence known to the lookup tables
  # with its ITRANS spelling and returns +text+.
  #
  # Vowels, vowel signs and irregular conjuncts are substituted first. Each
  # consonant is then substituted, appending the inherent "a" when the
  # consonant stands alone in a word or is followed by something other than a
  # vowel, punctuation or whitespace.
  def ITRANS.from_devanagari!( text )
    @@devanagari2itrans.each() do |devana, itrans|
      text.gsub!( devana, itrans )
    end
    multi_codepoint_first( @@devanagari2itrans_consonants ).each() do |devana, itrans|
      pattern = Regexp.escape( devana )
      # is the only symbol in the 'word' --> add an 'a' at the end:
      text.gsub!( /(^|[ "\.:;\(\[])#{pattern}([,;:?!\)\]\s]|$)/, "\\1#{itrans}a\\2" )
      # is not followed by a vocal --> add an 'a' at the end:
      text.gsub!( /#{pattern}([^aeiouAEIOU,;:?!\)\]\s])/, "#{itrans}a\\1" )
      text.gsub!( devana, itrans )
    end
    return text
  end

  # Non-destructive variant of ITRANS.from_devanagari!.
  def ITRANS.from_devanagari( text )
    return ITRANS.from_devanagari!( String.new( text ) )
  end

  # Returns the UTF-8 string made of the given Unicode code point(s).
  def ITRANS.unicode( *codepoints )
    codepoints.pack( "U*" )
  end

  # Wraps +text+ in single quotes for a POSIX shell; embedded single quotes
  # are emitted as '\'' (close quote, escaped quote, reopen quote).
  def ITRANS.shell_single_quote( text )
    return "'" + text.to_s().gsub( "'" ) { "'\\''" } + "'"
  end

  # Returns the [devanagari, itrans] pairs of +table+ with the keys spanning
  # several code points first (relative order otherwise preserved). Without
  # this, a base consonant would be rewritten before the "consonant + nukta"
  # sequence containing it could ever match.
  def ITRANS.multi_codepoint_first( table )
    multi, single = table.partition { |devana, _| devana.unpack( "U*" ).size() > 1 }
    return multi + single
  end

  private_class_method :shell_single_quote, :multi_codepoint_first

  @@devanagari2itrans = {
    ITRANS.unicode( 0x0901 ) => "",

    # vowels (independent letter followed by the matching vowel sign):
    ITRANS.unicode( 0x0905 ) => "a",
    ITRANS.unicode( 0x0906 ) => "aa", # /A
    ITRANS.unicode( 0x093E ) => "aa", # /A
    ITRANS.unicode( 0x0907 ) => "i",
    ITRANS.unicode( 0x093F ) => "i",
    ITRANS.unicode( 0x0908 ) => "ii", # /I
    ITRANS.unicode( 0x0940 ) => "ii", # /I
    ITRANS.unicode( 0x0909 ) => "u",
    ITRANS.unicode( 0x0941 ) => "u",
    ITRANS.unicode( 0x090A ) => "uu", # /U
    ITRANS.unicode( 0x0942 ) => "uu", # /U
    ITRANS.unicode( 0x090B ) => "RRi", # R^i
    ITRANS.unicode( 0x0943 ) => "RRi", # R^i
    ITRANS.unicode( 0x090C ) => "LLi", # L^i
    ITRANS.unicode( 0x0944 ) => "LLi", # L^i
    ITRANS.unicode( 0x090F ) => "e",
    ITRANS.unicode( 0x0947 ) => "e",
    ITRANS.unicode( 0x0910 ) => "ai",
    ITRANS.unicode( 0x0948 ) => "ai",
    ITRANS.unicode( 0x0913 ) => "o",
    ITRANS.unicode( 0x094B ) => "o",
    ITRANS.unicode( 0x0914 ) => "au",
    ITRANS.unicode( 0x094C ) => "au",
    # itrans irregular (consonant + virama U+094D + consonant):
    ITRANS.unicode( 0x0915, 0x094D, 0x0937 ) => "kSh", # क्ष, x / kS
    ITRANS.unicode( 0x0924, 0x094D, 0x0930 ) => "tr",  # त्र
    ITRANS.unicode( 0x091C, 0x094D, 0x091E ) => "j~n", # ज्ञ, GY / dny
    ITRANS.unicode( 0x0936, 0x094D, 0x0930 ) => "shr", # श्र
  }

  @@devanagari2itrans_consonants = {
    # gutturals:
    ITRANS.unicode( 0x0915 ) => "k",
    ITRANS.unicode( 0x0916 ) => "kh",
    # ITRANS.unicode( 0x0916 ) => ".Nkh",
    ITRANS.unicode( 0x0917 ) => "g",
    ITRANS.unicode( 0x0918 ) => "gh",
    ITRANS.unicode( 0x0919 ) => "~N", # was keyed 0x0918, which overwrote "gh"
    # palatals:
    ITRANS.unicode( 0x091A ) => "ch",
    ITRANS.unicode( 0x091B ) => "Ch",
    ITRANS.unicode( 0x091C ) => "j",
    ITRANS.unicode( 0x091D ) => "jh",
    ITRANS.unicode( 0x091E ) => "~n", # JN
    # retroflexes:
    ITRANS.unicode( 0x091F ) => "T",
    ITRANS.unicode( 0x0920 ) => "Th",
    ITRANS.unicode( 0x0921 ) => "D",
    ITRANS.unicode( 0x0922 ) => "Dh",
    # ITRANS.unicode( 0x0922 ) => ".Dh", # Rh (valid?)
    ITRANS.unicode( 0x0923 ) => "N",
    # dentals:
    ITRANS.unicode( 0x0924 ) => "t",
    ITRANS.unicode( 0x0925 ) => "th",
    ITRANS.unicode( 0x0926 ) => "d",
    ITRANS.unicode( 0x0927 ) => "dh",
    ITRANS.unicode( 0x0928 ) => "n",
    # labials:
    ITRANS.unicode( 0x092A ) => "p",
    ITRANS.unicode( 0x092B ) => "ph",
    ITRANS.unicode( 0x092C ) => "b",
    ITRANS.unicode( 0x092D ) => "bh",
    ITRANS.unicode( 0x092E ) => "m",
    # semi-vowels:
    ITRANS.unicode( 0x092F ) => "y",
    ITRANS.unicode( 0x0930 ) => "r",
    ITRANS.unicode( 0x0932 ) => "l",
    ITRANS.unicode( 0x0935 ) => "v", # w
    # sibilants:
    ITRANS.unicode( 0x0936 ) => "sh",
    ITRANS.unicode( 0x0937 ) => "Sh", # shh
    ITRANS.unicode( 0x0938 ) => "s",
    # miscellaneous:
    ITRANS.unicode( 0x0939 ) => "h",
    ITRANS.unicode( 0x0902 ) => ".n", # M / .m
    ITRANS.unicode( 0x0903 ) => "H", # .h
    ITRANS.unicode( 0x0950 ) => "OM", # AUM
    # other consonants, listed both as base letter + nukta (U+093C) and as
    # the precomposed code point so either spelling of the input matches:
    ITRANS.unicode( 0x0915, 0x093C ) => "q",   # क़
    ITRANS.unicode( 0x0958 ) => "q",
    ITRANS.unicode( 0x0916, 0x093C ) => "Kh",  # ख़
    ITRANS.unicode( 0x0959 ) => "Kh",
    ITRANS.unicode( 0x0917, 0x093C ) => "G",   # ग़
    ITRANS.unicode( 0x095A ) => "G",
    ITRANS.unicode( 0x091C, 0x093C ) => "z",   # ज़
    ITRANS.unicode( 0x095B ) => "z",
    ITRANS.unicode( 0x092B, 0x093C ) => "f",   # फ़
    ITRANS.unicode( 0x095E ) => "f",
    ITRANS.unicode( 0x0921, 0x093C ) => ".D",  # ड़, R
    ITRANS.unicode( 0x095C ) => ".D", # R (valid?)
    ITRANS.unicode( 0x0922, 0x093C ) => ".Dh", # ढ़, Rh
    ITRANS.unicode( 0x095D ) => ".Dh",
  }

end
@@ -0,0 +1,123 @@
1
+ # Copyright (C) 2006-2008 by Sergio Pistone
2
+ # sergio_pistone@yahoo.com.ar
3
+ #
4
+ # This program is free software; you can redistribute it and/or modify
5
+ # it under the terms of the GNU General Public License as published by
6
+ # the Free Software Foundation; either version 2 of the License, or
7
+ # (at your option) any later version.
8
+ #
9
+ # This program is distributed in the hope that it will be useful,
10
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
+ # GNU General Public License for more details.
13
+ #
14
+ # You should have received a copy of the GNU General Public License
15
+ # along with this program; if not, write to the
16
+ # Free Software Foundation, Inc.,
17
+ # 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18
+
19
# Minimal file logger: every call to #log opens the log file in append mode,
# writes the message (optionally indented by the current tabulation) and
# closes the file again.
class Logger

  # file_path::         path of the log file.
  # truncate_to_lines:: when >= 0, the existing log is immediately cut down
  #                     to its last +truncate_to_lines+ lines (see #truncate).
  def initialize( file_path, truncate_to_lines=-1 )
    @file_path = file_path
    @tabulation = nil
    @tabulation_base = " ".freeze()
    @tabulation_level = 0
    @skip_first_line_tabulation = false
    truncate( truncate_to_lines ) if truncate_to_lines >= 0
  end

  def finalize() # TODO revise implementation
  end

  def get_file_path()
    return @file_path
  end

  # Switches logging to +file_path+. When the path actually changes, the
  # previous log file is deleted (unless it is a directory).
  def set_file_path( file_path )
    return if @file_path == file_path
    File.delete( @file_path ) if File.exist?( @file_path ) && ! File.directory?( @file_path )
    @file_path = file_path.clone().freeze()
  end

  # Keeps only the last +max_lines+ lines of the log file. A missing file is
  # created empty. A non-positive +max_lines+ empties a non-empty log.
  def truncate( max_lines )
    begin
      content = File.read( @file_path )
    rescue Errno::ENOENT
      File.open( @file_path, "w" ) {} # create the (empty) log file
      content = ""
    end
    lines = content.split( "\n" )
    return if lines.size() <= max_lines
    kept = max_lines > 0 ? lines.last( max_lines ) : []
    # block form closes the file even if a write fails
    File.open( @file_path, "w" ) do |file|
      kept.each() { |line| file.write( line + "\n" ) }
    end
  end

  # Empties the log file, creating it if necessary.
  def reset()
    File.open( @file_path, "w" ) {}
  end

  # Appends +msg+ to the log followed by +new_lines+ line breaks.
  #
  # While a tabulation is active every line of +msg+ is prefixed with it. The
  # prefix of the first line is skipped when the previous tabulated call
  # ended without a line break (new_lines <= 0), so consecutive calls can
  # build up a single line.
  def log( msg, new_lines=1 )
    msg = msg.to_s()
    File.open( @file_path, "a" ) do |output|
      if @tabulation
        output.write( @tabulation ) if ! @skip_first_line_tabulation
        output.write( msg.gsub( "\n", "\n#{@tabulation}" ) )
        @skip_first_line_tabulation = new_lines <= 0
      else
        output.write( msg )
      end
      output.write( "\n" * new_lines ) if new_lines > 0
    end
  end

  def get_tabulation_base()
    return @tabulation_base
  end

  # Sets the string repeated once per tabulation level and rebuilds the
  # current tabulation prefix accordingly.
  def set_tabulation_base( tabulation_base )
    return if @tabulation_base == tabulation_base
    @tabulation_base = tabulation_base.clone().freeze()
    rebuild_tabulation()
  end

  def get_tabulation_level()
    return @tabulation_level
  end

  # Sets the indentation depth; a level <= 0 disables tabulation.
  def set_tabulation_level( level )
    return if @tabulation_level == level
    @tabulation_level = level
    rebuild_tabulation()
  end

  def increase_tabulation_level()
    set_tabulation_level( @tabulation_level + 1 )
  end

  def decrease_tabulation_level()
    set_tabulation_level( @tabulation_level - 1 )
  end

  private

  # Recomputes @tabulation from the current base string and level
  # (nil when the level is not positive).
  def rebuild_tabulation()
    @tabulation = @tabulation_level > 0 ? @tabulation_base * @tabulation_level : nil
  end

end
@@ -0,0 +1,378 @@
1
+ # Copyright (C) 2006-2008 by Sergio Pistone
2
+ # sergio_pistone@yahoo.com.ar
3
+ #
4
+ # This program is free software; you can redistribute it and/or modify
5
+ # it under the terms of the GNU General Public License as published by
6
+ # the Free Software Foundation; either version 2 of the License, or
7
+ # (at your option) any later version.
8
+ #
9
+ # This program is distributed in the hope that it will be useful,
10
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
+ # GNU General Public License for more details.
13
+ #
14
+ # You should have received a copy of the GNU General Public License
15
+ # along with this program; if not, write to the
16
+ # Free Software Foundation, Inc.,
17
+ # 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18
+
19
+ require File.expand_path( File.dirname( __FILE__ ) + "/htmlentities" )
20
+
21
+ require "cgi"
22
+
23
# unicode support: $KCODE only has an effect on Ruby 1.8; later versions
# ignore it and emit a warning on assignment, so it is only set where it works.
$KCODE = "u" if RUBY_VERSION < "1.9"

# Stateless string helpers: shell/SQL quoting, Latin-1 aware case conversion
# and clean-up of artist names, titles and lyrics.
module Strings

  @@word_separators = " \t\n()[],.;:-¿?¡!\"/\\"

  # Characters that keep a special meaning inside a double-quoted shell string.
  @@shell_dquote_special = /[\\"`$]/

  # Characters backslash-escaped by shell_escape (quotes, expansion and
  # control operators, globbing characters, space and tab).
  @@shell_word_special = /[\\"'`$ \t&;|<>()*?\[\]{}!#~]/

  # Digits, ASCII letters and the Latin-1 letters handled by upcase/downcase
  # (the multiplication and division signs are deliberately left out).
  @@alnum_class = "0-9a-zA-Zà-öø-þÀ-ÖØ-Þ"
  @@first_alnum = /([#{@@alnum_class}])/
  @@leading_alnum = /^([#{@@alnum_class}])/

  # Trailing "feat. X" / "(ft. X)" / "[featuring X]" annotation; group 2 is X.
  @@featuring_suffix = /[ \(\[](ft\.|ft |feat\.|feat |featuring ) *([^\)\]]+)[\)\]]? *$/i

  # True when +text+ (converted with to_s) is empty or whitespace only.
  def Strings.empty?( text )
    return text.to_s().strip().empty?
  end

  # Wraps +text+ in double quotes for a shell, escaping backslash, double
  # quote, backtick and dollar so the shell performs no expansion on it.
  def Strings.shell_quote( text )
    return "\"" + text.gsub( @@shell_dquote_special ) { |char| "\\" + char } + "\""
  end

  # Inverse of shell_quote: drops the first and last character and, for
  # double-quoted input, removes the escaping backslashes in a single pass.
  # Any other input is treated as single-quoted and only loses its quotes.
  def Strings.shell_unquote( text )
    inner = text.slice( 1..-2 )
    if text.slice( 0, 1 ) == "\""
      return inner.gsub( /\\([\\"`$])/ ) { |seq| seq[1, 1] }
    else # if text.slice( 0, 1 ) == "'"
      return inner
    end
  end

  # Backslash-escapes every shell metacharacter of +text+ so the result can be
  # used as a single unquoted shell word.
  def Strings.shell_escape( text )
    return text.gsub( @@shell_word_special ) { |char| "\\" + char }
  end

  # Inverse of shell_escape (single pass, so escaped backslashes survive).
  def Strings.shell_unescape( text )
    return text.gsub( /\\([\\"'`$ \t&;|<>()*?\[\]{}!#~])/ ) { |seq| seq[1, 1] }
  end

  # Returns +text+ as a single-quoted SQL string literal.
  def Strings.sql_quote( text )
    return "'" + Strings.sql_escape( text ) + "'"
  end

  # Inverse of sql_quote.
  def Strings.sql_unquote( text )
    return Strings.sql_unescape( text.slice( 1..-2 ) )
  end

  # Doubles every single quote of +text+.
  def Strings.sql_escape( text )
    return text.gsub( "'", "''" )
  end

  # Inverse of sql_escape.
  def Strings.sql_unescape( text )
    return text.gsub( "''", "'" )
  end

  # Returns a random token of +length+ characters taken from a-z and 0-9.
  def Strings.random_token( length=10 )
    chars = ( "a".."z" ).to_a() + ( "0".."9" ).to_a()
    # rand( n ) yields 0...n, so the full size is needed to reach the last char
    return ( 1..length ).collect() { chars[rand( chars.size() )] }.join()
  end

  # Non-destructive variant of remove_invalid_filename_chars!.
  def Strings.remove_invalid_filename_chars( filename )
    return Strings.remove_invalid_filename_chars!( String.new( filename ) )
  end

  # Removes the characters * ? : | / \ < > from +filename+ in place.
  # (A regexp is used because String#tr treats a backslash in its character
  # spec as an escape, which kept backslashes from being removed.)
  def Strings.remove_invalid_filename_chars!( filename )
    filename.gsub!( /[*?:|\/\\<>]/, "" )
    return filename
  end

  # Non-destructive variant of remove_vocal_accents!.
  def Strings.remove_vocal_accents( text )
    return Strings.remove_vocal_accents!( String.new( text ) )
  end

  # Replaces accented vowels of +text+ with their plain ASCII vowel, in place.
  def Strings.remove_vocal_accents!( text )
    [
      [/á|à|ä|â|å|ã/, "a"], [/Á|À|Ä|Â|Å|Ã/, "A"],
      [/é|è|ë|ê/, "e"],     [/É|È|Ë|Ê/, "E"],
      [/í|ì|ï|î/, "i"],     [/Í|Ì|Ï|Î/, "I"],
      [/ó|ò|ö|ô/, "o"],     [/Ó|Ò|Ö|Ô/, "O"],
      [/ú|ù|ü|û/, "u"],     [/Ú|Ù|Ü|Û/, "U"],
    ].each() { |accented, plain| text.gsub!( accented, plain ) }
    return text
  end

  # Returns +text+ as a quoted search phrase: double quotes are removed and a
  # leading article "the" (followed by a space) is dropped. Returns "" when
  # nothing is left.
  def Strings.google_search_quote( text )
    text = text.gsub( "\"", "" )
    text.gsub!( /^\ *the\ +/i, "" ) # "\ +": never touch words that merely start with "the"
    return Strings.empty?( text ) ? "" : "\"#{text}\""
  end

  # Builds a Google "I'm Feeling Lucky" URL for +query+, optionally restricted
  # to +site+.
  def Strings.build_google_feeling_lucky_url( query, site=nil )
    url = "http://www.google.com/search?q=#{CGI.escape( query )}"
    url += "+site%3A#{CGI.escape( site.to_s() )}" if site
    return url + "&btnI"
  end

  # Lowercases ASCII and Latin-1 letters of +text+ (assumed UTF-8). Falls back
  # to String#downcase when +text+ cannot be decoded as UTF-8.
  def Strings.downcase( text )
    begin
      codepoints = text.to_s().unpack( "U*" ).collect() do |c|
        # A-Z and À-Þ, skipping 215 (multiplication sign)
        ( ( c >= 65 && c <= 90 ) || ( c >= 192 && c <= 222 && c != 215 ) ) ? c + 32 : c
      end
      return codepoints.pack( "U*" )
    rescue StandardError # fallback to normal operation on error
      return text.to_s().downcase()
    end
  end

  def Strings.downcase!( text )
    return text.replace( Strings.downcase( text ) )
  end

  # Uppercases ASCII and Latin-1 letters of +text+ (assumed UTF-8). Falls back
  # to String#upcase when +text+ cannot be decoded as UTF-8.
  def Strings.upcase( text )
    begin
      codepoints = text.to_s().unpack( "U*" ).collect() do |c|
        # a-z and à-þ, skipping 247 (division sign)
        ( ( c >= 97 && c <= 122 ) || ( c >= 224 && c <= 254 && c != 247 ) ) ? c - 32 : c
      end
      return codepoints.pack( "U*" )
    rescue StandardError # fallback to normal operation on error
      return text.to_s().upcase()
    end
  end

  def Strings.upcase!( text )
    return text.replace( Strings.upcase( text ) )
  end

  # Returns a copy of +text+ with its first alphanumeric character uppercased.
  #
  # downcase::   lowercase the whole text first.
  # first_only:: only act when the alphanumeric character is the very first
  #              character of a line.
  def Strings.capitalize( text, downcase=false, first_only=false )
    # always work on a copy so the caller's string is never modified
    text = downcase ? Strings.downcase( text ) : String.new( text.to_s() )
    text.sub!( first_only ? @@leading_alnum : @@first_alnum ) { |c| Strings.upcase( c ) }
    return text
  end

  def Strings.capitalize!( text, downcase=false, first_only=false )
    return text.replace( Strings.capitalize( text, downcase, first_only ) )
  end

  # Returns +text+ with the first letter of every word uppercased.
  #
  # correct_case:: lowercase articles, conjunctions and short prepositions
  #                that are preceded by a space and followed by a separator.
  # downcase::     lowercase the whole text first.
  def Strings.titlecase( text, correct_case=true, downcase=false )
    text = Strings.capitalize( text, downcase )
    word_start = true
    text = text.unpack( "U*" ).collect() do |c|
      chr = [c].pack( "U*" ) # always a UTF-8 string, comparable with the separators
      if @@word_separators.include?( chr )
        word_start = true
      else
        c = Strings.upcase( chr ).unpack( "U*" )[0] if word_start
        word_start = false
      end
      c
    end.pack( "U*" )
    if correct_case
      lc_words = [
        "the", "a", "an", # articles
        "and", "but", "or", "nor", # conjunctions
        "'n'", "'n", "n'", # and contractions
        "as", "at", "by", "for", "in", "of", "on", "to", # short prepositions
        #"from", "into", "onto", "with", "over" # not so short prepositions
        "feat", "vs", # special words
      ]
      lc_words.each() do |lc_word|
        # "-" is escaped: unescaped it formed the range "."-"?" (digits included)
        text.gsub!( /\ #{Regexp.escape( lc_word )}([ ,;:\.\-?!\"\/\\\)])/i, " #{lc_word}\\1" )
      end
    end
    return text
  end

  def Strings.titlecase!( text, correct_case=true, downcase=false )
    return text.replace( Strings.titlecase( text, correct_case, downcase ) )
  end

  # Returns a canonical form of +token+ for comparisons: lowercased,
  # whitespace and ".;:()[]" squeezed to single spaces, quote variants
  # unified, "&", "+" and "'n'" spelled "and", and the article "the" removed
  # from the start ("the x") or the end ("x, the").
  def Strings.normalize( token )
    token = Strings.downcase( token )
    token.tr_s!( " \n\r\t.;:()[]", " " )
    token.strip!()
    token.gsub!( /`|´|’/, "'" )
    token.gsub!( /''|«|»/, "\"" )
    token.gsub!( /[&+]/, "and" )
    token.gsub!( /\ ('n'|'n|n') /, " and " )
    token.gsub!( /^the /, "" )
    token.gsub!( /, the$/, "" )
    return token
  end

  def Strings.normalize!( token )
    return token.replace( Strings.normalize( token ) )
  end

  # Decodes HTML entities in place: in +var+ itself when it is a String, in
  # every value (recursively) when it is a Hash. Other objects are untouched.
  def Strings.decode_htmlentities!( var )
    if var.is_a?( String )
      HTMLEntities.decode!( var )
    elsif var.is_a?( Hash )
      var.each() { |_key, value| Strings.decode_htmlentities!( value ) }
    end
    return var
  end

  # Non-destructive variant of decode_htmlentities!: returns a decoded String,
  # a new Hash with decoded values, or +var+ itself for any other object.
  def Strings.decode_htmlentities( var )
    if var.is_a?( String )
      return HTMLEntities.decode( var )
    elsif var.is_a?( Hash )
      ret = {}
      var.each() { |key, value| ret[key] = Strings.decode_htmlentities( value ) }
      return ret
    else
      return var
    end
  end

  # Returns a tidied copy of +lyrics+: HTML entities decoded, whitespace and
  # punctuation normalized line by line, each line capitalized, runs of empty
  # lines collapsed to one and leading/trailing empty lines removed.
  def Strings.cleanup_lyrics( lyrics )

    lyrics = HTMLEntities.decode( lyrics )

    prev_line = ""
    lines = []

    lyrics.split( /\r\n|\n|\r/ ).each() do |line|

      # remove unnecesary spaces
      line.tr_s!( "\t ", " " )
      line.strip!()

      # quotes and double quotes
      line.gsub!( /`|´|’|‘|’|’/, "'" )
      line.gsub!( /''|&quot;|«|»|„|”|“|”/, "\"" )

      # suspensive points
      line.gsub!( /…+/, "..." )
      line.gsub!( /[,;]?\.{2,}/, "..." )

      # add space after "?", "!", ",", ";", ":", ".", ")" and "]" if not present
      line.gsub!( /([^\.]?[\?!,;:\.\)\]])([^ "'<])/, "\\1 \\2" )

      # remove spaces after "¿", "¡", "(" and "["
      line.gsub!( /([¿¡\(\[]) /, "\\1" )

      # remove spaces before "?", "!", ",", ";", ":", ".", ")" and "]"
      line.gsub!( /\ ([\?!,;:\.\)\]])/, "\\1" )

      # remove space after ... at the beginning of sentence
      line.gsub!( /^\.\.\. /, "..." )

      # remove single points at end of sentence
      line.gsub!( /([^\.])\.$/, "\\1" )

      # remove commas and semicolons at end of sentence
      line.gsub!( /[,;]$/, "" )

      # fix english I pronoun capitalization
      line.gsub!( /([ "'\(\[])i([\ '",;:\.\?!\]\)]|$)/, "\\1I\\2" )

      # remove spaces after " or ' at the beginning of the sentence, or before them at its end
      line.sub!( /^(["']) /, "\\1" )
      line.sub!( /\ (["'])$/, "\\1" )

      # capitalize first alphanumeric character of the line
      Strings.capitalize!( line )

      # no more than one empty line at the time
      if ! line.empty? || ! prev_line.empty?
        lines << line
        prev_line = line
      end
    end

    # drop the empty line possibly left at the end
    lines.pop() if ! lines.empty? && lines.last().empty?

    return lines.join( "\n" )
  end

  def Strings.cleanup_lyrics!( lyrics )
    return lyrics.replace( Strings.cleanup_lyrics( lyrics ) )
  end

  # Returns the stripped +artist+ with a uniform " feat. X" suffix. The
  # featured artist is taken from a "feat." annotation at the end of +title+
  # when there is one, otherwise an annotation at the end of +artist+ itself
  # is rewritten. An empty artist is returned unchanged.
  def Strings.cleanup_artist( artist, title )
    artist = artist.strip()
    return artist if artist == ""
    md = @@featuring_suffix.match( title.to_s() )
    if md
      artist << " feat. " << md[2]
    else
      artist.gsub!( @@featuring_suffix, " feat. \\2" )
    end
    return artist
  end

  # Returns +title+ without a trailing "feat." annotation and surrounding
  # whitespace.
  def Strings.cleanup_title( title )
    return title.gsub( @@featuring_suffix, "" ).strip()
  end

  # Re-encodes UTF-8 +text+ as Latin-1 bytes. On failure a warning is written
  # to $stderr and +text+ is returned unchanged.
  def Strings.utf82latin1( text )
    begin
      return text.unpack( "U*" ).pack( "C*" )
    rescue StandardError
      $stderr << "warning: conversion from UTF-8 to Latin1 failed\n"
      return text
    end
  end

  # Re-encodes Latin-1 +text+ as UTF-8. On failure a warning is written to
  # $stderr and +text+ is returned unchanged.
  def Strings.latin12utf8( text )
    begin
      return text.unpack( "C*" ).pack( "U*" )
    rescue StandardError
      $stderr << "warning: conversion from Latin1 to UTF-8 failed\n"
      return text
    end
  end

  # Obfuscates +text+ (not encryption). Two rounds of: reverse the code
  # points, add position + 1 to each one and join the numbers with ":".
  def Strings.scramble( text )
    text = text.to_s()
    2.times() do
      shifted = []
      text.unpack( "U*" ).reverse().each_with_index() { |c, idx| shifted << ( c + idx + 1 ).to_s() }
      text = shifted.join( ":" )
    end
    return text
  end

  def Strings.scramble!( text )
    return text.replace( Strings.scramble( text ) )
  end

  # Inverse of scramble.
  def Strings.descramble( text )
    text = text.to_s()
    2.times() do
      restored = []
      text.split( ":" ).each_with_index() { |num, idx| restored << ( num.to_i() - idx - 1 ) }
      text = restored.reverse().pack( "U*" )
    end
    return text
  end

  def Strings.descramble!( text )
    return text.replace( Strings.descramble( text ) )
  end

end
378
+