lyrics 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. data/.document +5 -0
  2. data/.gitignore +21 -0
  3. data/LICENSE +20 -0
  4. data/README.rdoc +17 -0
  5. data/Rakefile +45 -0
  6. data/VERSION +1 -0
  7. data/bin/lyrics +66 -0
  8. data/lib/lyrics.rb +4 -0
  9. data/lib/lyrics/cli/application.rb +99 -0
  10. data/lib/lyrics/cli/optionsparser.rb +228 -0
  11. data/lib/lyrics/cli/pluginadapter.rb +56 -0
  12. data/lib/lyrics/cli/plugins.rb +79 -0
  13. data/lib/lyrics/cli/wikipluginadapter.rb +139 -0
  14. data/lib/lyrics/i18n/README +1 -0
  15. data/lib/lyrics/i18n/en.rb +181 -0
  16. data/lib/lyrics/i18n/es.rb +181 -0
  17. data/lib/lyrics/i18n/i18n.rb +126 -0
  18. data/lib/lyrics/i18n/sk.rb +174 -0
  19. data/lib/lyrics/itrans/COPYRIGHT +31 -0
  20. data/lib/lyrics/itrans/itrans +0 -0
  21. data/lib/lyrics/itrans/itrans.txt +8 -0
  22. data/lib/lyrics/itrans/lyric.txt +23 -0
  23. data/lib/lyrics/itrans/udvng.ifm +206 -0
  24. data/lib/lyrics/lyrics.rb +567 -0
  25. data/lib/lyrics/lyrics_AZLyrics.rb +113 -0
  26. data/lib/lyrics/lyrics_DarkLyrics.rb +124 -0
  27. data/lib/lyrics/lyrics_Giitaayan.rb +124 -0
  28. data/lib/lyrics/lyrics_Jamendo.rb +166 -0
  29. data/lib/lyrics/lyrics_LeosLyrics.rb +142 -0
  30. data/lib/lyrics/lyrics_LoudSongs.rb +135 -0
  31. data/lib/lyrics/lyrics_LyricWiki.rb +328 -0
  32. data/lib/lyrics/lyrics_LyricsDownload.rb +118 -0
  33. data/lib/lyrics/lyrics_LyricsMania.rb +141 -0
  34. data/lib/lyrics/lyrics_Lyriki.rb +286 -0
  35. data/lib/lyrics/lyrics_SeekLyrics.rb +108 -0
  36. data/lib/lyrics/lyrics_Sing365.rb +103 -0
  37. data/lib/lyrics/lyrics_TerraLetras.rb +126 -0
  38. data/lib/lyrics/mediawikilyrics.rb +1417 -0
  39. data/lib/lyrics/utils/formdata.rb +56 -0
  40. data/lib/lyrics/utils/htmlentities.rb +291 -0
  41. data/lib/lyrics/utils/http.rb +198 -0
  42. data/lib/lyrics/utils/itrans.rb +160 -0
  43. data/lib/lyrics/utils/logger.rb +123 -0
  44. data/lib/lyrics/utils/strings.rb +378 -0
  45. data/lib/lyrics/utils/xmlhash.rb +111 -0
  46. data/lyrics.gemspec +98 -0
  47. data/spec/lyrics_spec.rb +7 -0
  48. data/spec/spec.opts +1 -0
  49. data/spec/spec_helper.rb +9 -0
  50. metadata +137 -0
@@ -0,0 +1,160 @@
1
+ # Copyright (C) 2006-2008 by Sergio Pistone
2
+ # sergio_pistone@yahoo.com.ar
3
+ #
4
+ # This program is free software; you can redistribute it and/or modify
5
+ # it under the terms of the GNU General Public License as published by
6
+ # the Free Software Foundation; either version 2 of the License, or
7
+ # (at your option) any later version.
8
+ #
9
+ # This program is distributed in the hope that it will be useful,
10
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
+ # GNU General Public License for more details.
13
+ #
14
+ # You should have received a copy of the GNU General Public License
15
+ # along with this program; if not, write to the
16
+ # Free Software Foundation, Inc.,
17
+ # 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18
+
19
+ require File.expand_path( File.dirname( __FILE__ ) + "/strings" )
20
+
21
# Helpers for converting text between the ITRANS romanization and Devanagari.
#
# ITRANS -> Devanagari is delegated to the external `itrans` executable that
# lives in the sibling "itrans" directory; Devanagari -> ITRANS is done here
# with lookup tables and a few regular expressions.
module ITRANS

  # Directory the `itrans` executable is run from (the command input names
  # the `udvng.ifm` file relative to it).
  @@itrans_dir = File.dirname(File.expand_path(__FILE__)) + "/../itrans"
  @@null_dev = "/dev/null"

  # Converts +text+ to Devanagari and straight back, so that different ITRANS
  # spellings of the same word end up with one spelling.
  # Returns a new String.
  def ITRANS.normalize(text)
    return ITRANS.from_devanagari!(ITRANS.to_devanagari(text))
  end

  # In-place variant of ITRANS.to_devanagari; returns +text+.
  def ITRANS.to_devanagari!(text)
    text.replace(to_devanagari(text))
  end

  # Runs +text+ through the external `itrans` program and returns its
  # (stripped) standard output. Standard error is discarded, so a failing or
  # missing executable simply yields an empty String.
  #
  # Relies on Strings.shell_quote (utils/strings.rb) to quote the input.
  def ITRANS.to_devanagari(text)
    input = "#indianifm=udvng.ifm\n #indian\n#{text}\n#endindian"
    # Block form restores the previous working directory even if the command
    # raises. The executable is addressed relative to that directory so an
    # installation path containing spaces cannot break the shell command.
    trans = Dir.chdir(@@itrans_dir) do
      `echo #{Strings.shell_quote(input)} | ./itrans -U 2>#{@@null_dev}`
    end
    # Drop everything from a "%" to the end of its line
    # (original note: "TODO search line").
    trans.gsub!(/%[^\n]*/, "")
    trans.strip!
    return trans
  end

  # Replaces Devanagari characters in +text+ with their ITRANS spelling,
  # modifying +text+ in place. Returns +text+.
  #
  # Vowels, vowel signs and the irregular conjuncts are substituted first.
  # Consonants then get an inherent "a" appended when they stand alone in a
  # word or are followed by something that is not a Latin vowel, punctuation
  # or whitespace.
  def ITRANS.from_devanagari!(text)
    @@devanagari2itrans.each do |devana, itrans|
      text.gsub!(devana, itrans)
    end
    @@consonant_rules.each do |devana, itrans|
      # is the only symbol in the 'word' --> add an 'a' at the end:
      text.gsub!(/(^|[ "\.:;\(\[])#{devana}([,;:?!\)\]\s]|$)/, "\\1#{itrans}a\\2")
      # is not followed by a vowel --> add an 'a' at the end:
      text.gsub!(/#{devana}([^aeiouAEIOU,;:?!\)\]\s])/, "#{itrans}a\\1")
      text.gsub!(devana, itrans)
    end
    return text
  end

  # Non-destructive variant of ITRANS.from_devanagari!; returns a new String.
  def ITRANS.from_devanagari(text)
    return ITRANS.from_devanagari!(String.new(text))
  end

  # Builds a UTF-8 String from one or more Unicode code points.
  def ITRANS.unicode(*codepoints)
    codepoints.pack("U*")
  end

  # Vowels, vowel signs and irregular conjuncts (replaced verbatim).
  @@devanagari2itrans = {
    ITRANS.unicode(0x0901) => "",

    # vowels:
    ITRANS.unicode(0x0905) => "a",
    ITRANS.unicode(0x0906) => "aa", # /A
    ITRANS.unicode(0x093E) => "aa", # /A
    ITRANS.unicode(0x0907) => "i",
    ITRANS.unicode(0x093F) => "i",
    ITRANS.unicode(0x0908) => "ii", # /I
    ITRANS.unicode(0x0940) => "ii", # /I
    ITRANS.unicode(0x0909) => "u",
    ITRANS.unicode(0x0941) => "u",
    ITRANS.unicode(0x090A) => "uu", # /U
    ITRANS.unicode(0x0942) => "uu", # /U
    ITRANS.unicode(0x090B) => "RRi", # R^i
    ITRANS.unicode(0x0943) => "RRi", # R^i
    ITRANS.unicode(0x090C) => "LLi", # L^i
    ITRANS.unicode(0x0944) => "LLi", # L^i
    ITRANS.unicode(0x090F) => "e",
    ITRANS.unicode(0x0947) => "e",
    ITRANS.unicode(0x0910) => "ai",
    ITRANS.unicode(0x0948) => "ai",
    ITRANS.unicode(0x0913) => "o",
    ITRANS.unicode(0x094B) => "o",
    ITRANS.unicode(0x0914) => "au",
    ITRANS.unicode(0x094C) => "au",
    # itrans irregular
    "क्ष" => "kSh", # x / kS
    "त्र" => "tr",
    "ज्ञ" => "j~n", # GY / dny
    "श्र" => "shr",
  }

  # Consonants (subject to the inherent-"a" rules in from_devanagari!).
  @@devanagari2itrans_consonants = {
    # gutturals:
    ITRANS.unicode(0x0915) => "k",
    ITRANS.unicode(0x0916) => "kh",
    # ITRANS.unicode( 0x0916 ) => ".Nkh",
    ITRANS.unicode(0x0917) => "g",
    ITRANS.unicode(0x0918) => "gh",
    # nga is U+0919; it used to be keyed as U+0918, which overwrote "gh"
    ITRANS.unicode(0x0919) => "~N",
    # palatals:
    ITRANS.unicode(0x091A) => "ch",
    ITRANS.unicode(0x091B) => "Ch",
    ITRANS.unicode(0x091C) => "j",
    ITRANS.unicode(0x091D) => "jh",
    ITRANS.unicode(0x091E) => "~n", # JN
    # retroflexes:
    ITRANS.unicode(0x091F) => "T",
    ITRANS.unicode(0x0920) => "Th",
    ITRANS.unicode(0x0921) => "D",
    ITRANS.unicode(0x0922) => "Dh",
    # ITRANS.unicode( 0x0922 ) => ".Dh", # Rh (valid?)
    ITRANS.unicode(0x0923) => "N",
    # dentals:
    ITRANS.unicode(0x0924) => "t",
    ITRANS.unicode(0x0925) => "th",
    ITRANS.unicode(0x0926) => "d",
    ITRANS.unicode(0x0927) => "dh",
    ITRANS.unicode(0x0928) => "n",
    # labials:
    ITRANS.unicode(0x092A) => "p",
    ITRANS.unicode(0x092B) => "ph",
    ITRANS.unicode(0x092C) => "b",
    ITRANS.unicode(0x092D) => "bh",
    ITRANS.unicode(0x092E) => "m",
    # semi-vowels:
    ITRANS.unicode(0x092F) => "y",
    ITRANS.unicode(0x0930) => "r",
    ITRANS.unicode(0x0932) => "l",
    ITRANS.unicode(0x0935) => "v", # w
    # sibilants:
    ITRANS.unicode(0x0936) => "sh",
    ITRANS.unicode(0x0937) => "Sh", # shh
    ITRANS.unicode(0x0938) => "s",
    # miscellaneous:
    ITRANS.unicode(0x0939) => "h",
    ITRANS.unicode(0x0902) => ".n", # M / .m
    ITRANS.unicode(0x0903) => "H", # .h
    ITRANS.unicode(0x0950) => "OM", # AUM
    # other consonants, decomposed form (base consonant + nukta U+093C):
    ITRANS.unicode(0x0915, 0x093C) => "q",
    ITRANS.unicode(0x0916, 0x093C) => "Kh",
    ITRANS.unicode(0x0917, 0x093C) => "G",
    ITRANS.unicode(0x091C, 0x093C) => "z",
    ITRANS.unicode(0x092B, 0x093C) => "f",
    ITRANS.unicode(0x0921, 0x093C) => ".D", # R
    ITRANS.unicode(0x0922, 0x093C) => ".Dh", # Rh
    # other consonants, precomposed form:
    ITRANS.unicode(0x0958) => "q",
    ITRANS.unicode(0x0959) => "Kh",
    ITRANS.unicode(0x095A) => "G",
    ITRANS.unicode(0x095B) => "z",
    ITRANS.unicode(0x095C) => ".D", # R (valid?)
    ITRANS.unicode(0x095D) => ".Dh", # Rh
    ITRANS.unicode(0x095E) => "f",
  }

  # Application order for the consonant table: multi-code-point keys go
  # first, otherwise their base consonant would already have been rewritten
  # by the time they are tried. Relative order is otherwise preserved.
  compound, simple = @@devanagari2itrans_consonants.partition do |devana, _itrans|
    devana.unpack("U*").size > 1
  end
  @@consonant_rules = compound + simple

end
@@ -0,0 +1,123 @@
1
+ # Copyright (C) 2006-2008 by Sergio Pistone
2
+ # sergio_pistone@yahoo.com.ar
3
+ #
4
+ # This program is free software; you can redistribute it and/or modify
5
+ # it under the terms of the GNU General Public License as published by
6
+ # the Free Software Foundation; either version 2 of the License, or
7
+ # (at your option) any later version.
8
+ #
9
+ # This program is distributed in the hope that it will be useful,
10
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
+ # GNU General Public License for more details.
13
+ #
14
+ # You should have received a copy of the GNU General Public License
15
+ # along with this program; if not, write to the
16
+ # Free Software Foundation, Inc.,
17
+ # 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18
+
19
# Minimal file logger with optional per-line indentation ("tabulation").
#
# Every call opens the log file, writes and closes it again; no handle is
# kept between calls.
class Logger

  # file_path::         path of the log file.
  # truncate_to_lines:: when >= 0, the existing file is immediately cut down
  #                     to its last +truncate_to_lines+ lines (see #truncate).
  def initialize(file_path, truncate_to_lines = -1)
    @file_path = file_path
    @tabulation = nil            # current indentation prefix, nil when level <= 0
    @tabulation_base = " ".freeze
    @tabulation_level = 0
    @skip_first_line_tabulation = false
    truncate(truncate_to_lines) if truncate_to_lines >= 0
  end

  def finalize # TODO revise implementation
  end

  # Returns the path of the log file.
  def get_file_path
    return @file_path
  end

  # Switches to another log file. When the path actually changes, the file at
  # the previous path is deleted (if it exists and is not a directory).
  def set_file_path(file_path)
    return if @file_path == file_path
    File.delete(@file_path) if File.exist?(@file_path) && !File.directory?(@file_path)
    @file_path = file_path.clone.freeze
  end

  # Keeps only the last +max_lines+ lines of the log file. The file is
  # created (empty) when it does not exist yet. Returns nil.
  def truncate(max_lines)
    content =
      begin
        File.open(@file_path, File::RDONLY) { |file| file.read }
      rescue Errno::ENOENT
        # Create the missing file; an explicit write mode is required for
        # CREAT/TRUNC to be well defined.
        File.open(@file_path, File::CREAT | File::TRUNC | File::WRONLY) { }
        ""
      end
    lines = content.split("\n")
    return nil if lines.size <= max_lines

    kept = max_lines > 0 ? lines.last(max_lines) : []
    File.open(@file_path, File::CREAT | File::TRUNC | File::WRONLY) do |file|
      kept.each do |line|
        file.write(line)
        file.write("\n")
      end
    end
    return nil
  end

  # Empties the log file, creating it when necessary. Returns nil.
  def reset
    File.open(@file_path, File::CREAT | File::TRUNC | File::WRONLY) { }
    return nil
  end

  # Appends +msg+ followed by +new_lines+ line breaks.
  #
  # With a tabulation level above zero, every line of +msg+ is prefixed with
  # the current indentation. When +new_lines+ is <= 0 the next call continues
  # on the same line, so that call does not indent its first line.
  # Returns nil.
  def log(msg, new_lines = 1)
    msg = msg.to_s
    File.open(@file_path, File::CREAT | File::APPEND | File::WRONLY) do |output|
      if @tabulation
        output.write(@tabulation) unless @skip_first_line_tabulation
        output.write(msg.gsub("\n", "\n#{@tabulation}"))
        @skip_first_line_tabulation = new_lines <= 0
      else
        output.write(msg)
      end
      new_lines.times { output.write("\n") }
    end
    return nil
  end

  # Returns the string repeated once per tabulation level.
  def get_tabulation_base
    return @tabulation_base
  end

  # Changes the string repeated once per tabulation level and rebuilds the
  # current indentation from it. (This used to overwrite the tabulation level
  # and then raise NameError.)
  def set_tabulation_base(tabulation_base)
    return if @tabulation_base == tabulation_base
    @tabulation_base = tabulation_base.clone.freeze
    rebuild_tabulation
  end

  # Returns the current tabulation level.
  def get_tabulation_level
    return @tabulation_level
  end

  # Sets the tabulation level; a level <= 0 disables indentation.
  def set_tabulation_level(level)
    return if @tabulation_level == level
    @tabulation_level = level
    rebuild_tabulation
  end

  def increase_tabulation_level
    set_tabulation_level(@tabulation_level + 1)
  end

  def decrease_tabulation_level
    set_tabulation_level(@tabulation_level - 1)
  end

  private

  # Recomputes the indentation prefix from the current base and level.
  def rebuild_tabulation
    @tabulation = @tabulation_level > 0 ? @tabulation_base * @tabulation_level : nil
  end

end
@@ -0,0 +1,378 @@
1
+ # Copyright (C) 2006-2008 by Sergio Pistone
2
+ # sergio_pistone@yahoo.com.ar
3
+ #
4
+ # This program is free software; you can redistribute it and/or modify
5
+ # it under the terms of the GNU General Public License as published by
6
+ # the Free Software Foundation; either version 2 of the License, or
7
+ # (at your option) any later version.
8
+ #
9
+ # This program is distributed in the hope that it will be useful,
10
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
+ # GNU General Public License for more details.
13
+ #
14
+ # You should have received a copy of the GNU General Public License
15
+ # along with this program; if not, write to the
16
+ # Free Software Foundation, Inc.,
17
+ # 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18
+
19
+ require File.expand_path( File.dirname( __FILE__ ) + "/htmlentities" )
20
+
21
+ require "cgi"
22
+
23
+ $KCODE="u" # unicode support
24
+
25
# Assorted string helpers: shell/SQL quoting, Latin-1 aware case conversion,
# title casing, lyrics/artist/title clean-up and a trivial scrambler.
#
# HTMLEntities (utils/htmlentities.rb) and CGI must be loaded by the file.
module Strings

  # Characters Strings.titlecase treats as word boundaries.
  @@word_separators = " \t\n()[],.;:-¿?¡!\"/\\"

  # Trailing "featuring" credit, optionally bracketed; group 2 is the guest.
  @@featuring_credit = /[ \(\[](ft\.|ft |feat\.|feat |featuring ) *([^\)\]]+)[\)\]]? *$/i

  # ASCII alphanumerics plus the Latin-1 letters (the × and ÷ signs, which
  # sit in the middle of those ranges, are not letters and are left out).
  @@first_alnum = /[0-9a-zA-ZÀ-ÖØ-Þà-öø-þ]/
  @@line_start_alnum = /^[0-9a-zA-ZÀ-ÖØ-Þà-öø-þ]/

  # Accented vowel classes and their plain replacement.
  @@accent_folds = [
    [/[áàäâåã]/, "a"], [/[ÁÀÄÂÅÃ]/, "A"],
    [/[éèëê]/, "e"], [/[ÉÈËÊ]/, "E"],
    [/[íìïî]/, "i"], [/[ÍÌÏÎ]/, "I"],
    [/[óòöô]/, "o"], [/[ÓÒÖÔ]/, "O"],
    [/[úùüû]/, "u"], [/[ÚÙÜÛ]/, "U"],
  ]

  # Words Strings.titlecase keeps in lower case (except as the first word).
  @@lowercase_title_words = [
    "the", "a", "an", # articles
    "and", "but", "or", "nor", # conjunctions
    "'n'", "'n", "n'", # and contractions
    "as", "at", "by", "for", "in", "of", "on", "to", # short prepositions
    #"from", "into", "onto", "with", "over" # not so short prepositions
    "feat", "vs", # special words
  ]

  # True when +text+ (converted with to_s) is empty or only whitespace.
  def Strings.empty?(text)
    return text.to_s.strip.empty?
  end

  # Wraps +text+ in double quotes for use on a shell command line, escaping
  # every character that stays special inside double quotes: backslash,
  # double quote, backtick and "$" (the latter so that "$(...)" or "$VAR" in
  # the text cannot be expanded by the shell).
  def Strings.shell_quote(text)
    return "\"" + text.gsub(/[\\"`$]/) { |c| "\\" + c } + "\""
  end

  # Reverses Strings.shell_quote. Text that does not start with a double
  # quote is assumed to be single quoted: only its first and last characters
  # are dropped.
  def Strings.shell_unquote(text)
    inner = text.slice(1..-2)
    return inner unless text.slice(0, 1) == "\""
    # single pass, so an escaped backslash is never mistaken for an escape
    return inner.gsub(/\\([\\"`$])/) { $1 }
  end

  # Backslash-escapes backslash, double quote, backtick, single quote and
  # space in +text+.
  # NOTE(review): other shell metacharacters ("$", ";", "&", "|", ...) are
  # left untouched, exactly as before; do not rely on this for untrusted text.
  def Strings.shell_escape(text)
    return text.gsub(/[\\"`' ]/) { |c| "\\" + c }
  end

  # Reverses Strings.shell_escape.
  def Strings.shell_unescape(text)
    return text.gsub(/\\([\\"`' ])/) { $1 }
  end

  # Wraps +text+ in single quotes, doubling embedded single quotes.
  def Strings.sql_quote(text)
    return "'" + Strings.sql_escape(text) + "'"
  end

  # Reverses Strings.sql_quote (first and last characters are dropped).
  def Strings.sql_unquote(text)
    return Strings.sql_unescape(text.slice(1..-2))
  end

  # Doubles every single quote in +text+.
  def Strings.sql_escape(text)
    return text.gsub("'", "''")
  end

  # Collapses doubled single quotes in +text+.
  def Strings.sql_unescape(text)
    return text.gsub("''", "'")
  end

  # Returns +length+ random characters drawn from a-z and 0-9.
  def Strings.random_token(length = 10)
    chars = ("a".."z").to_a + ("0".."9").to_a
    token = ""
    # rand(n) already excludes n, so the whole alphabet is reachable
    1.upto(length) { token << chars[rand(chars.size)] }
    return token
  end

  # Returns a copy of +filename+ without the characters * ? : | / \ < >.
  def Strings.remove_invalid_filename_chars(filename)
    return Strings.remove_invalid_filename_chars!(String.new(filename))
  end

  # In-place variant of Strings.remove_invalid_filename_chars; returns
  # +filename+.
  def Strings.remove_invalid_filename_chars!(filename)
    # In a character selector a backslash escapes the next character, so the
    # backslash itself has to be written escaped to be part of the set.
    filename.delete!("*?:|/\\\\<>")
    return filename
  end

  # Returns a copy of +text+ with accented vowels replaced by plain ones.
  def Strings.remove_vocal_accents(text)
    return Strings.remove_vocal_accents!(String.new(text))
  end

  # In-place variant of Strings.remove_vocal_accents; returns +text+.
  def Strings.remove_vocal_accents!(text)
    @@accent_folds.each { |accented, plain| text.gsub!(accented, plain) }
    return text
  end

  # Prepares +text+ for an exact-phrase search: double quotes are removed, a
  # leading article "the" is dropped and the rest is wrapped in double
  # quotes. Returns "" when nothing is left.
  def Strings.google_search_quote(text)
    text = text.gsub("\"", "")
    # only a whole leading word "the" is dropped, not the start of "Therapy"
    text.gsub!(/^ *the( +|$)/i, "")
    return Strings.empty?(text) ? "" : "\"#{text}\""
  end

  # Builds a Google "I'm feeling lucky" URL for +query+, optionally limited
  # to +site+.
  def Strings.build_google_feeling_lucky_url(query, site = nil)
    url = "http://www.google.com/search?q=#{CGI.escape(query)}"
    url += "+site%3A#{site}" if site
    return url + "&btnI"
  end

  # Lower-cases ASCII and Latin-1 letters of a UTF-8 string; every other
  # character is left alone. Falls back to String#downcase when +text+ is
  # not valid UTF-8.
  def Strings.downcase(text)
    codes = text.to_s.unpack("U*").collect do |c|
      # A-Z, À-Ö and Ø-Þ; 215 (×) is not a letter
      if (c >= 65 && c <= 90) || (c >= 192 && c <= 222 && c != 215)
        c + 32
      else
        c
      end
    end
    return codes.pack("U*")
  rescue StandardError # fallback to normal operation on error
    return text.to_s.downcase
  end

  # In-place variant of Strings.downcase; returns +text+.
  def Strings.downcase!(text)
    return text.replace(Strings.downcase(text))
  end

  # Upper-cases ASCII and Latin-1 letters of a UTF-8 string; every other
  # character is left alone. Falls back to String#upcase when +text+ is not
  # valid UTF-8.
  def Strings.upcase(text)
    codes = text.to_s.unpack("U*").collect do |c|
      # a-z, à-ö and ø-þ; 247 (÷) is not a letter
      if (c >= 97 && c <= 122) || (c >= 224 && c <= 254 && c != 247)
        c - 32
      else
        c
      end
    end
    return codes.pack("U*")
  rescue StandardError # fallback to normal operation on error
    return text.to_s.upcase
  end

  # In-place variant of Strings.upcase; returns +text+.
  def Strings.upcase!(text)
    return text.replace(Strings.upcase(text))
  end

  # Upper-cases the first alphanumeric character of +text+ and returns the
  # result as a new String (the argument is not modified).
  #
  # downcase::   lower-case the whole text first.
  # first_only:: only act when that character is the very first one of a line.
  def Strings.capitalize(text, downcase = false, first_only = false)
    text = downcase ? Strings.downcase(text) : text.to_s
    pattern = first_only ? @@line_start_alnum : @@first_alnum
    return text.sub(pattern) { |c| Strings.upcase(c) }
  end

  # In-place variant of Strings.capitalize; returns +text+.
  def Strings.capitalize!(text, downcase = false, first_only = false)
    return text.replace(Strings.capitalize(text, downcase, first_only))
  end

  # Upper-cases the first letter of every word of +text+.
  #
  # correct_case:: put articles, short prepositions, etc. back in lower case
  #                when they are preceded by a space and followed by a space
  #                or punctuation.
  # downcase::     lower-case the whole text first.
  def Strings.titlecase(text, correct_case = true, downcase = false)
    text = Strings.capitalize(text, downcase)
    word_start = true
    codes = text.unpack("U*").collect do |c|
      # always build a UTF-8 character; Integer#chr gives a binary string for
      # 128..255, which cannot be compared with the UTF-8 separator list
      chr = [c].pack("U*")
      if @@word_separators.include?(chr)
        word_start = true
      elsif word_start
        word_start = false
        c = Strings.upcase(chr).unpack("U*")[0]
      end
      c
    end
    text = codes.pack("U*")
    if correct_case
      @@lowercase_title_words.each do |lc_word|
        # "-" is escaped: unescaped it formed the range "." to "?", which
        # made digits count as word terminators
        text.gsub!(/ #{lc_word}([ ,;:.\-?!"\/\\)])/i, " #{lc_word}\\1")
      end
    end
    return text
  end

  # In-place variant of Strings.titlecase; returns +text+.
  def Strings.titlecase!(text, correct_case = true, downcase = false)
    return text.replace(Strings.titlecase(text, correct_case, downcase))
  end

  # Reduces +token+ to a canonical form for comparisons: lower case, runs of
  # whitespace and ".;:()[]" squeezed to one space, quote variants unified,
  # "&", "+" and "'n'" spelled "and", leading "the " / trailing ", the"
  # removed. Returns a new String.
  def Strings.normalize(token)
    token = Strings.downcase(token)
    token.tr_s!(" \n\r\t.;:()[]", " ")
    token.strip!
    token.gsub!(/`|´|’/, "'")
    token.gsub!(/''|«|»/, "\"")
    token.gsub!(/[&+]/, "and")
    token.gsub!(/\ ('n'|'n|n') /, " and ")
    token.gsub!(/^the /, "")
    token.gsub!(/, the$/, "")
    return token
  end

  # In-place variant of Strings.normalize; returns +token+.
  def Strings.normalize!(token)
    return token.replace(Strings.normalize(token))
  end

  # Decodes HTML entities in place in a String, or in every value of a Hash
  # (recursively). Anything else is returned untouched. Returns +var+.
  def Strings.decode_htmlentities!(var)
    if var.is_a?(String)
      HTMLEntities.decode!(var)
    elsif var.is_a?(Hash)
      var.each { |_key, value| decode_htmlentities!(value) }
    end
    return var
  end

  # Non-destructive variant of Strings.decode_htmlentities!: returns a
  # decoded String, a new Hash with decoded values, or +var+ itself.
  def Strings.decode_htmlentities(var)
    if var.is_a?(String)
      return HTMLEntities.decode(var)
    elsif var.is_a?(Hash)
      ret = {}
      var.each { |key, value| ret[key] = decode_htmlentities(value) }
      return ret
    else
      return var
    end
  end

  # Tidies raw lyrics: decodes HTML entities, then normalizes whitespace,
  # quotes, ellipses and spacing around punctuation line by line, capitalizes
  # each line and collapses consecutive empty lines. Returns a new String
  # with lines joined by "\n".
  def Strings.cleanup_lyrics(lyrics)
    lyrics = HTMLEntities.decode(lyrics)
    lines = []

    lyrics.split(/\r\n|\n|\r/).each do |line|
      # remove unnecessary spaces
      line.tr_s!("\t ", " ")
      line.strip!

      # quotes and double quotes
      line.gsub!(/[`´’‘]/, "'")
      line.gsub!(/''|&quot;|[«»„”“]/, "\"")

      # suspension points
      line.gsub!(/…+/, "...")
      line.gsub!(/[,;]?\.{2,}/, "...")

      # add space after "?", "!", ",", ";", ":", ".", ")" and "]" if not present
      line.gsub!(/([^\.]?[\?!,;:\.\)\]])([^ "'<])/, "\\1 \\2")

      # remove spaces after "¿", "¡", "(" and "["
      line.gsub!(/([¿¡\(\[]) /, "\\1")

      # remove spaces before "?", "!", ",", ";", ":", ".", ")" and "]"
      line.gsub!(/\ ([\?!,;:\.\)\]])/, "\\1")

      # remove space after ... at the beginning of sentence
      line.gsub!(/^\.\.\. /, "...")

      # remove single points at end of sentence
      line.gsub!(/([^\.])\.$/, "\\1")

      # remove commas and semicolons at end of sentence
      line.gsub!(/[,;]$/, "")

      # fix english I pronoun capitalization
      line.gsub!(/([ "'\(\[])i([\ '",;:\.\?!\]\)]|$)/, "\\1I\\2")

      # remove the space after a leading " or ' and before a trailing one
      line.sub!(/^(["']) /, "\\1")
      line.sub!(/\ (["'])$/, "\\1")

      # capitalize first alphabetic character of the line
      Strings.capitalize!(line)

      # no more than one empty line at a time (and none at the very top)
      next if line.empty? && (lines.empty? || lines.last.empty?)
      lines << line
    end

    lines.pop if !lines.empty? && lines.last.empty?

    return lines.join("\n")
  end

  # In-place variant of Strings.cleanup_lyrics; returns +lyrics+.
  def Strings.cleanup_lyrics!(lyrics)
    return lyrics.replace(Strings.cleanup_lyrics(lyrics))
  end

  # Returns the stripped +artist+ with a normalized " feat. X" suffix. The
  # guest is taken from a trailing "feat./ft./featuring" credit in +title+
  # when there is one, otherwise such a credit in +artist+ itself is
  # rewritten. An empty artist is returned as is.
  def Strings.cleanup_artist(artist, title)
    artist = artist.strip
    if artist != ""
      if (md = @@featuring_credit.match(title.to_s))
        artist << " feat. " << md[2]
      else
        artist.gsub!(@@featuring_credit, " feat. \\2")
      end
    end
    return artist
  end

  # Returns +title+ without a trailing "feat./ft./featuring" credit.
  def Strings.cleanup_title(title)
    title = title.gsub(@@featuring_credit, "")
    title.strip!
    return title
  end

  # Re-encodes UTF-8 +text+ as one byte per code point. On malformed input a
  # warning is printed to $stderr and +text+ is returned unchanged.
  def Strings.utf82latin1(text)
    return text.unpack("U*").pack("C*")
  rescue StandardError
    $stderr << "warning: conversion from UTF-8 to Latin1 failed\n"
    return text
  end

  # Re-encodes +text+, read as one byte per character, as UTF-8. On failure a
  # warning is printed to $stderr and +text+ is returned unchanged.
  def Strings.latin12utf8(text)
    return text.unpack("C*").pack("U*")
  rescue StandardError
    $stderr << "warning: conversion from Latin1 to UTF-8 failed\n"
    return text
  end

  # Obfuscates +text+ (this is not encryption). Twice over: the code points
  # are reversed, each one is increased by its 1-based position, and the
  # numbers are joined with ":".
  def Strings.scramble(text)
    text = text.to_s
    2.times do
      codes = text.unpack("U*").reverse
      codes.each_index { |idx| codes[idx] += idx + 1 }
      text = codes.collect { |c| c.to_s }.join(":")
    end
    return text
  end

  # In-place variant of Strings.scramble; returns +text+.
  def Strings.scramble!(text)
    return text.replace(Strings.scramble(text))
  end

  # Reverses Strings.scramble.
  def Strings.descramble(text)
    text = text.to_s
    2.times do
      codes = text.split(":").collect { |c| c.to_i }
      codes.each_index { |idx| codes[idx] -= idx + 1 }
      text = codes.reverse.pack("U*")
    end
    return text
  end

  # In-place variant of Strings.descramble; returns +text+.
  def Strings.descramble!(text)
    return text.replace(Strings.descramble(text))
  end

end
378
+