lolspeak 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/COPYING ADDED
@@ -0,0 +1,21 @@
1
+ Copyright (c) 2008 Dave Dribin
2
+
3
+ Permission is hereby granted, free of charge, to any person
4
+ obtaining a copy of this software and associated documentation
5
+ files (the "Software"), to deal in the Software without
6
+ restriction, including without limitation the rights to use, copy,
7
+ modify, merge, publish, distribute, sublicense, and/or sell copies
8
+ of the Software, and to permit persons to whom the Software is
9
+ furnished to do so, subject to the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
18
+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19
+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/README ADDED
@@ -0,0 +1,22 @@
1
+ lolspeak is an English to LOLspeak translator. It contains a default
2
+ dictionary and various helper methods. The main class is the
3
+ LOLspeak::Tranzlator class. Since you typically don't need to use a custom
4
+ dictionary, you will most likely use the default tranzlator,
5
+ LOLspeak.default_tranzlator. Or better yet, use the extensions to String and
6
+ REXML::Element. For example:
7
+
8
+ "Hi there! You have a cute cat.".to_lolspeak
9
+ -> "oh hai thar! u has cute kitteh."
10
+
11
+ To install this via RubyGems:
12
+
13
+ % sudo gem install lolspeak
14
+
15
+ It also includes a command line application called lolspeak:
16
+
17
+ % lolspeak "Hi there! You have a cute cat."
18
+ oh hai thar! u has cute kitteh.
19
+
20
+ For more information and the latest version, visit:
21
+
22
+ http://www.dribin.org/dave/lolspeak/
@@ -0,0 +1,78 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+ require 'rake/testtask'
4
+ require 'rake/rdoctask'
5
+ require 'rake/gempackagetask'
6
+
7
+ $LOAD_PATH.unshift 'lib'
8
+ require 'lolspeak/version'
9
+
10
+ $KCODE = "UTF-8"
11
+
12
# Running plain `rake` runs the unit tests.
task :default => [:test_units]

desc "Run basic tests"
Rake::TestTask.new("test_units") do |t|
  t.pattern = 'test/*_test.rb'
  t.verbose = false
  t.warning = true
end

PKG_NAME = 'lolspeak'
PKG_VERSION = LOLspeak::VERSION
PKG_FILES = FileList[
  '[A-Z]*',
  'bin/**/*',
  'lib/**/*.rb',
  # Add this manually, because it's copied. If it doesn't exist at the time
  # this is run, the wildcard won't pick it up, and it won't be included
  # in the final package.
  'lib/lolspeak/tranzlator.yml',
  'test/**/*',
]

Rake::RDocTask.new(:rdoc) do |rdoc|
  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "#{PKG_NAME} -- A LOLspeak translator"
  rdoc.rdoc_files.include('README')
  rdoc.rdoc_files.include('lib/**/*.rb', 'bin/*')
end

spec = Gem::Specification.new do |s|
  s.platform = Gem::Platform::RUBY
  s.summary = "LOLspeak translator"
  s.name = PKG_NAME
  s.version = PKG_VERSION
  s.requirements << 'none'
  s.require_path = 'lib'
  s.files = PKG_FILES
  s.has_rdoc = true
  s.bindir = 'bin'
  s.executables = ['lolspeak']
  s.description = <<-EOF
    Translates English text into LOLspeak.
  EOF

  s.author = "Dave Dribin"
  s.homepage = "http://www.dribin.org/dave/software/lolspeak/"
  s.rubyforge_project = 'lolspeak'
end

Rake::GemPackageTask.new(spec) do |pkg|
  pkg.need_zip = true
  pkg.need_tar = true
end


# The dictionary is maintained one directory above this project and linked
# into lib/ so that it ships inside the gem.
SRC_TRANZ = "../tranzlator.yml"
DEST_TRANZ = "lib/lolspeak/tranzlator.yml"

# Only wire up the link tasks when the external dictionary is present.
# File.exist? is used because the File.exists? alias is deprecated and has
# been removed from recent Ruby releases.
if File.exist?(SRC_TRANZ)
  file DEST_TRANZ => [SRC_TRANZ] do
    safe_ln(SRC_TRANZ, DEST_TRANZ)
  end
  task :tranzlator => DEST_TRANZ
  task :test_units => [:tranzlator]
  task :package => [:tranzlator]
end
78
+
@@ -0,0 +1,149 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # == Synopsis
4
+ #
5
+ # Translate text into LOLspeak
6
+ #
7
+ # == Usage
8
+ #
9
+ # lolspeak [OPTIONS] [<text> ...]
10
+ #
11
+ # == Options
12
+ #
13
+ # -f, --file FILE:: Read input from a file. Use <tt>-</tt> for standard
14
+ # input.
15
+ #
16
+ # -x, --xml:: Parse input text as XML and translate only the text portions.
17
+ #
18
+ # -h, --help:: Prints a help message and exits.
19
+ #
20
+ # -v, --version:: Prints the version and exits.
21
+ #
22
+ # == Author
23
+ # Dave Dribin
24
+ #
25
+ # == Copyright
26
+ # Copyright (c) 2008 Dave Dribin
27
+ # Licensed under the MIT license.
28
+
29
+ # This makes STDIN and STDOUT both act as containing UTF-8.
30
+ $KCODE = 'u'
31
+
32
+ require 'lolspeak'
33
+ require 'optparse'
34
+ require 'ostruct'
35
+ require 'set'
36
+
37
# Command-line front end for the LOLspeak translator.
#
# Recognised options (see parse_options):
#
# -f, --file FILE::               Read input from FILE; "-" means standard
#                                 input.
# -x, --xml::                     Read the whole input and translate it with
#                                 String#xml_to_lolspeak.
# -H, --heuristics::              Enable heuristics on the default tranzlator.
# -X, --heuristics-exclude FILE:: Enable heuristics and exclude every word
#                                 listed (one per line) in FILE.
# -h, --help::                    Print the option summary and stop.
# -v, --version::                 Print the version and stop.
class LOLspeakApp
  COMMAND = File.basename($0)
  USAGE = "Usage: #{COMMAND} [OPTIONS] [<text> ...]"

  def initialize
    @command = COMMAND

    @options = OpenStruct.new
    @options.inputFile = nil
    @options.xml = false
    @options.try_heuristics = false
    @options.heuristics_exclude = Set.new
  end

  # Parses +argv+ and performs the translation.
  #
  # Returns 0 on success (including after --help / --version) and 1 when a
  # StandardError was raised, in which case "<command>: <message>" is written
  # to standard error.
  def run(argv)
    main(argv) if parse_options(argv)
    0
  rescue => e
    # Interpolate the message: String#+ with an exception object raises a
    # TypeError on Ruby 1.9 and later, which used to mask the real error.
    $stderr.puts "#{COMMAND}: #{e.message}"
    1
  end

  # Fills @options from +argv+, removing the recognised switches from it.
  #
  # Returns true when the program should go on to translate, false when
  # --help or --version has been handled.
  def parse_options(argv)
    parser = OptionParser.new do |opts|
      opts.banner = USAGE
      opts.separator ""
      opts.separator "Specific options:"

      opts.on("-f", "--file FILE", "Read input from a file") do |file_name|
        @options.inputFile = file_name
      end

      opts.on("-x", "--xml", "Parse text as XML") do
        @options.xml = true
      end

      opts.on("-H", "--heuristics", "Enable heuristics") do
        @options.try_heuristics = true
      end

      opts.on("-X", "--heuristics-exclude FILE",
              "List of words excluded from heuristics") do |path|
        @options.try_heuristics = true
        excludes = @options.heuristics_exclude
        File.foreach(path) { |line| excludes.add(line.chomp) }
      end

      # The `return` statements below leave parse_options itself: the
      # handlers are invoked from parser.parse! further down, while this
      # method is still on the stack.
      opts.on_tail("-h", "--help", "Show a help message") do
        puts opts
        return false
      end

      opts.on_tail("-v", "--version", "Show version") do
        puts "#{COMMAND} #{LOLspeak::VERSION}"
        return false
      end
    end

    parser.parse!(argv)
    true
  end

  # Translates either the remaining command-line words or the selected input
  # stream and prints the result to standard output.
  def main(argv)
    if @options.try_heuristics
      tranzlator = LOLspeak.default_tranzlator
      tranzlator.trace = true
      tranzlator.try_heuristics = true
      tranzlator.heuristics_exclude = @options.heuristics_exclude
    end

    # With neither text arguments nor --file, fall back to standard input.
    @options.inputFile = "-" if argv.empty? && @options.inputFile.nil?

    if @options.inputFile.nil?
      puts argv.map { |arg| arg.to_lolspeak }.join(" ")
      return
    end

    if @options.inputFile == "-"
      translate_input(STDIN)
    else
      # Block form so the file is closed even if translation raises.
      File.open(@options.inputFile) { |io| translate_input(io) }
    end
  end

  # Translates +io+ line by line, printing each translated line. An
  # Interrupt (Ctrl-C) ends the loop quietly.
  def translate_io(io)
    io.each { |line| puts line.to_lolspeak }
  rescue Interrupt
  end

  # Reads all of +io+ and prints it translated as XML.
  def translate_xml_io(io)
    puts io.read.xml_to_lolspeak
  end

  private

  # Dispatches +io+ to the XML or plain-text translator according to --xml.
  def translate_input(io)
    if @options.xml
      translate_xml_io(io)
    else
      translate_io(io)
    end
  end
end
146
+
147
# Run the application on the process arguments and use its return value as
# the exit status.
exit LOLspeakApp.new.run(ARGV)
@@ -0,0 +1,292 @@
1
+ $KCODE = "UTF-8"
2
+
3
+ require 'lolspeak/version'
4
+ require 'yaml'
5
+ require 'rexml/document'
6
+ require 'set'
7
+
8
# This module encapsulates the English to LOLspeak translator.
# See LOLspeak::Tranzlator for more information.
module LOLspeak
  # A class to perform English to LOLspeak translation based on a dictionary
  # of words.
  class Tranzlator
    # (bool -> false) Whether or not to record translations. Note that
    # translate_word itself does not consult this flag: every translated word
    # is stored in traced_words.
    attr_accessor :trace
    # (bool -> false) If true, try heuristics when translating words. If
    # false, only use the dictionary for translation.
    attr_accessor :try_heuristics
    # (Hash) Maps each lower-cased input word that was translated to its
    # translation.
    attr_reader :traced_words
    # (Hash) Stores all words translated via heuristics, if try_heuristics is
    # true.
    attr_reader :translated_heuristics
    # (Set) Words to exclude if heuristics are on.
    attr_accessor :heuristics_exclude

    class << self
      # Creates a Tranzlator using a dictionary from a YAML file
      #
      # :call-seq:
      #   Tranzlator.from_file(file) -> Tranzlator
      #
      def from_file(file)
        dictionary = YAML::load_file(file)
        Tranzlator.new(dictionary)
      end
    end

    # Creates a Tranzlator from the given dictionary, which is looked up with
    # lower-cased words as keys.
    #
    # :call-seq:
    #   initialize(dictionary) -> Tranzlator
    #
    def initialize(dictionary)
      @dictionary = dictionary
      @trace = false
      @traced_words = {}
      @try_heuristics = false
      @translated_heuristics = {}
      @heuristics_exclude = Set.new
    end

    # Translates a single word into LOLspeak. By default, the result is in all
    # lower case:
    #
    #   translator.translate_word("Hi") -> "oh hai"
    #
    # If a block is given the word may be transformed. You could use this to
    # upper case or XML encode the result. This example upper cases the
    # result:
    #
    #   translator.translate_word("hi") { |w| w.upcase } -> "OH HAI"
    #
    # If heuristics are off, then only words in the dictionary are translated.
    # If heuristics are on, then words not in the dictionary may be translated
    # using standard LOLspeak heuristics, such as "*tion" -> "*shun".
    #
    # A word with no translation is returned lower-cased and is not recorded
    # in traced_words.
    #
    # :call-seq:
    #   translate_word(word) -> String
    #   translate_word(word) { |word| transform } -> String
    #
    def translate_word(word, &filter)
      word = word.downcase
      lol_word = lookup_word(word)

      if lol_word.nil? && @try_heuristics && !@heuristics_exclude.member?(word)
        lol_word = heuristic_word(word)
        @translated_heuristics[word] = lol_word unless lol_word.nil?
      end

      if lol_word.nil?
        lol_word = word
      else
        @traced_words[word] = lol_word
      end

      lol_word = filter.call(lol_word) unless filter.nil?
      lol_word
    end

    # Clears the trace word hash
    def clear_traced_words
      @traced_words = {}
    end

    # Clears the hash storing words translated by heuristics
    def clear_translated_heuristics
      @translated_heuristics = {}
    end

    # Translates all the words in a string. If a block is given, it is called
    # to transform each individual word. Text between words (punctuation and
    # so on) is left untouched.
    #
    # :call-seq:
    #   translate_words(words) -> String
    #   translate_words(words) { |word| transform } -> String
    #
    def translate_words(words, &filter)
      words.gsub(/(\w[\w’\']*)(\s*)/) do
        word, space = $1, $2
        lol_word = translate_word(word, &filter)

        # Stick the space back on, as long as the translation is not empty
        lol_word += space if lol_word != ""
        lol_word
      end
    end

    # Translates the REXML::Text parts of a single REXML::Element. The element
    # is modified in place.
    #
    # If a block is given, it is called to transform each individual word. By
    # default, each word is XML escaped, so this transform applies on top of
    # that.
    #
    # :call-seq:
    #   translate_xml_element!(xml_element)
    #   translate_xml_element!(xml_element) { |word| transform }
    #
    def translate_xml_element!(xml_element, &filter)
      xml_element.texts.each do |text|
        string = REXML::Text::unnormalize(text.to_s)
        string = translate_words(string) do |w|
          w = REXML::Text::normalize(w)
          w = filter.call(w) unless filter.nil?
          w
        end
        # The words were escaped above, so build the replacement as raw text.
        new_text = REXML::Text.new(string, true, nil, true)
        text.replace_with(new_text)
      end
    end

    # Translates the REXML::Text parts of an REXML::Element and all child
    # elements. The elements are modified in place.
    #
    # If a block is given, it is called to transform each individual word. By
    # default, each word is XML escaped, so this transform applies on top of
    # that.
    #
    # :call-seq:
    #   translate_xml_element_recursive!(xml_element)
    #   translate_xml_element_recursive!(xml_element) { |word| transform }
    #
    def translate_xml_element_recursive!(xml_element, &filter)
      xml_element.each_recursive { |e| translate_xml_element!(e, &filter) }
    end

    # Translates the text parts of a well-formed XML string. It parses the
    # string using REXML and then translates the root element using
    # translate_xml_element_recursive!.
    #
    # If a block is given, it is called to transform each individual word.
    #
    # :call-seq:
    #   translate_xml_string(xml_string) -> String
    #   translate_xml_string(xml_string) { |word| transform } -> String
    #
    def translate_xml_string(xml_string, &filter)
      xml_doc = REXML::Document.new xml_string
      translate_xml_element_recursive!(xml_doc, &filter)
      xml_doc.to_s
    end

    private

    # Dictionary lookup for an already lower-cased word. Tries the word as
    # is, then with curly apostrophes replaced by straight ones, then the
    # part before a trailing apostrophe suffix (the suffix is appended to
    # the translation). Returns nil when nothing matches.
    def lookup_word(word)
      lol_word = @dictionary[word]
      lol_word = @dictionary[word.gsub("’", "'")] if lol_word.nil?

      if lol_word.nil?
        match = word.match(/(.*)([’']\w+)$/)
        if match
          lol_word = @dictionary[match[1]]
          lol_word += match[2] unless lol_word.nil?
        end
      end

      lol_word
    end

    # Applies the suffix heuristics (first matching rule wins) followed by
    # the "ph" -> "f" rule. Returns nil when no rule applies.
    def heuristic_word(word)
      lol_word =
        if word =~ /(.*)tion(s?)$/
          "#{$1}shun#{$2}"
        elsif word =~ /(.*)ed$/
          "#{$1}d"
        elsif word =~ /(.*)ing$/
          "#{$1}in"
        elsif word =~ /(.*)ss$/
          "#{$1}s"
        elsif word =~ /(.*)er$/
          "#{$1}r"
        elsif word !~ /ous$/ && word =~ /^([0-9A-Za-z_]+)s$/
          "#{$1}z"
        end

      lol_word = (lol_word || word).gsub(/ph/, 'f') if word =~ /ph/
      lol_word
    end
  end

  # Held in a module-level instance variable; nil until a tranzlator is
  # assigned or first requested.
  @default_tranzlator = nil

  class << self
    # Sets the default Tranzlator to new_tranzlator
    #
    def default_tranzlator=(new_tranzlator)
      @default_tranzlator = new_tranzlator
    end

    # Returns the default Tranzlator. On the first time it is called, it
    # creates a Tranzlator using the built-in dictionary, which is read from
    # lolspeak/tranzlator.yml next to this file.
    #
    def default_tranzlator
      if @default_tranzlator.nil?
        default_file = File.join(File.dirname(__FILE__), "lolspeak",
                                 "tranzlator.yml")
        @default_tranzlator = Tranzlator.from_file(default_file)
      end
      @default_tranzlator
    end
  end
end
228
+
229
class String
  # Returns a copy of this string with every word translated by the default
  # Tranzlator's translate_words method.
  #
  #   "Hi cat".to_lolspeak -> "oh hai kitteh"
  #
  # An optional block is handed on and receives each translated word.
  #
  # See also: LOLspeak.default_tranzlator
  #
  # :call-seq:
  #   to_lolspeak -> String
  #   to_lolspeak { |word| transform } -> String
  #
  def to_lolspeak(&filter)
    tranzlator = LOLspeak::default_tranzlator
    tranzlator.translate_words(self, &filter)
  end

  # Interprets this string as XML and returns the result of the default
  # Tranzlator's translate_xml_string method. An optional block is handed on
  # and receives each translated word.
  #
  # See also: LOLspeak.default_tranzlator
  #
  # :call-seq:
  #   xml_to_lolspeak -> String
  #   xml_to_lolspeak { |word| transform } -> String
  #
  def xml_to_lolspeak(&filter)
    tranzlator = LOLspeak::default_tranzlator
    tranzlator.translate_xml_string(self, &filter)
  end
end
259
+
260
module REXML # :nodoc:
  class Element

    # Translates the text nodes of this element in place by handing the
    # element to Tranzlator.translate_xml_element! on the default tranzlator.
    # An optional block is handed on and receives each translated word.
    #
    # See also: LOLspeak.default_tranzlator
    #
    # :call-seq:
    #   to_lolspeak!
    #   to_lolspeak! { |word| transform }
    #
    def to_lolspeak!(&filter)
      tranzlator = LOLspeak::default_tranzlator
      tranzlator.translate_xml_element!(self, &filter)
    end

    # Translates the text nodes of this element and of all its child
    # elements in place by handing the element to
    # Tranzlator.translate_xml_element_recursive! on the default tranzlator.
    # An optional block is handed on and receives each translated word.
    #
    # See also: LOLspeak.default_tranzlator
    #
    # :call-seq:
    #   to_lolspeak_recursive!
    #   to_lolspeak_recursive! { |word| transform }
    #
    def to_lolspeak_recursive!(&filter)
      LOLspeak::default_tranzlator.translate_xml_element_recursive!(self, &filter)
    end
  end
end