lolspeak 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/COPYING ADDED
@@ -0,0 +1,21 @@
1
+ Copyright (c) 2008 Dave Dribin
2
+
3
+ Permission is hereby granted, free of charge, to any person
4
+ obtaining a copy of this software and associated documentation
5
+ files (the "Software"), to deal in the Software without
6
+ restriction, including without limitation the rights to use, copy,
7
+ modify, merge, publish, distribute, sublicense, and/or sell copies
8
+ of the Software, and to permit persons to whom the Software is
9
+ furnished to do so, subject to the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
18
+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19
+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/README ADDED
@@ -0,0 +1,22 @@
1
+ lolspeak is an English to LOLspeak translator. It contains a default
2
+ dictionary and various helper methods. The main class is the
3
+ LOLspeak::Tranzlator class. Since you typically don't need to use a custom
4
+ dictionary, you will most likely use the default tranzlator,
5
+ LOLspeak.default_tranzlator. Or better yet, use the extensions to String and
6
+ REXML::Element. For example:
7
+
8
+ "Hi there! You have a cute cat.".to_lolspeak
9
+ -> "oh hai thar! u has cute kitteh."
10
+
11
+ To install this via Ruby Gems:
12
+
13
+ % sudo gem install lolspeak
14
+
15
+ It also includes a command line application called lolspeak:
16
+
17
+ % lolspeak "Hi there! You have a cute cat."
18
+ oh hai thar! u has cute kitteh.
19
+
20
+ For more information and the latest version, visit:
21
+
22
+ http://www.dribin.org/dave/lolspeak/
@@ -0,0 +1,78 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+ require 'rake/testtask'
4
+ require 'rake/rdoctask'
5
+ require 'rake/gempackagetask'
6
+
7
+ $LOAD_PATH.unshift 'lib'
8
+ require 'lolspeak/version'
9
+
10
+ $KCODE = "UTF-8"
11
+
12
# Running plain `rake` executes the unit tests.
task :default => [:test_units]

desc "Run basic tests"
Rake::TestTask.new("test_units") do |t|
  t.pattern = 'test/*_test.rb'
  t.verbose = false
  t.warning = true
end

PKG_NAME = 'lolspeak'
PKG_VERSION = LOLspeak::VERSION
PKG_FILES = FileList[
  '[A-Z]*',
  'bin/**/*',
  'lib/**/*.rb',
  # Add this manually, because it is generated by the :tranzlator task. If it
  # doesn't exist at the time this list is built, the wildcard won't pick it
  # up, and it won't be included in the final package.
  'lib/lolspeak/tranzlator.yml',
  'test/**/*',
]

Rake::RDocTask.new(:rdoc) do |rdoc|
  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "#{PKG_NAME} -- A LOLspeak translator"
  rdoc.rdoc_files.include('README')
  rdoc.rdoc_files.include('lib/**/*.rb', 'bin/*')
end

spec = Gem::Specification.new do |s|
  s.platform = Gem::Platform::RUBY
  s.summary = "LOLspeak translator"
  s.name = PKG_NAME
  s.version = PKG_VERSION
  s.requirements << 'none'
  s.require_path = 'lib'
  s.files = PKG_FILES
  s.has_rdoc = true
  s.bindir = 'bin'
  s.executables = ['lolspeak']
  s.description = <<-EOF
    Translates English text into LOLspeak.
  EOF

  s.author = "Dave Dribin"
  s.homepage = "http://www.dribin.org/dave/software/lolspeak/"
  s.rubyforge_project = 'lolspeak'
end

Rake::GemPackageTask.new(spec) do |pkg|
  pkg.need_zip = true
  pkg.need_tar = true
end


SRC_TRANZ = "../tranzlator.yml"
DEST_TRANZ = "lib/lolspeak/tranzlator.yml"

# The dictionary lives outside this directory. When it is present, link it
# into lib/ before testing or packaging; when it is absent, the tasks below
# are simply not defined.
# File.exist? is used because File.exists? was removed in Ruby 3.2.
if File.exist?(SRC_TRANZ)
  file DEST_TRANZ => [SRC_TRANZ] do
    safe_ln(SRC_TRANZ, DEST_TRANZ)
  end
  task :tranzlator => DEST_TRANZ
  task :test_units => [:tranzlator]
  task :package => [:tranzlator]
end
78
+
@@ -0,0 +1,149 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # == Synopsis
4
+ #
5
+ # Translate text into LOLspeak
6
+ #
7
+ # == Usage
8
+ #
9
+ # lolspeak [OPTIONS] [<text> ...]
10
+ #
11
+ # == Options
12
+ #
13
+ # -f, --file FILE:: Read input from a file. Use <tt>-</tt> for standard
14
+ # input.
15
+ #
16
+ # -x, --xml:: Parse input text as XML and translate only the text portions.
17
+ #
18
+ # -h, --help:: Prints a help message and exits.
19
+ #
20
+ # -v, --version:: Prints the version and exits.
21
+ #
22
+ # == Author
23
+ # Dave Dribin
24
+ #
25
+ # == Copyright
26
+ # Copyright (c) 2008 Dave Dribin
27
+ # Licensed under the MIT license.
28
+
29
+ # This makes STDIN and STDOUT both act as containing UTF-8.
30
+ $KCODE = 'u'
31
+
32
+ require 'lolspeak'
33
+ require 'optparse'
34
+ require 'ostruct'
35
+ require 'set'
36
+
37
# Command-line driver for the lolspeak executable. It parses the options and
# then translates either the remaining command-line arguments, a file, or
# standard input, writing the result to standard output.
class LOLspeakApp
  COMMAND = File.basename($0)
  USAGE = "Usage: #{COMMAND} [OPTIONS] [<text> ...]"

  # Settings collected from the command line.
  Options = Struct.new(:input_file, :xml, :try_heuristics, :heuristics_exclude)

  def initialize
    @options = Options.new(nil, false, false, Set.new)
  end

  # Parses +argv+ and performs the requested translation.
  #
  # Returns the exit status: 0 on success (including --help and --version),
  # 1 when a StandardError was raised. In the latter case the error message,
  # prefixed with the command name, is written to standard error.
  def run(argv)
    main(argv) if parse_options(argv)
    0
  rescue StandardError => e
    # The message is interpolated: concatenating an Exception onto a String
    # with + raises TypeError on Ruby 1.9 and later.
    $stderr.puts "#{COMMAND}: #{e.message}"
    1
  end

  # Parses the options in +argv+, removing them so that only the text
  # arguments remain.
  #
  # Returns true when translation should go ahead, false when --help or
  # --version has been handled. Raises OptionParser errors for bad options and
  # system call errors when the --heuristics-exclude file cannot be read.
  def parse_options(argv)
    parser = OptionParser.new do |opts|
      opts.banner = USAGE
      opts.separator ""
      opts.separator "Specific options:"

      opts.on("-f", "--file FILE", "Read input from a file") do |path|
        @options.input_file = path
      end

      opts.on("-x", "--xml", "Parse text as XML") do
        @options.xml = true
      end

      opts.on("-H", "--heuristics", "Enable heuristics") do
        @options.try_heuristics = true
      end

      opts.on("-X", "--heuristics-exclude FILE",
              "List of words excluded from heuristics") do |path|
        @options.try_heuristics = true
        # One excluded word per line
        File.foreach(path) { |line| @options.heuristics_exclude.add(line.chomp) }
      end

      # These handlers run while parse! below is executing, so +return+
      # leaves parse_options immediately.
      opts.on_tail("-h", "--help", "Show a help message") do
        puts opts
        return false
      end

      opts.on_tail("-v", "--version", "Show version") do
        puts "#{COMMAND} #{LOLspeak::VERSION}"
        return false
      end
    end

    parser.parse!(argv)
    true
  end

  # Translates the input selected by the parsed options. +argv+ holds the
  # text arguments left over after option parsing; it is not modified.
  def main(argv)
    if @options.try_heuristics
      tranzlator = LOLspeak.default_tranzlator
      tranzlator.trace = true
      tranzlator.try_heuristics = true
      tranzlator.heuristics_exclude = @options.heuristics_exclude
    end

    input = @options.input_file
    # With neither text arguments nor --file, read standard input
    input = "-" if input.nil? && argv.empty?

    if input.nil?
      puts argv.map { |arg| arg.to_lolspeak }.join(" ")
    elsif input == "-"
      translate($stdin)
    else
      # Block form so the file is closed even if translation fails
      File.open(input) { |io| translate(io) }
    end
  end

  # Translates +io+ line by line and prints each translated line.
  def translate_io(io)
    io.each { |line| puts line.to_lolspeak }
  rescue Interrupt
    # Ctrl-C ends the translation quietly
  end

  # Reads all of +io+, translates it as XML and prints the result.
  def translate_xml_io(io)
    puts io.read.xml_to_lolspeak
  end

  private

  # Sends +io+ to the XML or the plain-text translation, depending on --xml.
  def translate(io)
    if @options.xml
      translate_xml_io(io)
    else
      translate_io(io)
    end
  end
end
146
+
147
# Run the application and hand its status back to the shell.
exit LOLspeakApp.new.run(ARGV)
@@ -0,0 +1,292 @@
1
+ $KCODE = "UTF-8"
2
+
3
+ require 'lolspeak/version'
4
+ require 'yaml'
5
+ require 'rexml/document'
6
+ require 'set'
7
+
8
# This module encapsulates the English to LOLspeak translator.
# See LOLspeak::Tranzlator for more information.
module LOLspeak
  # A class to perform English to LOLspeak translation based on a dictionary
  # of words.
  class Tranzlator
    # Replacements for the characters that may not appear unescaped in XML
    # text.
    XML_ESCAPES = { "&" => "&amp;", "<" => "&lt;", ">" => "&gt;" }.freeze

    # (bool -> false) Whether or not to record translations.
    # NOTE(review): translate_word records every translation in traced_words
    # without consulting this flag.
    attr_accessor :trace
    # (bool -> false) If true, try heuristics when translating words. If
    # false, only use the dictionary for translation.
    attr_accessor :try_heuristics
    # (Hash) Maps each lower-cased word that was translated to its LOLspeak
    # form.
    attr_reader :traced_words
    # (Hash) Stores all words translated via heuristics, if try_heuristics is
    # true.
    attr_reader :translated_heuristics
    # (Set) Words to exclude if heuristics are on.
    attr_accessor :heuristics_exclude

    # Creates a Tranzlator using a dictionary from a YAML file
    #
    # :call-seq:
    #   Tranzlator.from_file(file)   -> Tranzlator
    #
    def self.from_file(file)
      new(YAML.load_file(file))
    end

    # Creates a Tranzlator from the given dictionary, a Hash mapping
    # lower-case English words to their LOLspeak translation.
    #
    # :call-seq:
    #   initialize(dictionary)   -> Tranzlator
    #
    def initialize(dictionary)
      @dictionary = dictionary
      @trace = false
      @traced_words = {}
      @try_heuristics = false
      @translated_heuristics = {}
      @heuristics_exclude = Set.new
    end

    # Translates a single word into LOLspeak. By default, the result is in all
    # lower case:
    #
    #   translator.translate_word("Hi") -> "oh hai"
    #
    # If a block is given the word may be transformed. You could use this to
    # upper case or XML encode the result. This example upper cases the
    # result:
    #
    #   translator.translate_word("hi") { |w| w.upcase } -> "OH HAI"
    #
    # If heuristics are off, then only words in the dictionary are translated.
    # If heuristics are on, then words not in the dictionary may be translated
    # using standard LOLspeak heuristics, such as "*tion" -> "*shun".
    #
    # The returned String is always a fresh object, so callers may modify it
    # without affecting the dictionary.
    #
    # :call-seq:
    #   translate_word(word)                       -> String
    #   translate_word(word) { |word| transform }  -> String
    #
    def translate_word(word, &filter)
      word = word.downcase
      lol_word = dictionary_lookup(word)

      if lol_word.nil? && @try_heuristics && !@heuristics_exclude.member?(word)
        lol_word = heuristic_translation(word)
        @translated_heuristics[word] = lol_word unless lol_word.nil?
      end

      if lol_word.nil?
        # No translation: word is already our own lower-cased copy
        lol_word = word
      else
        @traced_words[word] = lol_word
        # Hand out a copy; lol_word may be the dictionary's own String
        lol_word = lol_word.dup
      end

      lol_word = filter.call(lol_word) unless filter.nil?
      lol_word
    end

    # Clears the trace word hash
    def clear_traced_words
      @traced_words = {}
    end

    # Clears the hash storing words translated by heuristics
    def clear_translated_heuristics
      @translated_heuristics = {}
    end

    # Translates all the words in a string. If a block is given, it is called
    # to transform each individual word. Text between words (punctuation and
    # the like) is left untouched.
    #
    # :call-seq:
    #   translate_words(words)                      -> String
    #   translate_words(words) { |word| transform } -> String
    #
    def translate_words(words, &filter)
      words.gsub(/(\w[\w’']*)(\s*)/) do
        # Copy the captures before translate_word runs its own matches
        word, space = $1, $2
        translate_token(word, space, &filter)
      end
    end

    # Translates the REXML::Text parts of a single REXML::Element. The element
    # is modified in place.
    #
    # If a block is given, it is called to transform each individual word. By
    # default, each word is XML escaped, so this transform applies on top of
    # that. The characters &, < and > between words are escaped as well, so
    # the replacement text stays well-formed.
    #
    # NOTE(review): CDATA sections look like they are returned by #texts too
    # and would be handled as ordinary text here -- confirm against REXML.
    #
    # :call-seq:
    #   translate_xml_element!(xml_element)
    #   translate_xml_element!(xml_element) { |word| transform }
    #
    def translate_xml_element!(xml_element, &filter)
      xml_element.texts.each do |text|
        plain = REXML::Text::unnormalize(text.to_s)
        escaped = plain.gsub(/(\w[\w’']*)(\s*)|([&<>])/) do
          word, space, special = $1, $2, $3
          if word.nil?
            XML_ESCAPES[special]
          else
            translate_token(word, space) do |w|
              w = REXML::Text::normalize(w)
              filter.nil? ? w : filter.call(w)
            end
          end
        end
        # raw = true: the string has already been escaped above
        text.replace_with(REXML::Text.new(escaped, true, nil, true))
      end
    end

    # Translates the REXML::Text parts of an REXML::Element and all child
    # elements. The elements are modified in place.
    #
    # If a block is given, it is called to transform each individual word. By
    # default, each word is XML escaped, so this transform applies on top of
    # that.
    #
    # :call-seq:
    #   translate_xml_element_recursive!(xml_element)
    #   translate_xml_element_recursive!(xml_element) { |word| transform }
    #
    def translate_xml_element_recursive!(xml_element, &filter)
      xml_element.each_recursive { |e| translate_xml_element!(e, &filter) }
    end

    # Translates the text parts of a well-formed XML string. It parses the
    # string using REXML and then translates the root element using
    # translate_xml_element_recursive!.
    #
    # If a block is given, it is called to transform each individual word.
    #
    # :call-seq:
    #   translate_xml_string(xml_string)                      -> String
    #   translate_xml_string(xml_string) { |word| transform } -> String
    #
    def translate_xml_string(xml_string, &filter)
      xml_doc = REXML::Document.new(xml_string)
      translate_xml_element_recursive!(xml_doc, &filter)
      xml_doc.to_s
    end

    private

    # Looks +word+ up in the dictionary: first as given, then with curly
    # apostrophes straightened, then with an apostrophe suffix (such as 's)
    # split off and re-attached to the translated stem. Returns nil when
    # nothing matches.
    def dictionary_lookup(word)
      lol_word = @dictionary[word]
      lol_word = @dictionary[word.gsub("’", "'")] if lol_word.nil?

      if lol_word.nil? && word =~ /(.*)([’']\w+)$/
        prefix, suffix = $1, $2
        lol_word = @dictionary[prefix]
        lol_word += suffix unless lol_word.nil?
      end
      lol_word
    end

    # Applies the suffix heuristics and the "ph" -> "f" rule to +word+.
    # Returns a new String, or nil when no rule applies.
    def heuristic_translation(word)
      lol_word =
        if word =~ /(.*)tion(s?)$/
          "#{$1}shun#{$2}"
        elsif word =~ /(.*)ed$/
          "#{$1}d"
        elsif word =~ /(.*)ing$/
          "#{$1}in"
        elsif word =~ /(.*)ss$/
          "#{$1}s"
        elsif word =~ /(.*)er$/
          "#{$1}r"
        elsif word !~ /ous$/ && word =~ /^([0-9A-Za-z_]+)s$/
          "#{$1}z"
        end

      if word =~ /ph/
        lol_word = word.dup if lol_word.nil?
        lol_word.gsub!(/ph/, 'f')
      end
      lol_word
    end

    # Translates +word+ and re-attaches the whitespace that followed it,
    # unless the translation is empty (the whitespace is then dropped too).
    def translate_token(word, space, &filter)
      lol_word = translate_word(word, &filter)
      lol_word == "" ? lol_word : lol_word + space
    end
  end

  @default_tranzlator = nil

  class << self
    # Sets the default Tranzlator to new_tranzlator
    #
    def default_tranzlator=(new_tranzlator)
      @default_tranzlator = new_tranzlator
    end

    # Returns the default Tranzlator. On the first time it is called, it
    # creates a Tranzlator using the built-in dictionary, which is read from
    # lolspeak/tranzlator.yml next to this file.
    #
    def default_tranzlator
      if @default_tranzlator.nil?
        default_file = File.join(File.dirname(__FILE__), "lolspeak",
                                 "tranzlator.yml")
        @default_tranzlator = Tranzlator.from_file(default_file)
      end
      @default_tranzlator
    end
  end
end
228
+
229
class String
  # Returns a LOLspeak version of this string, produced by
  # Tranzlator.translate_words on the default Tranzlator.
  #
  #   "Hi cat".to_lolspeak -> "oh hai kitteh"
  #
  # An optional block is passed along and is called to transform each word.
  #
  # See also: LOLspeak.default_tranzlator
  #
  # :call-seq:
  #   to_lolspeak                      -> String
  #   to_lolspeak { |word| transform } -> String
  #
  def to_lolspeak(&filter)
    tranzlator = LOLspeak.default_tranzlator
    tranzlator.translate_words(self, &filter)
  end

  # Returns this string, treated as XML, with all of its text translated by
  # Tranzlator.translate_xml_string on the default Tranzlator.
  #
  # An optional block is passed along and is called to transform each word.
  #
  # See also: LOLspeak.default_tranzlator
  #
  # :call-seq:
  #   xml_to_lolspeak                      -> String
  #   xml_to_lolspeak { |word| transform } -> String
  #
  def xml_to_lolspeak(&filter)
    tranzlator = LOLspeak.default_tranzlator
    tranzlator.translate_xml_string(self, &filter)
  end
end
259
+
260
module REXML # :nodoc:
  class Element
    # Translates, in place, each REXML::Text of this element by handing the
    # element to Tranzlator.translate_xml_element! on the default tranzlator.
    #
    # An optional block is passed along and is called to transform each word.
    #
    # See also: LOLspeak.default_tranzlator
    #
    # :call-seq:
    #   to_lolspeak!
    #   to_lolspeak! { |word| transform }
    #
    def to_lolspeak!(&filter)
      tranzlator = LOLspeak.default_tranzlator
      tranzlator.translate_xml_element!(self, &filter)
    end

    # Translates, in place, each REXML::Text of this element and of all its
    # child elements by handing the element to
    # Tranzlator.translate_xml_element_recursive! on the default tranzlator.
    #
    # An optional block is passed along and is called to transform each word.
    #
    # See also: LOLspeak.default_tranzlator
    #
    # :call-seq:
    #   to_lolspeak_recursive!
    #   to_lolspeak_recursive! { |word| transform }
    #
    def to_lolspeak_recursive!(&filter)
      LOLspeak.default_tranzlator.translate_xml_element_recursive!(self, &filter)
    end
  end
end