macronconversions 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +6 -0
- data/Gemfile +4 -0
- data/LICENSE.textile +9 -0
- data/README.textile +63 -0
- data/Rakefile +16 -0
- data/bin/mconv +16 -0
- data/bin/mdconv +6 -0
- data/lib/macronconversions/conversion_structure.rb +72 -0
- data/lib/macronconversions/macronconversions.rb +243 -0
- data/lib/macronconversions/version.rb +10 -0
- data/lib/macronconversions.rb +36 -0
- data/macronconversions.gemspec +23 -0
- data/test/testMacronConversion.rb +68 -0
- data/test/testMacronDeconversion.rb +19 -0
- metadata +80 -0
data/Gemfile
ADDED
data/LICENSE.textile
ADDED
@@ -0,0 +1,9 @@
|
|
1
|
+
The MIT License
|
2
|
+
|
3
|
+
Copyright (c) Steven G. Harms, http://stevengharms.com
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
6
|
+
|
7
|
+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
8
|
+
|
9
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.textile
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
h1. Macronconversions
|
2
|
+
|
3
|
+
h2. Who's This For?
|
4
|
+
|
5
|
+
If you work with a language that makes use of vowels that have quantity...
|
6
|
+
|
7
|
+
AND
|
8
|
+
|
9
|
+
you need to generate those macron-bearing characters based on ASCII standards (because character panel lookup is not awesome)
|
10
|
+
|
11
|
+
AND
|
12
|
+
|
13
|
+
you work with this language from the CLI
|
14
|
+
|
15
|
+
THEN
|
16
|
+
|
17
|
+
Wow, there's someone else besides me?
|
18
|
+
|
19
|
+
*ahem*
|
20
|
+
|
21
|
+
THEN Macronconversions may be the library for you!
|
22
|
+
|
23
|
+
h2. Macronconversions lets you convert easily between ASCII and funny multibyte representations
|
24
|
+
|
25
|
+
<pre>
|
26
|
+
$ mconv -M "laud\={a}re"
|
27
|
+
laudāre
|
28
|
+
</pre>
|
29
|
+
|
30
|
+
Coup-de-grâce
|
31
|
+
|
32
|
+
<pre>
|
33
|
+
$ mdconv `mconv -M "laud\={a}re"`
|
34
|
+
laud\={a}re
|
35
|
+
</pre>
|
36
|
+
|
37
|
+
HTML codes instead
|
38
|
+
|
39
|
+
<pre>
|
40
|
+
$ mconv -H "laud\={a}re"
|
41
|
+
laudāre
|
42
|
+
</pre>
|
43
|
+
|
44
|
+
h2. Library
|
45
|
+
|
46
|
+
Obviously the bin/ files are just thin wrappers around the core library. Consult the RDoc for programmatic use.
|
47
|
+
|
48
|
+
h2. Install
|
49
|
+
|
50
|
+
gem install macronconversions
|
51
|
+
|
52
|
+
h2. Current Status
|
53
|
+
|
54
|
+
Stable
|
55
|
+
|
56
|
+
h2. Ridiculee-in-chief
|
57
|
+
|
58
|
+
"Steven G. Harms":http://stevengharms.com
|
59
|
+
|
60
|
+
h2. Horrible Typos
|
61
|
+
|
62
|
+
I'm currently relearning QWERTY from 10 years of Dvorak. I should be getting better real soon now.
|
63
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
# Rake tasks for the macronconversions gem: gem packaging (via Bundler),
# RDoc generation and the unit-test suite.
require 'bundler'
require 'rdoc/task'
require 'rake/testtask'

# Add the bundler tasks; this comes because the project was created with
# `bundle gem`.
Bundler::GemHelper.install_tasks

# Generate documentation into ./rdoc from everything under lib/.
# RDoc::Task (rdoc/task) is the replacement for the deprecated
# Rake::RDocTask that rake/rdoctask used to provide.
RDoc::Task.new do |rd|
  rd.rdoc_files.include("lib/**/*.rb")
  rd.rdoc_dir = "rdoc"
end

# Added to get testing working: defines the `test` task.
Rake::TestTask.new(:test)
|
data/bin/mconv
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Command-line wrapper around Text::Latex::Util::Macronconversions.convert.
#
# Usage: mconv [-H | -M | -U] TEXT...
#
#   -H  request the :html format
#   -M  request the :mc format (the default when no switch is given)
#   -U  request the :utf8 format
#
# When several switches are given, -H wins over -M, which wins over -U.
$:.unshift File.join(File.dirname(__FILE__), *%w[.. lib])
require 'macronconversions/macronconversions'

# optparse ships with Ruby; the gem declares no runtime dependencies, so the
# script must not rely on a third-party option parser.
require 'optparse'

requested = {}
parser = OptionParser.new do |opts|
  opts.banner = "Usage: mconv [-H | -M | -U] TEXT..."
  opts.on("-H", "HTML output")            { requested[:html] = true }
  opts.on("-M", "macron characters (default)") { requested[:mc] = true }
  opts.on("-U", "UTF-8 escape codes")     { requested[:utf8] = true }
end

# order! stops at the first non-switch argument, so only leading switches are
# consumed and everything after them is treated as text to convert.
parser.order!(ARGV)

mode = [:html, :mc, :utf8].find { |format| requested[format] } || :mc

# Re-join the remaining arguments with spaces so unquoted multi-word input
# keeps its word boundaries.
puts Text::Latex::Util::Macronconversions.convert(ARGV.join(' '), mode)
|
data/bin/mdconv
ADDED
@@ -0,0 +1,72 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
module Text
  module Latex
    module Util
      module Macronconversions
        # Chart used for ASCII LaTeX lookup against the formats.
        #
        # Each key is the LaTeX markup for one macron-bearing vowel; each
        # value maps a format name to that vowel's representation:
        #
        # :mc   :: the macron character itself
        # :utf8 :: the character's UTF-8 bytes spelled out as ASCII text
        #          (a literal backslash, "x" and two hex digits per byte)
        # :html :: the representation used for HTML output
        #
        # NOTE(review): every :html value below is the literal macron
        # character, identical to :mc. Since HTML input is detected elsewhere
        # in this library by looking for "&#", these were presumably numeric
        # character references originally -- confirm against the upstream
        # source before relying on the :html format.
        #
        # The table is frozen so that it cannot be modified by accident.
        CONVERSION_TABLE = {
          "\\={a}" => {
            :mc   => "ā",
            :utf8 => "\\xc4\\x81",
            :html => "ā"
          },
          "\\={e}" => {
            :mc   => "ē",
            :utf8 => "\\xc4\\x93",
            :html => "ē"
          },
          "\\={\\i}" => {
            :mc   => "ī",
            :utf8 => "\\xc4\\xab",
            :html => "ī"
          },
          "\\={o}" => {
            :mc   => "ō",
            :utf8 => "\\xc5\\x8d",
            :html => "ō"
          },
          "\\={u}" => {
            :mc   => "ū",
            :utf8 => "\\xc5\\xab",
            :html => "ū"
          },
          "\\={A}" => {
            :mc   => "Ā",
            :utf8 => "\\xc4\\x80",
            :html => "Ā"
          },
          "\\={E}" => {
            :mc   => "Ē",
            :utf8 => "\\xc4\\x92",
            :html => "Ē"
          },
          "\\={\\I}" => {
            :mc   => "Ī",
            :utf8 => "\\xc4\\xaa",
            :html => "Ī"
          },
          "\\={O}" => {
            :mc   => "Ō",
            :utf8 => "\\xc5\\x8c",
            :html => "Ō"
          },
          "\\={U}" => {
            :mc   => "Ū",
            :utf8 => "\\xc5\\xaa",
            :html => "Ū"
          }
        }.freeze
      end
    end
  end
end
|
@@ -0,0 +1,243 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
require 'macronconversions/conversion_structure'
|
3
|
+
|
4
|
+
=begin rdoc
|
5
|
+
== Synopsis
|
6
|
+
|
7
|
+
Text::Latex::Util::MacronConversions: module providing class methods to convert
|
8
|
+
macron (dis-)enabled strings into the opposite.
|
9
|
+
|
10
|
+
== Usage
|
11
|
+
|
12
|
+
require 'macronconversions'
|
13
|
+
|
14
|
+
== Description
|
15
|
+
|
16
|
+
The class provides two class methods: +convert+ and +deconvert+ In
|
17
|
+
the event that you need to transform LaTeX-style markup into entities of
|
18
|
+
some sort, use the former class. In the event that you need to down-sample
|
19
|
+
macron-characters into LaTeX-style, use the latter.
|
20
|
+
|
21
|
+
== Example Code
|
22
|
+
|
23
|
+
# Basic conversion and advanced conversion
|
24
|
+
puts Text::Latex::Util::Macronconversions.convert("mon\\={e}re", 'mc') #=> monēre
|
25
|
+
|
26
|
+
# Complex de-conversion
|
27
|
+
puts Text::Latex::Util::Macronconversions.deconvert("laudāre") #=> laud\={a}re
|
28
|
+
|
29
|
+
# Coup de grace
|
30
|
+
puts Text::Latex::Util::Macronconversions.deconvert(
|
31
|
+
Text::Latex::Util::Macronconversions.convert('to bring up, educate: \={e}duc\={o}, \={e}duc\={a}re, \={e}duc\={a}v\={\i}, \={e}ducatus; education, educator, educable', 'mc'))
|
32
|
+
|
33
|
+
== Author
|
34
|
+
|
35
|
+
Steven G. Harms, http://www.stevengharms.com
|
36
|
+
|
37
|
+
=end
|
38
|
+
# Module for manipulations on text documents
|
39
|
+
module Text
  # Module for working with LaTeX-formatted text
  module Latex
    # Utilities for generating LaTeX-formatted text
    module Util
      # == Synopsis
      #
      # Text::Latex::Util::Macronconversions: module providing module-level
      # methods to convert strings with LaTeX macron markup into strings with
      # macron-bearing characters, and back again.
      #
      # == Usage
      #
      #   require 'macronconversions'
      #
      # == Description
      #
      # The module provides two methods: +convert+ and +deconvert+. To turn
      # LaTeX-style markup (<tt>\={a}</tt>) into one of the formats listed in
      # CONVERSION_TABLE, use +convert+. To turn text in one of those formats
      # back into LaTeX-style markup, use +deconvert+.
      #
      # == Example Code
      #
      #   # Basic conversion
      #   puts Text::Latex::Util::Macronconversions.convert("mon\\={e}re", 'mc') #=> monēre
      #
      #   # De-conversion
      #   puts Text::Latex::Util::Macronconversions.deconvert("laudāre") #=> laud\={a}re
      #
      # == Author
      #
      # Steven G. Harms, http://www.stevengharms.com
      module Macronconversions

        class << self
          # Deconverts a string that has macron-bearing vowels from one of
          # the CONVERSION_TABLE formats to the ASCII representation used by
          # LaTeX. +word+ itself is never modified.
          #
          # Params:
          # +word+ :: the string to deconvert
          # +arg[0]+ :: optional format of +word+ (Symbol or String naming a
          #             CONVERSION_TABLE format). When omitted, +nil+ or
          #             +:skip+, the format is derived from +word+.
          # +arg[1]+ :: optional lookup chart (format value => LaTeX code) to
          #             use instead of one built from CONVERSION_TABLE.
          #
          # Every "&...;" run, every "\x..\x.." run and every macron
          # character is looked up in the chart; all other text is copied
          # through unchanged. A stray "&" or "\" is looked up as well and
          # therefore rejected.
          #
          # The resultant string may be operated upon by passing an optional
          # block; the block's return value is then returned instead.
          #
          # Raises ArgumentError when no format could be determined although
          # +word+ contains "&" or "\", and RuntimeError (from
          # +_deconvert_char+) when a token is not in the chart.
          def deconvert(word, *arg)
            return "" if word.empty?

            # An explicit format wins; :skip and nil both mean "work it out".
            requested = arg[0]
            requested = nil if requested == :skip
            mode = requested.nil? ? _detect_format(word) : requested.to_sym

            # Without a format there is no chart to resolve "&" / "\" tokens.
            if mode.nil? && word =~ /[&\\]/
              raise ArgumentError, "unable to determine the macron format of #{word.inspect}"
            end

            chart = arg[1] || _deconversion_chart(mode)

            # gsub builds a new string, so the caller's +word+ is left alone,
            # and it walks the text iteratively, so long input is fine.
            return_string = word.gsub(/&(?:.*?;)?|\\(?:x..\\x..)?|[āēīōūĀĒĪŌŪ]/) do |token|
              _deconvert_char(token, chart)
            end

            # Allow a block to be given to mutate the string after having been fabricated
            return_string = yield(return_string) if block_given?

            return_string
          end

          # Macronconversions::convert scans a string for LaTeX macron codes
          # and replaces each one with its representation in the requested
          # format, using the "private" method +_convert_char+ for the lookup.
          #
          # Params:
          # +word+ :: A string that uses the LaTeX standard for macron denotation
          # +mode+ :: How the resultant string should be formatted (mc|utf8|html),
          #           as a Symbol or String
          #
          # Every run from a backslash to the next "}" on the same line is
          # treated as a macron code; a backslash with no closing "}" is
          # looked up on its own. All other text is copied through unchanged.
          #
          # The resultant string may be operated upon by passing an optional
          # block; the block's return value is then returned instead.
          #
          # Raises RuntimeError (from +_convert_char+) for a code or format
          # that is not in CONVERSION_TABLE.
          def convert(word, mode=:mc, &b)
            return "" if word.empty?

            # mode is only converted to a Symbol when a code is actually
            # found, so plain text works whatever +mode+ is.
            return_string = word.gsub(/\\(?:.*?\})?/) do |token|
              _convert_char(token, mode.to_sym)
            end

            # Allow a block to be given to mutate the string after having been fabricated
            return_string = yield(return_string) if block_given?

            return_string
          end

          #####################################
          # "Private" method
          # (still available for unit testing, but you probably shouldn't mess with it)
          #
          # Does the lookup to convert macron bearing character to LaTeX ASCII formatting.
          # Returns chart[c]; raises RuntimeError naming +c+ when the chart
          # has no entry for it.
          #####################################
          def _deconvert_char(c, chart)
            r = chart[c]
            raise "_deconvert_char failed to find a match for [#{c}]" if r.nil?
            r
          end

          #####################################
          # "Private" method
          # (still available for unit testing, but you probably shouldn't mess with it)
          #
          # Does the lookup to convert LaTeX ASCII to macron bearing character formatting.
          # Returns CONVERSION_TABLE[c][mode]; raises RuntimeError naming +c+
          # when either the code or the format is unknown.
          #####################################
          def _convert_char(c, mode)
            formats = Text::Latex::Util::Macronconversions::CONVERSION_TABLE[c]
            r = formats && formats[mode]
            raise "_convert_char failed to find a match for [#{c}]" if r.nil?
            r
          end

          #####################################
          # "Private" method
          #
          # Derives the format of +word+ heuristically: "&#" means :html, a
          # macron character means :mc, a literal "\x" means :utf8 (checked
          # in that order). Returns nil when none of them is present.
          #####################################
          def _detect_format(word)
            if word =~ /&#/
              :html
            elsif word =~ /[āēīōūĀĒĪŌŪ]/
              :mc
            elsif word =~ /\\x/
              :utf8
            end
          end

          #####################################
          # "Private" method
          #
          # Inverts CONVERSION_TABLE for one format, giving a Hash from that
          # format's value to the LaTeX code. Returns an empty Hash when
          # +mode+ is nil.
          #####################################
          def _deconversion_chart(mode)
            chart = {}
            return chart if mode.nil?

            Text::Latex::Util::Macronconversions::CONVERSION_TABLE.each do |latex, formats|
              chart[formats[mode]] = latex
            end
            chart
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
=begin rdoc
|
2
|
+
== Synopsis
|
3
|
+
|
4
|
+
Text::Latex::Util::MacronConversions: module providing class methods to convert
|
5
|
+
macron (dis-)enabled strings into the opposite.
|
6
|
+
|
7
|
+
== Usage
|
8
|
+
|
9
|
+
require 'macronconversions'
|
10
|
+
|
11
|
+
== Description
|
12
|
+
|
13
|
+
The class provides two class methods: +convert+ and +deconvert+ In
|
14
|
+
the event that you need to transform LaTeX-style markup into entities of
|
15
|
+
some sort, use the former class. In the event that you need to down-sample
|
16
|
+
macron-characters into LaTeX-style, use the latter.
|
17
|
+
|
18
|
+
== Example Code
|
19
|
+
|
20
|
+
# Basic conversion and advanced conversion
|
21
|
+
puts Text::Latex::Util::Macronconversions.convert("mon\\={e}re", 'mc') #=> monēre
|
22
|
+
|
23
|
+
# Complex de-conversion
|
24
|
+
puts Text::Latex::Util::Macronconversions.deconvert("laudāre") #=> laud\={a}re
|
25
|
+
|
26
|
+
# Coup de grace
|
27
|
+
puts Text::Latex::Util::Macronconversions.deconvert(
|
28
|
+
Text::Latex::Util::Macronconversions.convert('to bring up, educate: \={e}duc\={o}, \={e}duc\={a}re, \={e}duc\={a}v\={\i}, \={e}ducatus; education, educator, educable', 'mc'))
|
29
|
+
|
30
|
+
== Author
|
31
|
+
|
32
|
+
Steven G. Harms, http://www.stevengharms.com
|
33
|
+
|
34
|
+
=end
|
35
|
+
$:.unshift File.join(File.dirname(__FILE__), *%w[.. lib])
|
36
|
+
require 'macronconversions/macronconversions'
|
@@ -0,0 +1,23 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
require "macronconversions/version"
|
4
|
+
|
5
|
+
# Gem specification for macronconversions. The file lists, test files and
# executables are taken from what git tracks, so the gem must be built from
# a git checkout.
Gem::Specification.new do |s|
  s.name        = "macronconversions"
  s.version     = Text::Latex::Util::Macronconversions::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["Steven G. Harms"]
  s.email       = ["macron_conversions@sgharms.oib.com"]
  s.homepage    = "http://rubygems.org/gems/macronconversions"
  # The project ships LICENSE.textile containing the MIT License; declare it
  # so the packaged metadata carries it too.
  s.licenses    = ["MIT"]
  s.summary     = %q{Convert strings with Latex-style macron notation to
                     strings with embedded high-byte characters or UTF-8 escape codes.}
  s.description = %q{Convert strings like 'laud\={a}re' to 'laudāre' using
                     ASCII-compatible escape codes.}

  s.rubyforge_project = "macronconversions"

  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  # Executables are listed by base name; RubyGems looks them up in bin/.
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
require "test/unit"
|
3
|
+
|
4
|
+
$:.unshift File.join(File.dirname(__FILE__), *%w[.. lib])
|
5
|
+
require 'macronconversions/macronconversions'
|
6
|
+
|
7
|
+
# Unit tests for Text::Latex::Util::Macronconversions.convert and the
# per-character lookup _convert_char.
class TestMacronConversion < Test::Unit::TestCase
  # Module under test.
  MC = Text::Latex::Util::Macronconversions

  # Every LaTeX macron code the lookup is exercised with, in a fixed order
  # that the expected-value lists below follow.
  LATEX_CODES = [
    "\\={a}", "\\={e}", "\\={\\i}", "\\={o}", "\\={u}",
    "\\={A}", "\\={E}", "\\={\\I}", "\\={O}", "\\={U}"
  ]

  # The optional block receives the converted string and its return value
  # becomes the result of convert.
  def test_base_with_block
    starred = MC.convert("laud\\={a}re") { |s| s.split(//).join('*') }
    assert_equal("l*a*u*d*ā*r*e", starred)

    char_count = MC.convert("laud\\={a}re") { |s| s.split(//).length }
    assert_equal(7, char_count)

    fifth_char = MC.convert("laud\\={a}re") { |s| s.split(//)[4] }
    assert_equal("ā", fifth_char)
  end

  # Whole-string conversion, with the default format and each explicit one.
  def test_conversions
    # Base case
    assert_equal "vanilla", MC.convert("vanilla")
    assert_equal "laudāre", MC.convert("laud\\={a}re")
    assert_equal "monēre", MC.convert("mon\\={e}re", 'mc')

    sentence = 'to bring up, educate: \={e}duc\={o}, \={e}duc\={a}re, \={e}duc\={a}v\={\i}, \={e}ducatus; education, educator, educable'
    assert_equal "to bring up, educate: ēducō, ēducāre, ēducāvī, ēducatus; education, educator, educable",
                 MC.convert(sentence, 'mc')

    assert_equal "laudāre", MC.convert("laud\\={a}re", :html)
    assert_equal "laud\\xc4\\x81re", MC.convert("laud\\={a}re", :utf8)
  end

  def test_character_conversion_mc
    assert_each_code_converts(:mc, ["ā", "ē", "ī", "ō", "ū", "Ā", "Ē", "Ī", "Ō", "Ū"])
  end

  # NOTE(review): the expected values here are the literal macron
  # characters, the same as in the :mc test -- TODO confirm they were not
  # numeric HTML entities upstream.
  def test_character_conversion_html
    assert_each_code_converts(:html, ["ā", "ē", "ī", "ō", "ū", "Ā", "Ē", "Ī", "Ō", "Ū"])
  end

  def test_character_conversion_utf8
    assert_each_code_converts(:utf8, [
      "\\xc4\\x81", "\\xc4\\x93", "\\xc4\\xab", "\\xc5\\x8d", "\\xc5\\xab",
      "\\xc4\\x80", "\\xc4\\x92", "\\xc4\\xaa", "\\xc5\\x8c", "\\xc5\\xaa"
    ])
  end

  private

  # Asserts that each entry of LATEX_CODES converts, in +format+, to the
  # entry at the same position in +expected_values+.
  def assert_each_code_converts(format, expected_values)
    LATEX_CODES.zip(expected_values).each do |latex, expected|
      assert_equal expected, MC._convert_char(latex, format)
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
require "test/unit"
|
3
|
+
|
4
|
+
$:.unshift File.join(File.dirname(__FILE__), *%w[.. lib])
|
5
|
+
require 'macronconversions/macronconversions'
|
6
|
+
|
7
|
+
# Unit tests for Text::Latex::Util::Macronconversions.deconvert, one test
# per input format.
class TestMacronDeconversion < Test::Unit::TestCase
  # Plain words come back unchanged; macron characters become LaTeX codes.
  def test_basic_mc_deconversion
    cases = [
      ["vanilla", "vanilla"],
      ["laudāre", "laud\\={a}re"],
      ["monēre",  "mon\\={e}re"]
    ]
    cases.each do |input, latex|
      assert_equal latex, Text::Latex::Util::Macronconversions.deconvert(input)
    end
  end

  # Input using spelled-out UTF-8 escape codes.
  def test_basic_utf8_deconversion
    result = Text::Latex::Util::Macronconversions.deconvert("laud\\xc4\\x81re")
    assert_equal "laud\\={a}re", result
  end

  # NOTE(review): the input here is the literal macron character, the same
  # as in the :mc test -- TODO confirm it was not a numeric HTML entity
  # upstream.
  def test_basic_html_deconversion
    result = Text::Latex::Util::Macronconversions.deconvert("laudāre")
    assert_equal "laud\\={a}re", result
  end
end
|
metadata
ADDED
@@ -0,0 +1,80 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: macronconversions
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 0
|
8
|
+
- 1
|
9
|
+
version: 0.0.1
|
10
|
+
platform: ruby
|
11
|
+
authors:
|
12
|
+
- Steven G. Harms
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2010-12-06 00:00:00 -08:00
|
18
|
+
default_executable:
|
19
|
+
dependencies: []
|
20
|
+
|
21
|
+
description: "Convert strings like 'laud\\={a}re' to 'laud\xC4\x81re' using\n ASCII-compatible escape codes."
|
22
|
+
email:
|
23
|
+
- macron_conversions@sgharms.oib.com
|
24
|
+
executables:
|
25
|
+
- mconv
|
26
|
+
- mdconv
|
27
|
+
extensions: []
|
28
|
+
|
29
|
+
extra_rdoc_files: []
|
30
|
+
|
31
|
+
files:
|
32
|
+
- .gitignore
|
33
|
+
- Gemfile
|
34
|
+
- LICENSE.textile
|
35
|
+
- README.textile
|
36
|
+
- Rakefile
|
37
|
+
- bin/mconv
|
38
|
+
- bin/mdconv
|
39
|
+
- lib/macronconversions.rb
|
40
|
+
- lib/macronconversions/conversion_structure.rb
|
41
|
+
- lib/macronconversions/macronconversions.rb
|
42
|
+
- lib/macronconversions/version.rb
|
43
|
+
- macronconversions.gemspec
|
44
|
+
- test/testMacronConversion.rb
|
45
|
+
- test/testMacronDeconversion.rb
|
46
|
+
has_rdoc: true
|
47
|
+
homepage: http://rubygems.org/gems/macronconversions
|
48
|
+
licenses: []
|
49
|
+
|
50
|
+
post_install_message:
|
51
|
+
rdoc_options: []
|
52
|
+
|
53
|
+
require_paths:
|
54
|
+
- lib
|
55
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
56
|
+
none: false
|
57
|
+
requirements:
|
58
|
+
- - ">="
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
segments:
|
61
|
+
- 0
|
62
|
+
version: "0"
|
63
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
64
|
+
none: false
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
segments:
|
69
|
+
- 0
|
70
|
+
version: "0"
|
71
|
+
requirements: []
|
72
|
+
|
73
|
+
rubyforge_project: macronconversions
|
74
|
+
rubygems_version: 1.3.7
|
75
|
+
signing_key:
|
76
|
+
specification_version: 3
|
77
|
+
summary: Convert strings with Latex-style macron notation to strings with embedded high-byte characters or UTF-8 escape codes.
|
78
|
+
test_files:
|
79
|
+
- test/testMacronConversion.rb
|
80
|
+
- test/testMacronDeconversion.rb
|