rgreek 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +18 -0
- data/.rspec +1 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +92 -0
- data/Rakefile +6 -0
- data/lib/rGreek.rb +5 -0
- data/lib/rgreek/lib/monkey_patches.rb +5 -0
- data/lib/rgreek/lib/morph_code.rb +188 -0
- data/lib/rgreek/lib/transcoder.rb +660 -0
- data/lib/rgreek/version.rb +3 -0
- data/lib/ui/rgreek +23 -0
- data/rGreek.gemspec +20 -0
- data/spec/monkey_patches_spec.rb +17 -0
- data/spec/spec_helper.rb +5 -0
- data/spec/transcoder_spec.rb +180 -0
- metadata +80 -0
data/lib/ui/rgreek
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Command-line front end for rgreek.
#
# Usage: rgreek convert TEXT...
#
# Puts the gem's lib/ directory (two levels above this script) on the load
# path so the script also runs straight from a source checkout.
$LOAD_PATH.push File.expand_path("../../", __FILE__)

# The gem's entry file is lib/rGreek.rb. The required name has to match that
# spelling exactly, otherwise the lookup fails on case-sensitive filesystems.
require 'rGreek'
include RGreek

# Runs one sub-command against the arguments that follow it on the command line.
#
# msg - text printed when nothing follows the command name.
#
# Yields ARGV with the command name removed and prints whatever the block
# returns. Never returns to the caller: exits with status 1 when the
# arguments are missing, and with status 0 after printing the result.
def respond(msg)
  if ARGV.length < 2
    puts msg
    exit 1
  end

  ARGV.shift # drop the command name so the block only sees its own arguments
  puts yield(ARGV)
  exit 0
end

case ARGV[0]
when "convert"
  respond("please input the unicode text you would like transcoded") do |args|
    # The shell has already split the input on whitespace; rejoin with single
    # spaces so that word boundaries are still visible to the transcoder.
    Transcoder.convert(args.join(" "))
  end
else
  # "convert" is the only command this script dispatches on.
  puts "choose a command: convert"
  exit 1
end
|
data/rGreek.gemspec
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
# Gem specification for rgreek.

# Make lib/ loadable so the version constant can be read without installing the gem.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'rgreek/version'

Gem::Specification.new do |gem|
  gem.name          = "rgreek"
  gem.version       = RGreek::VERSION
  gem.authors       = ["Paul Saieg"]
  gem.email         = ["classicist@gmail.com"]
  gem.description   = %q{Light, intuituive ruby tools for working with classical Greek}
  gem.summary       = %q{Light, intuituive ruby tools for working with classical Greek}

  # Package exactly what git tracks; $/ is the input record separator (newline).
  gem.files         = `git ls-files`.split($/)

  # The command-line script ships as lib/ui/rgreek rather than under bin/.
  # Looking for executables in bin/ matched nothing, so the script was never
  # installed; point bindir at the directory that actually holds it.
  gem.bindir        = "lib/ui"
  gem.executables   = gem.files.grep(%r{^lib/ui/}).map { |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(spec|features)/})
  gem.require_paths = ["lib"]

  gem.add_development_dependency "rspec"
end
|
data/spec/monkey_patches_spec.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
describe "Monkey Patches" do
|
6
|
+
it "should add a to_unicode_points method to String" do
|
7
|
+
"hello unicode".should respond_to(:to_unicode_points)
|
8
|
+
end
|
9
|
+
|
10
|
+
it "should give the unicode code points for each character in a string" do
|
11
|
+
"ab".to_unicode_points.should == ["0061", "0062"]
|
12
|
+
tonos = "ί"
|
13
|
+
tonos.to_unicode_points.should == ["03af"]
|
14
|
+
oxia = "ί"
|
15
|
+
oxia.to_unicode_points.should == ["1f77"]
|
16
|
+
end
|
17
|
+
end
|
data/spec/transcoder_spec.rb
ADDED
@@ -0,0 +1,180 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
describe "The Betacode Tokenizer" do
|
6
|
+
it "should give the name of a betacode token for the regular alphabet" do
|
7
|
+
Transcoder.send(:tokenize, "a").should == ["alpha"]
|
8
|
+
Transcoder.send(:tokenize, "w").should == ["omega"]
|
9
|
+
end
|
10
|
+
|
11
|
+
it "should give the name of three betacode tokens in the regular alphabet" do
|
12
|
+
Transcoder.send(:tokenize, "abg").should == ["alpha", "beta", "gamma"]
|
13
|
+
end
|
14
|
+
|
15
|
+
it "should give the name of a betacode token for capitals" do
|
16
|
+
Transcoder.send(:tokenize, "*a").should == ["Alpha"]
|
17
|
+
Transcoder.send(:tokenize, "e*ab*g").should == ["epsilon", "Alpha", "beta", "Gamma"]
|
18
|
+
end
|
19
|
+
|
20
|
+
it "should give the name of a betacode token for accents" do
|
21
|
+
Transcoder.send(:tokenize, "/").should == ["oxy"]
|
22
|
+
Transcoder.send(:tokenize, "*kai/").should == ["Kappa", "alpha", "iota", "oxy"]
|
23
|
+
end
|
24
|
+
|
25
|
+
it "should give the name of a betacode token for crazy sigma" do
|
26
|
+
Transcoder.send(:tokenize, "SS2S3").should == ["sigmaMedial","sigmaFinal", "sigmaLunate"]
|
27
|
+
Transcoder.send(:tokenize, "S3*S3").should == ["sigmaLunate", "SigmaLunate"]
|
28
|
+
end
|
29
|
+
|
30
|
+
it "should give the name of a betacode token for koppa, sampi, and stigma" do
|
31
|
+
Transcoder.send(:tokenize, "#3*#3*#5#5#2*#2").should == ["koppa","Koppa", "Sampi", "sampi", "stigma", "Stigma"]
|
32
|
+
end
|
33
|
+
|
34
|
+
it "should give the name of a betacode token for punctuation" do
|
35
|
+
Transcoder.send(:tokenize, "#\:;\'").should == ["prime","raisedDot", "semicolon", "elisionMark"]
|
36
|
+
end
|
37
|
+
|
38
|
+
it "should give the name of a betacode token for Brackets and the like" do
|
39
|
+
Transcoder.send(:tokenize, "[][1]1[2]2[3]3[4]4").should == ["openingSquareBracket","closingSquareBracket", "openingParentheses", "closingParentheses", "openingAngleBracket", "closingAngleBracket", "openingCurlyBracket", "closingCurlyBracket", "openingDoubleSquareBracket", "closingDoubleSquareBracket"]
|
40
|
+
end
|
41
|
+
|
42
|
+
it "should give the name of a betacode token for critical marks" do
|
43
|
+
Transcoder.send(:tokenize, "%%2%5").should == ["crux", "asterisk", "longVerticalBar"]
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
describe "Betacode to Unicode C Conversion" do
|
48
|
+
|
49
|
+
it "should convert betacode letters to unicode with combined greek accents over vowels with breathing marks, spaces, and wierd punctuation" do
|
50
|
+
Transcoder.convert("*s").should == "Σ"
|
51
|
+
Transcoder.convert("pw=s, a.").should == "πῶς, α."
|
52
|
+
Transcoder.convert("pw=s ").should == "πῶς "
|
53
|
+
Transcoder.convert("pw=s").should == "πῶς"
|
54
|
+
Transcoder.convert("[4*h)/xw]4\:").should == "⟦Ἤχω⟧·"
|
55
|
+
Transcoder.convert("*h)/xw au)tw=|").should == "Ἤχω αὐτῷ"
|
56
|
+
Transcoder.convert("gnw=qi %5 seau/ton%").should == "γνῶθι | σεαύτον†"
|
57
|
+
end
|
58
|
+
|
59
|
+
it "should convert betacode letters to unicode without greek accents" do
|
60
|
+
Transcoder.convert("kai").should == "και"
|
61
|
+
end
|
62
|
+
|
63
|
+
it "should convert betacode letters to unicode with combined greek accents over vowels" do
|
64
|
+
Transcoder.convert("le/gw").should == "λέγω"
|
65
|
+
Transcoder.convert("kai/").should == "καί"
|
66
|
+
end
|
67
|
+
|
68
|
+
it "should convert unicode to betacode" do
|
69
|
+
Transcoder.convert("Σ").should == "*s"
|
70
|
+
Transcoder.convert("πῶς ").should == "pw=s "
|
71
|
+
Transcoder.convert("πῶς").should == "pw=s"
|
72
|
+
Transcoder.convert("⟦Ἤχω⟧·").should == "[4*h)/xw]4\:"
|
73
|
+
Transcoder.convert("Ἤχω αὐτῷ").should == "*h)/xw au)tw=|"
|
74
|
+
Transcoder.convert("γνῶθι | σεαύτον†").should == "gnw=qi %5 seau/ton%"
|
75
|
+
end
|
76
|
+
|
77
|
+
it "should change all known betacode tokens to unicode" do
|
78
|
+
unicodes = Transcoder.send(:convert_to_unicode, (Transcoder::BETA_CODES.values))
|
79
|
+
unicodes.length.should > 0
|
80
|
+
unicodes.split("").each do |code|
|
81
|
+
Transcoder::REVERSE_UNICODES[code].should_not == nil
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
it "should reverse the betacode and unicode transcoding hashes without loss" do
|
86
|
+
Transcoder::BETA_CODES.keys.should == Transcoder::REVERSE_BETA_CODES.values
|
87
|
+
Transcoder::UNICODES.keys.should == Transcoder::REVERSE_UNICODES.values
|
88
|
+
|
89
|
+
Transcoder::BETA_CODES.values.should == Transcoder::REVERSE_BETA_CODES.keys
|
90
|
+
Transcoder::UNICODES.values.should == Transcoder::REVERSE_UNICODES.keys
|
91
|
+
end
|
92
|
+
|
93
|
+
it "should change roundtrip betacode -> unicode -> betacode for all known betacodes except final sigma" do
|
94
|
+
all_known_betacode_chars = Transcoder::BETA_CODES.keys.join(",")
|
95
|
+
unicodes = Transcoder.convert(all_known_betacode_chars)
|
96
|
+
result_betacode = Transcoder.convert(unicodes)
|
97
|
+
|
98
|
+
#Final Sigma ("s2") appears to be lost, but is not because we test for it by position so that we can return regular "s" in our
|
99
|
+
#generated betacode rather than the annoying-to-read "s2"
|
100
|
+
(all_known_betacode_chars.split(",") - result_betacode.split(",")).sort.should == ["s2"]
|
101
|
+
end
|
102
|
+
|
103
|
+
it "should change roundtrip unicode -> betacode -> unicode for all known betacodes except final sigma" do
|
104
|
+
all_known_unicode_chars = Transcoder::UNICODES.values.join
|
105
|
+
betacodes = Transcoder.convert(all_known_unicode_chars)
|
106
|
+
result_unicode = Transcoder.convert(betacodes)
|
107
|
+
(all_known_unicode_chars.split("") - result_unicode.split("")).map do |unicode|
|
108
|
+
Transcoder.name_of_unicode_char(unicode)
|
109
|
+
end.should == ["sigmaFinal"] #sigmaFinal should be missing bc we never print "s2" (reciprocal with above)
|
110
|
+
end
|
111
|
+
|
112
|
+
it "should transcode sigma and final sigma correctly based on position or value" do
|
113
|
+
Transcoder.convert("ς").should_not == "s2" #never output s2
|
114
|
+
Transcoder.convert("ς").should == "s"
|
115
|
+
Transcoder.convert("σα").should == "sa"
|
116
|
+
|
117
|
+
Transcoder.convert("s2").should == "ς"
|
118
|
+
Transcoder.convert("s").should == "ς"
|
119
|
+
Transcoder.convert("sa").should == "σα"
|
120
|
+
end
|
121
|
+
|
122
|
+
it "should detect whether the input is beta or unicode" do
|
123
|
+
beta, uni = "kai/", "καί"
|
124
|
+
Transcoder.is_betacode?(beta).should == true
|
125
|
+
Transcoder.is_betacode?(uni).should == false
|
126
|
+
|
127
|
+
Transcoder.is_unicode?(uni).should == true
|
128
|
+
Transcoder.is_unicode?(beta).should == false
|
129
|
+
|
130
|
+
Transcoder.is_betacode?(Transcoder::BETA_CODES.keys.join).should == true
|
131
|
+
Transcoder.is_betacode?(Transcoder::UNICODES.values.join).should == false
|
132
|
+
|
133
|
+
Transcoder.is_unicode?(Transcoder::UNICODES.values.join).should == true
|
134
|
+
Transcoder.is_unicode?(Transcoder::BETA_CODES.keys.join).should == false
|
135
|
+
end
|
136
|
+
|
137
|
+
it "should detect accents" do
|
138
|
+
Transcoder.has_accents?("moo").should == false
|
139
|
+
Transcoder.has_accents?("le/gw").should == true
|
140
|
+
end
|
141
|
+
|
142
|
+
it "should should automatically transcode beta and unicode" do
|
143
|
+
beta, uni = "kai/s", "καίς"
|
144
|
+
Transcoder.convert(beta).should == uni
|
145
|
+
Transcoder.convert(uni).should == beta
|
146
|
+
end
|
147
|
+
end
|
148
|
+
|
149
|
+
describe "Tonos converter" do
|
150
|
+
it "should transcode tonos accents to oxias" do
|
151
|
+
oxia = "ί"
|
152
|
+
tonos = "ί"
|
153
|
+
tonos.should === "\u03af"
|
154
|
+
Transcoder.tonos_to_oxia(tonos).should == oxia
|
155
|
+
end
|
156
|
+
|
157
|
+
it "should convert a word with tonoi to a word with oxiai without screwing up the uninvolved chars" do
|
158
|
+
kaiw_oxia = "καίω"
|
159
|
+
kaiw_tonos = "καίω"
|
160
|
+
kaiw_oxia.should_not == kaiw_tonos
|
161
|
+
Transcoder.tonos_to_oxia(kaiw_tonos).should == kaiw_oxia
|
162
|
+
end
|
163
|
+
|
164
|
+
it "should not hurt words that do not have a tonos in them" do
|
165
|
+
kaiw_oxia = "καίωbaldinadfioadfm2<>\.o4./+-1[}{]"
|
166
|
+
Transcoder.tonos_to_oxia(kaiw_oxia).should == kaiw_oxia
|
167
|
+
end
|
168
|
+
|
169
|
+
it "should transcode omega with tonos to omega with oxia" do
|
170
|
+
tonos_omega = "ώ"
|
171
|
+
oxia_omega = "ώ"
|
172
|
+
Transcoder.tonos_to_oxia(tonos_omega).should == oxia_omega
|
173
|
+
end
|
174
|
+
|
175
|
+
# => only run when database is updated and greek or latin lemmas are added
|
176
|
+
# it "should find all greek lemmas" do
|
177
|
+
# GreekLemma.find(:all).map { |l| l.headword if !Transcoder.is_greek?(l.headword) }.compact.should == []
|
178
|
+
# end
|
179
|
+
#
|
180
|
+
end
|
metadata
ADDED
@@ -0,0 +1,80 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: rgreek
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Paul Saieg
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-10-25 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rspec
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :development
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
30
|
+
description: Light, intuituive ruby tools for working with classical Greek
|
31
|
+
email:
|
32
|
+
- classicist@gmail.com
|
33
|
+
executables: []
|
34
|
+
extensions: []
|
35
|
+
extra_rdoc_files: []
|
36
|
+
files:
|
37
|
+
- .gitignore
|
38
|
+
- .rspec
|
39
|
+
- Gemfile
|
40
|
+
- LICENSE.txt
|
41
|
+
- README.md
|
42
|
+
- Rakefile
|
43
|
+
- lib/rGreek.rb
|
44
|
+
- lib/rgreek/lib/monkey_patches.rb
|
45
|
+
- lib/rgreek/lib/morph_code.rb
|
46
|
+
- lib/rgreek/lib/transcoder.rb
|
47
|
+
- lib/rgreek/version.rb
|
48
|
+
- lib/ui/rgreek
|
49
|
+
- rGreek.gemspec
|
50
|
+
- spec/monkey_patches_spec.rb
|
51
|
+
- spec/spec_helper.rb
|
52
|
+
- spec/transcoder_spec.rb
|
53
|
+
homepage:
|
54
|
+
licenses: []
|
55
|
+
post_install_message:
|
56
|
+
rdoc_options: []
|
57
|
+
require_paths:
|
58
|
+
- lib
|
59
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
60
|
+
none: false
|
61
|
+
requirements:
|
62
|
+
- - ! '>='
|
63
|
+
- !ruby/object:Gem::Version
|
64
|
+
version: '0'
|
65
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
66
|
+
none: false
|
67
|
+
requirements:
|
68
|
+
- - ! '>='
|
69
|
+
- !ruby/object:Gem::Version
|
70
|
+
version: '0'
|
71
|
+
requirements: []
|
72
|
+
rubyforge_project:
|
73
|
+
rubygems_version: 1.8.24
|
74
|
+
signing_key:
|
75
|
+
specification_version: 3
|
76
|
+
summary: Light, intuituive ruby tools for working with classical Greek
|
77
|
+
test_files:
|
78
|
+
- spec/monkey_patches_spec.rb
|
79
|
+
- spec/spec_helper.rb
|
80
|
+
- spec/transcoder_spec.rb
|