rgreek 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3 @@
1
# Namespace for the rgreek gem.
module RGreek
  # Release version of the gem; read by the gemspec when the gem is built.
  VERSION = "0.1.0"
end
data/lib/ui/rgreek ADDED
@@ -0,0 +1,23 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH.push File.expand_path("../../", __FILE__)
3
+
4
+ require 'rgreek'
5
+ include RGreek
6
+
7
# Runs one command-line sub-command and terminates the process.
#
# When fewer than two command-line arguments are present (i.e. a command name
# with nothing after it, or nothing at all), prints +msg+ and exits with
# status 1. Otherwise drops the command name from ARGV, yields the remaining
# arguments to the block, prints whatever the block returns and exits with
# status 0.
#
# msg - text printed when the command was given no input.
#
# Never returns: every path ends in Kernel#exit.
def respond(msg)
  unless ARGV.length >= 2
    puts msg
    exit 1
  end

  # Discard the command name so the block only sees its own arguments.
  ARGV.shift
  result = yield ARGV
  puts result
  exit 0
end
16
+
17
# Dispatch on the first command-line argument.
# NOTE(review): the fallback message advertises a "parse" command, but only
# "convert" is handled here - confirm whether "parse" is still planned.
command = ARGV[0]

if "convert" == command
  # All remaining arguments are concatenated and handed to the transcoder.
  respond("please input the unicode text you would like transcoded") { |args| Transcoder.convert(args.join) }
else
  puts "choose a command: convert or parse"
  exit 1
end
data/rGreek.gemspec ADDED
@@ -0,0 +1,20 @@
1
+ # -*- encoding: utf-8 -*-
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'rgreek/version'
5
+
6
# Gem specification for rgreek. The version is taken from RGreek::VERSION and
# the file lists are derived from the files tracked by git.
Gem::Specification.new do |gem|
  gem.name          = "rgreek"
  gem.version       = RGreek::VERSION
  gem.authors       = ["Paul Saieg"]
  gem.email         = ["classicist@gmail.com"]
  gem.description   = %q{Light, intuitive ruby tools for working with classical Greek}
  gem.summary       = %q{Light, intuitive ruby tools for working with classical Greek}

  # One entry per line of `git ls-files` output ($/ is the record separator).
  gem.files         = `git ls-files`.split($/)
  # NOTE(review): only files under bin/ are picked up as executables; the CLI
  # script ships as lib/ui/rgreek, so this list comes out empty - confirm
  # whether the script is meant to be installed as a command.
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(spec|features)/})
  gem.require_paths = ["lib"]

  gem.add_development_dependency "rspec"
end
@@ -0,0 +1,17 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'spec_helper'
4
+
5
# Specs for the String#to_unicode_points extension.
describe "Monkey Patches" do
  it "should add a to_unicode_points method to String" do
    "hello unicode".should respond_to(:to_unicode_points)
  end

  it "should give the unicode code points for each character in a string" do
    "ab".to_unicode_points.should == ["0061", "0062"]

    # The two accented iotas render identically and are canonically
    # equivalent, so they are spelled as escapes: a literal glyph would be
    # silently collapsed into U+03AF by any tool that NFC-normalizes this
    # file, and the second expectation would then fail.
    tonos = "\u03af" # GREEK SMALL LETTER IOTA WITH TONOS
    tonos.to_unicode_points.should == ["03af"]
    oxia = "\u1f77" # GREEK SMALL LETTER IOTA WITH OXIA
    oxia.to_unicode_points.should == ["1f77"]
  end
end
@@ -0,0 +1,5 @@
1
+ $LOAD_PATH.unshift File.expand_path("../../lib", __FILE__)
2
+
3
+ require 'rgreek'
4
+ include RGreek
5
+
@@ -0,0 +1,180 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'spec_helper'
4
+
5
# Specs for Transcoder.tokenize: each betacode sequence should be reported as
# the list of token names it is made of.
describe "The Betacode Tokenizer" do
  # Calls Transcoder.tokenize through `send` (presumably because the method is
  # not public - verify against the Transcoder source).
  def tokens_for(betacode)
    Transcoder.send(:tokenize, betacode)
  end

  it "should give the name of a betacode token for the regular alphabet" do
    tokens_for("a").should == %w[alpha]
    tokens_for("w").should == %w[omega]
  end

  it "should give the name of three betacode tokens in the regular alphabet" do
    tokens_for("abg").should == %w[alpha beta gamma]
  end

  it "should give the name of a betacode token for capitals" do
    tokens_for("*a").should == %w[Alpha]
    tokens_for("e*ab*g").should == %w[epsilon Alpha beta Gamma]
  end

  it "should give the name of a betacode token for accents" do
    tokens_for("/").should == %w[oxy]
    tokens_for("*kai/").should == %w[Kappa alpha iota oxy]
  end

  it "should give the name of a betacode token for crazy sigma" do
    tokens_for("SS2S3").should == %w[sigmaMedial sigmaFinal sigmaLunate]
    tokens_for("S3*S3").should == %w[sigmaLunate SigmaLunate]
  end

  it "should give the name of a betacode token for koppa, sampi, and stigma" do
    tokens_for("#3*#3*#5#5#2*#2").should == %w[koppa Koppa Sampi sampi stigma Stigma]
  end

  it "should give the name of a betacode token for punctuation" do
    tokens_for("#\:;\'").should == %w[prime raisedDot semicolon elisionMark]
  end

  it "should give the name of a betacode token for Brackets and the like" do
    expected_names = %w[
      openingSquareBracket closingSquareBracket
      openingParentheses closingParentheses
      openingAngleBracket closingAngleBracket
      openingCurlyBracket closingCurlyBracket
      openingDoubleSquareBracket closingDoubleSquareBracket
    ]
    tokens_for("[][1]1[2]2[3]3[4]4").should == expected_names
  end

  it "should give the name of a betacode token for critical marks" do
    tokens_for("%%2%5").should == %w[crux asterisk longVerticalBar]
  end
end
46
+
47
# Specs for Transcoder.convert and its helpers: betacode <-> unicode
# conversion in both directions, round trips over the full lookup tables,
# sigma handling and input detection.
describe "Betacode to Unicode C Conversion" do

  it "should convert betacode letters to unicode with combined greek accents over vowels with breathing marks, spaces, and wierd punctuation" do
    [
      ["*s", "Σ"],
      ["pw=s, a.", "πῶς, α."],
      ["pw=s ", "πῶς "],
      ["pw=s", "πῶς"],
      ["[4*h)/xw]4\:", "⟦Ἤχω⟧·"],
      ["*h)/xw au)tw=|", "Ἤχω αὐτῷ"],
      ["gnw=qi %5 seau/ton%", "γνῶθι | σεαύτον†"]
    ].each do |betacode, unicode|
      Transcoder.convert(betacode).should == unicode
    end
  end

  it "should convert betacode letters to unicode without greek accents" do
    Transcoder.convert("kai").should == "και"
  end

  it "should convert betacode letters to unicode with combined greek accents over vowels" do
    [
      ["le/gw", "λέγω"],
      ["kai/", "καί"]
    ].each do |betacode, unicode|
      Transcoder.convert(betacode).should == unicode
    end
  end

  it "should convert unicode to betacode" do
    [
      ["Σ", "*s"],
      ["πῶς ", "pw=s "],
      ["πῶς", "pw=s"],
      ["⟦Ἤχω⟧·", "[4*h)/xw]4\:"],
      ["Ἤχω αὐτῷ", "*h)/xw au)tw=|"],
      ["γνῶθι | σεαύτον†", "gnw=qi %5 seau/ton%"]
    ].each do |unicode, betacode|
      Transcoder.convert(unicode).should == betacode
    end
  end

  it "should change all known betacode tokens to unicode" do
    token_names = Transcoder::BETA_CODES.values
    converted = Transcoder.send(:convert_to_unicode, token_names)
    converted.length.should > 0
    # Every produced character must be known to the reverse lookup table.
    converted.split("").each do |char|
      Transcoder::REVERSE_UNICODES[char].should_not == nil
    end
  end

  it "should reverse the betacode and unicode transcoding hashes without loss" do
    beta_codes   = Transcoder::BETA_CODES
    beta_reverse = Transcoder::REVERSE_BETA_CODES
    unicodes     = Transcoder::UNICODES
    uni_reverse  = Transcoder::REVERSE_UNICODES

    beta_codes.keys.should == beta_reverse.values
    unicodes.keys.should == uni_reverse.values

    beta_codes.values.should == beta_reverse.keys
    unicodes.values.should == uni_reverse.keys
  end

  it "should change roundtrip betacode -> unicode -> betacode for all known betacodes except final sigma" do
    every_betacode = Transcoder::BETA_CODES.keys.join(",")
    as_unicode = Transcoder.convert(every_betacode)
    back_to_betacode = Transcoder.convert(as_unicode)

    # Final sigma ("s2") only appears to be lost: it is detected by position,
    # so the generated betacode uses the regular "s" rather than the
    # annoying-to-read "s2".
    lost = every_betacode.split(",") - back_to_betacode.split(",")
    lost.sort.should == ["s2"]
  end

  it "should change roundtrip unicode -> betacode -> unicode for all known betacodes except final sigma" do
    every_unicode = Transcoder::UNICODES.values.join
    as_betacode = Transcoder.convert(every_unicode)
    back_to_unicode = Transcoder.convert(as_betacode)

    lost_chars = every_unicode.split("") - back_to_unicode.split("")
    lost_names = lost_chars.map { |char| Transcoder.name_of_unicode_char(char) }
    # sigmaFinal should be missing because "s2" is never printed (the
    # counterpart of the betacode round trip above).
    lost_names.should == ["sigmaFinal"]
  end

  it "should transcode sigma and final sigma correctly based on position or value" do
    # Unicode -> betacode: "s2" is never output.
    Transcoder.convert("ς").should_not == "s2"
    Transcoder.convert("ς").should == "s"
    Transcoder.convert("σα").should == "sa"

    # Betacode -> unicode.
    Transcoder.convert("s2").should == "ς"
    Transcoder.convert("s").should == "ς"
    Transcoder.convert("sa").should == "σα"
  end

  it "should detect whether the input is beta or unicode" do
    betacode_sample, unicode_sample = "kai/", "καί"
    every_betacode = Transcoder::BETA_CODES.keys.join
    every_unicode = Transcoder::UNICODES.values.join

    Transcoder.is_betacode?(betacode_sample).should == true
    Transcoder.is_betacode?(unicode_sample).should == false

    Transcoder.is_unicode?(unicode_sample).should == true
    Transcoder.is_unicode?(betacode_sample).should == false

    Transcoder.is_betacode?(every_betacode).should == true
    Transcoder.is_betacode?(every_unicode).should == false

    Transcoder.is_unicode?(every_unicode).should == true
    Transcoder.is_unicode?(every_betacode).should == false
  end

  it "should detect accents" do
    Transcoder.has_accents?("moo").should == false
    Transcoder.has_accents?("le/gw").should == true
  end

  it "should should automatically transcode beta and unicode" do
    betacode_sample, unicode_sample = "kai/s", "καίς"
    Transcoder.convert(betacode_sample).should == unicode_sample
    Transcoder.convert(unicode_sample).should == betacode_sample
  end
end
148
+
149
# Specs for Transcoder.tonos_to_oxia.
#
# A vowel with tonos (Greek and Coptic block) and the same vowel with oxia
# (Greek Extended block) render identically and are canonically equivalent.
# They are therefore written as \u escapes throughout: literal glyphs would be
# silently collapsed into the tonos form by any tool that NFC-normalizes this
# file, making the fixtures below indistinguishable.
describe "Tonos converter" do
  it "should transcode tonos accents to oxias" do
    oxia = "\u1f77"  # iota with oxia
    tonos = "\u03af" # iota with tonos
    tonos.should === "\u03af"
    Transcoder.tonos_to_oxia(tonos).should == oxia
  end

  it "should convert a word with tonoi to a word with oxiai without screwing up the uninvolved chars" do
    kaiw_oxia = "κα\u1f77ω"
    kaiw_tonos = "κα\u03afω"
    kaiw_oxia.should_not == kaiw_tonos
    Transcoder.tonos_to_oxia(kaiw_tonos).should == kaiw_oxia
  end

  it "should not hurt words that do not have a tonos in them" do
    # Contains an oxia iota plus assorted non-Greek characters; the string
    # is expected to come back unchanged.
    kaiw_oxia = "κα\u1f77ωbaldinadfioadfm2<>\.o4./+-1[}{]"
    Transcoder.tonos_to_oxia(kaiw_oxia).should == kaiw_oxia
  end

  it "should transcode omega with tonos to omega with oxia" do
    tonos_omega = "\u03ce" # omega with tonos
    oxia_omega = "\u1f7d"  # omega with oxia
    Transcoder.tonos_to_oxia(tonos_omega).should == oxia_omega
  end

  # Disabled: only run when the database is updated and greek or latin lemmas
  # are added.
  # it "should find all greek lemmas" do
  #   GreekLemma.find(:all).map { |l| l.headword if !Transcoder.is_greek?(l.headword) }.compact.should == []
  # end
  #
end
metadata ADDED
@@ -0,0 +1,80 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rgreek
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Paul Saieg
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-10-25 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rspec
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ description: Light, intuituive ruby tools for working with classical Greek
31
+ email:
32
+ - classicist@gmail.com
33
+ executables: []
34
+ extensions: []
35
+ extra_rdoc_files: []
36
+ files:
37
+ - .gitignore
38
+ - .rspec
39
+ - Gemfile
40
+ - LICENSE.txt
41
+ - README.md
42
+ - Rakefile
43
+ - lib/rGreek.rb
44
+ - lib/rgreek/lib/monkey_patches.rb
45
+ - lib/rgreek/lib/morph_code.rb
46
+ - lib/rgreek/lib/transcoder.rb
47
+ - lib/rgreek/version.rb
48
+ - lib/ui/rgreek
49
+ - rGreek.gemspec
50
+ - spec/monkey_patches_spec.rb
51
+ - spec/spec_helper.rb
52
+ - spec/transcoder_spec.rb
53
+ homepage:
54
+ licenses: []
55
+ post_install_message:
56
+ rdoc_options: []
57
+ require_paths:
58
+ - lib
59
+ required_ruby_version: !ruby/object:Gem::Requirement
60
+ none: false
61
+ requirements:
62
+ - - ! '>='
63
+ - !ruby/object:Gem::Version
64
+ version: '0'
65
+ required_rubygems_version: !ruby/object:Gem::Requirement
66
+ none: false
67
+ requirements:
68
+ - - ! '>='
69
+ - !ruby/object:Gem::Version
70
+ version: '0'
71
+ requirements: []
72
+ rubyforge_project:
73
+ rubygems_version: 1.8.24
74
+ signing_key:
75
+ specification_version: 3
76
+ summary: Light, intuituive ruby tools for working with classical Greek
77
+ test_files:
78
+ - spec/monkey_patches_spec.rb
79
+ - spec/spec_helper.rb
80
+ - spec/transcoder_spec.rb