medieval_latina 2.1.1 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,10 @@
1
+ <lexicon version='1.0' xmlns='http://www.w3.org/2005/01/pronunciation-lexicon' alphabet='ipa' xml:lang='en-US'>
2
+ <lexeme>
3
+ <grapheme>
4
+ vulnero
5
+ </grapheme>
6
+ <phoneme>
7
+ ˈvul.ne.ro
8
+ </phoneme>
9
+ </lexeme>
10
+ </lexicon>
@@ -0,0 +1,27 @@
1
+ class MedievalLatina
2
+ class Lexicon
3
+ LEXICON_DIR = File.expand_path("../../lexicons", __dir__)
4
+
5
+ def self.list_files
6
+ Dir.glob(File.join(LEXICON_DIR, "*.pls"))
7
+ end
8
+
9
+ def self.get_file_path(file_name)
10
+ path = File.join(LEXICON_DIR, file_name)
11
+ return path if File.exist?(path)
12
+ raise "Lexicon file not found: #{file_name}"
13
+ end
14
+
15
+ def self.get_file_content(file_name)
16
+ path = get_file_path(file_name)
17
+ File.read(path)
18
+ end
19
+
20
+ def self.file_names_with_contents
21
+ list_files.each_with_object({}) do |file_path, files_content|
22
+ file_name, extension = File.basename(file_path).split(".")
23
+ files_content[file_name] = get_file_content("#{file_name}.#{extension}")
24
+ end
25
+ end
26
+ end
27
+ end
@@ -1,61 +1,44 @@
1
- require "builder"
1
+ require "cgi"
2
+ require "rexml/document"
2
3
 
3
4
  class MedievalLatina
4
5
  class LexiconBuilder
5
- def self.write
6
- new.call.write
7
- end
8
-
9
- def initialize
10
- @hash = parts.each_with_object({}) do |part, hash|
11
- hash.merge(part)
12
- end
13
-
14
- FREQUENCY_LIST.each do |word, metadata|
15
- if metadata.key?(:ipa)
16
- @hash[word] = metadata[:ipa]
17
- end
18
- end
6
+ include REXML
19
7
 
20
- @xml = Builder::XmlMarkup.new(indent: 2)
8
+ def initialize(words)
9
+ @document = Document.new
10
+ @words = words
21
11
  end
22
12
 
23
13
  def call
24
- xml.instruct! :xml, encoding: "UTF-8"
25
- xml.lexicon(xmlns: URL, version: "1.0") do
26
- grouped_hash.each do |phonetics, words|
27
- xml.lexeme do
28
- words.each { |word| xml.grapheme word }
29
- xml.phoneme phonetics
30
- end
31
- end
32
- end
14
+ document.add_element "lexicon", SPECIFICATION
33
15
 
34
- self
35
- end
16
+ words.each do |word, pronunciation|
17
+ lexeme = Element.new("lexeme")
18
+ grapheme = Element.new("grapheme")
19
+ phoneme = Element.new("phoneme")
36
20
 
37
- def write
38
- File.open("lexicon.pls", "w") do |file|
39
- file.write(xml.target!)
40
- end
41
- end
42
-
43
- private
21
+ grapheme.text = CGI.unescapeHTML(word)
22
+ phoneme.text = pronunciation
44
23
 
45
- attr_reader :hash, :xml
24
+ lexeme.add_element(grapheme)
25
+ lexeme.add_element(phoneme)
46
26
 
47
- def grouped_hash
48
- hash.group_by do |_, phonetics|
49
- phonetics
50
- end.transform_values do |pairs|
51
- pairs.map(&:first)
27
+ document.root.add_element(lexeme)
52
28
  end
53
- end
54
29
 
55
- def parts
56
- [ADJECTIVES, ADVERBS, NOUNS, VERBS]
30
+ document
57
31
  end
58
32
 
59
- URL = "http://www.w3.org/2005/01/pronunciation-lexicon".freeze
33
+ private
34
+
35
+ attr_reader :document, :words
36
+
37
+ SPECIFICATION = {
38
+ "version" => "1.0",
39
+ "xmlns" => "http://www.w3.org/2005/01/pronunciation-lexicon",
40
+ "alphabet" => "ipa",
41
+ "xml:lang" => "en-US"
42
+ }.freeze
60
43
  end
61
44
  end
@@ -1,3 +1,3 @@
1
1
  class MedievalLatina
2
- VERSION = "2.1.1".freeze
2
+ VERSION = "3.0.0".freeze
3
3
  end
@@ -1,5 +1,6 @@
1
- require "medieval_latina/dictionary"
1
+ require "json"
2
2
  require "medieval_latina/initializer"
3
+ require "medieval_latina/lexicon"
3
4
  require "medieval_latina/lexicon_builder"
4
5
  require "medieval_latina/version"
5
6
  require "set"
@@ -10,8 +11,8 @@ class MedievalLatina
10
11
  if word?(string)
11
12
  metadata = DICTIONARY.fetch(string, {})
12
13
 
13
- if metadata.key?(:pronunciation)
14
- metadata[:pronunciation]
14
+ if metadata.key?("pronunciation")
15
+ metadata["pronunciation"]
15
16
  else
16
17
  new(string).call
17
18
  end
@@ -23,6 +24,15 @@ class MedievalLatina
23
24
  rejoin_words(prepared_words)
24
25
  end
25
26
 
27
+ def self.dictionary
28
+ @data ||= load_data
29
+ end
30
+
31
+ def self.load_data
32
+ file_path = File.join(File.dirname(__FILE__), "../data/dictionary.json")
33
+ JSON.parse(File.read(file_path))
34
+ end
35
+
26
36
  def self.prepare_text(text)
27
37
  text.scan(/[\p{Alnum}'-]+|[[:punct:]]+/).map do |string|
28
38
  if word?(string)
@@ -38,31 +48,47 @@ class MedievalLatina
38
48
  end
39
49
 
40
50
  def self.adjective?(word)
41
- ADJECTIVES.key?(prepare_word(word))
51
+ adjectives.key?(prepare_word(word))
42
52
  end
43
53
 
44
54
  def self.adverb?(word)
45
- ADVERBS.key?(prepare_word(word))
55
+ adverbs.key?(prepare_word(word))
46
56
  end
47
57
 
48
58
  def self.noun?(word)
49
- NOUNS.key?(prepare_word(word))
59
+ nouns.key?(prepare_word(word))
50
60
  end
51
61
 
52
62
  def self.verb?(word)
53
- VERBS.key?(prepare_word(word))
63
+ verbs.key?(prepare_word(word))
54
64
  end
55
65
 
56
66
  def self.adjectives
57
- ADJECTIVES.keys
67
+ DICTIONARY.select do |word, metadata|
68
+ metadata["part"] == "Adjective"
69
+ end
58
70
  end
59
71
 
60
72
  def self.adverbs
61
- ADVERBS.keys
73
+ DICTIONARY.select do |word, metadata|
74
+ metadata["part"] == "Adverb"
75
+ end
62
76
  end
63
77
 
64
78
  def self.nouns
65
- NOUNS.keys
79
+ DICTIONARY.select do |word, metadata|
80
+ metadata["part"] == "Noun"
81
+ end
82
+ end
83
+
84
+ def self.pronunciations_for(words)
85
+ words.map(&:downcase).each_with_object({}) do |word, hash|
86
+ metadata = DICTIONARY[word]
87
+
88
+ if metadata && metadata["ipa"]
89
+ hash[word] = metadata["ipa"]
90
+ end
91
+ end
66
92
  end
67
93
 
68
94
  def self.rejoin_words(array)
@@ -75,7 +101,9 @@ class MedievalLatina
75
101
  end
76
102
 
77
103
  def self.verbs
78
- VERBS.keys
104
+ DICTIONARY.select do |word, metadata|
105
+ metadata["part"] == "Verb"
106
+ end
79
107
  end
80
108
 
81
109
  def self.word?(string)
@@ -83,13 +111,7 @@ class MedievalLatina
83
111
  end
84
112
 
85
113
  def self.words
86
- [
87
- ADJECTIVES,
88
- ADVERBS,
89
- DICTIONARY,
90
- NOUNS,
91
- VERBS
92
- ].flat_map(&:keys).each_with_object(Set.new) { |word, set| set.add(word) }
114
+ DICTIONARY.keys.to_set
93
115
  end
94
116
 
95
117
  def initialize(word)
@@ -123,6 +145,15 @@ class MedievalLatina
123
145
  x: ->(rest) { "ks" }
124
146
  }
125
147
  CONSONENT_TEAMS = {gn: "n-y", qu: "kw"}.freeze
148
+ PARTS_OF_SPEECH = [
149
+ "Adjective",
150
+ "Adverb",
151
+ "Conjunction",
152
+ "Noun",
153
+ "Preposition",
154
+ "Pronoun",
155
+ "Verb"
156
+ ].to_set.freeze
126
157
  SOFT_C = ["e", "i", "ae", "oe"].freeze
127
158
  SOFT_G = SOFT_C
128
159
  SOFT_T = ["i"].freeze
@@ -164,13 +195,13 @@ class MedievalLatina
164
195
  end
165
196
 
166
197
  def to_team
167
- "#{character}#{rest[0]}".intern
198
+ :"#{character}#{rest[0]}"
168
199
  end
169
200
  end
170
201
 
171
202
  class Error < StandardError; end
172
203
 
173
- DICTIONARY = FREQUENCY_LIST.each_with_object({}) do |(word, metadata), hash|
204
+ DICTIONARY = dictionary.each_with_object({}) do |(word, metadata), hash|
174
205
  hash[word] = metadata
175
206
 
176
207
  sanitized_word = I18n.transliterate(word)
@@ -15,7 +15,7 @@ Gem::Specification.new do |spec|
15
15
  spec.description = description
16
16
  spec.homepage = "https://github.com/jaysonvirissimo/medieval_latina"
17
17
  spec.license = "MIT"
18
- spec.required_ruby_version = Gem::Requirement.new(">= 2.7.0")
18
+ spec.required_ruby_version = Gem::Requirement.new(">= 3.0.0")
19
19
  spec.metadata["allowed_push_host"] = "https://rubygems.org/"
20
20
  spec.metadata["homepage_uri"] = spec.homepage
21
21
  spec.metadata["source_code_uri"] = github_uri
@@ -29,4 +29,8 @@ Gem::Specification.new do |spec|
29
29
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
30
30
  spec.require_paths = ["lib"]
31
31
  spec.add_dependency "i18n"
32
+ spec.add_development_dependency "rake", "~> 12.0"
33
+ spec.add_development_dependency "rspec", "~> 3.0"
34
+ spec.add_development_dependency "nokogiri"
35
+ spec.add_development_dependency "standard"
32
36
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: medieval_latina
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.1.1
4
+ version: 3.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jayson Virissimo
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-11-30 00:00:00.000000000 Z
11
+ date: 2024-04-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: i18n
@@ -24,6 +24,62 @@ dependencies:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '12.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '12.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '3.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '3.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: nokogiri
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: standard
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
27
83
  description: |
28
84
  There are good text-to-speech engines for English and classical Latin, but none for medieval Latin.
29
85
  MedievalLatina converts Latin text to a kind of phonetic spelling that can be read by English text-to-speech engines.
@@ -33,6 +89,7 @@ executables: []
33
89
  extensions: []
34
90
  extra_rdoc_files: []
35
91
  files:
92
+ - ".DS_Store"
36
93
  - ".github/workflows/tests.yml"
37
94
  - ".gitignore"
38
95
  - ".rspec"
@@ -45,11 +102,22 @@ files:
45
102
  - Rakefile
46
103
  - bin/build
47
104
  - bin/console
105
+ - bin/lint
48
106
  - bin/setup
49
- - lexicon.pls
107
+ - data/dictionary.json
108
+ - lexicons/Latin00.pls
109
+ - lexicons/Latin01.pls
110
+ - lexicons/Latin02.pls
111
+ - lexicons/Latin03.pls
112
+ - lexicons/Latin04.pls
113
+ - lexicons/Latin05.pls
114
+ - lexicons/Latin06.pls
115
+ - lexicons/Latin07.pls
116
+ - lexicons/Latin08.pls
117
+ - lexicons/Latin09.pls
50
118
  - lib/medieval_latina.rb
51
- - lib/medieval_latina/dictionary.rb
52
119
  - lib/medieval_latina/initializer.rb
120
+ - lib/medieval_latina/lexicon.rb
53
121
  - lib/medieval_latina/lexicon_builder.rb
54
122
  - lib/medieval_latina/version.rb
55
123
  - medieval_latina.gemspec
@@ -69,14 +137,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
69
137
  requirements:
70
138
  - - ">="
71
139
  - !ruby/object:Gem::Version
72
- version: 2.7.0
140
+ version: 3.0.0
73
141
  required_rubygems_version: !ruby/object:Gem::Requirement
74
142
  requirements:
75
143
  - - ">="
76
144
  - !ruby/object:Gem::Version
77
145
  version: '0'
78
146
  requirements: []
79
- rubygems_version: 3.4.1
147
+ rubygems_version: 3.5.3
80
148
  signing_key:
81
149
  specification_version: 4
82
150
  summary: Transform medieval Latin text into phonetic English