medieval_latina 2.1.1 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,10 @@
1
+ <lexicon version='1.0' xmlns='http://www.w3.org/2005/01/pronunciation-lexicon' alphabet='ipa' xml:lang='en-US'>
2
+ <lexeme>
3
+ <grapheme>
4
+ vulnero
5
+ </grapheme>
6
+ <phoneme>
7
+ ˈvul.ne.ro
8
+ </phoneme>
9
+ </lexeme>
10
+ </lexicon>
@@ -0,0 +1,27 @@
1
+ class MedievalLatina
2
+ class Lexicon
3
+ LEXICON_DIR = File.expand_path("../../lexicons", __dir__)
4
+
5
+ def self.list_files
6
+ Dir.glob(File.join(LEXICON_DIR, "*.pls"))
7
+ end
8
+
9
+ def self.get_file_path(file_name)
10
+ path = File.join(LEXICON_DIR, file_name)
11
+ return path if File.exist?(path)
12
+ raise "Lexicon file not found: #{file_name}"
13
+ end
14
+
15
+ def self.get_file_content(file_name)
16
+ path = get_file_path(file_name)
17
+ File.read(path)
18
+ end
19
+
20
+ def self.file_names_with_contents
21
+ list_files.each_with_object({}) do |file_path, files_content|
22
+ file_name, extension = File.basename(file_path).split(".")
23
+ files_content[file_name] = get_file_content("#{file_name}.#{extension}")
24
+ end
25
+ end
26
+ end
27
+ end
@@ -1,61 +1,44 @@
1
- require "builder"
1
+ require "cgi"
2
+ require "rexml/document"
2
3
 
3
4
  class MedievalLatina
4
5
  class LexiconBuilder
5
- def self.write
6
- new.call.write
7
- end
8
-
9
- def initialize
10
- @hash = parts.each_with_object({}) do |part, hash|
11
- hash.merge(part)
12
- end
13
-
14
- FREQUENCY_LIST.each do |word, metadata|
15
- if metadata.key?(:ipa)
16
- @hash[word] = metadata[:ipa]
17
- end
18
- end
6
+ include REXML
19
7
 
20
- @xml = Builder::XmlMarkup.new(indent: 2)
8
+ def initialize(words)
9
+ @document = Document.new
10
+ @words = words
21
11
  end
22
12
 
23
13
  def call
24
- xml.instruct! :xml, encoding: "UTF-8"
25
- xml.lexicon(xmlns: URL, version: "1.0") do
26
- grouped_hash.each do |phonetics, words|
27
- xml.lexeme do
28
- words.each { |word| xml.grapheme word }
29
- xml.phoneme phonetics
30
- end
31
- end
32
- end
14
+ document.add_element "lexicon", SPECIFICATION
33
15
 
34
- self
35
- end
16
+ words.each do |word, pronunciation|
17
+ lexeme = Element.new("lexeme")
18
+ grapheme = Element.new("grapheme")
19
+ phoneme = Element.new("phoneme")
36
20
 
37
- def write
38
- File.open("lexicon.pls", "w") do |file|
39
- file.write(xml.target!)
40
- end
41
- end
42
-
43
- private
21
+ grapheme.text = CGI.unescapeHTML(word)
22
+ phoneme.text = pronunciation
44
23
 
45
- attr_reader :hash, :xml
24
+ lexeme.add_element(grapheme)
25
+ lexeme.add_element(phoneme)
46
26
 
47
- def grouped_hash
48
- hash.group_by do |_, phonetics|
49
- phonetics
50
- end.transform_values do |pairs|
51
- pairs.map(&:first)
27
+ document.root.add_element(lexeme)
52
28
  end
53
- end
54
29
 
55
- def parts
56
- [ADJECTIVES, ADVERBS, NOUNS, VERBS]
30
+ document
57
31
  end
58
32
 
59
- URL = "http://www.w3.org/2005/01/pronunciation-lexicon".freeze
33
+ private
34
+
35
+ attr_reader :document, :words
36
+
37
+ SPECIFICATION = {
38
+ "version" => "1.0",
39
+ "xmlns" => "http://www.w3.org/2005/01/pronunciation-lexicon",
40
+ "alphabet" => "ipa",
41
+ "xml:lang" => "en-US"
42
+ }.freeze
60
43
  end
61
44
  end
@@ -1,3 +1,3 @@
1
1
  class MedievalLatina
2
- VERSION = "2.1.1".freeze
2
+ VERSION = "3.0.0".freeze
3
3
  end
@@ -1,5 +1,6 @@
1
- require "medieval_latina/dictionary"
1
+ require "json"
2
2
  require "medieval_latina/initializer"
3
+ require "medieval_latina/lexicon"
3
4
  require "medieval_latina/lexicon_builder"
4
5
  require "medieval_latina/version"
5
6
  require "set"
@@ -10,8 +11,8 @@ class MedievalLatina
10
11
  if word?(string)
11
12
  metadata = DICTIONARY.fetch(string, {})
12
13
 
13
- if metadata.key?(:pronunciation)
14
- metadata[:pronunciation]
14
+ if metadata.key?("pronunciation")
15
+ metadata["pronunciation"]
15
16
  else
16
17
  new(string).call
17
18
  end
@@ -23,6 +24,15 @@ class MedievalLatina
23
24
  rejoin_words(prepared_words)
24
25
  end
25
26
 
27
+ def self.dictionary
28
+ @data ||= load_data
29
+ end
30
+
31
+ def self.load_data
32
+ file_path = File.join(File.dirname(__FILE__), "../data/dictionary.json")
33
+ JSON.parse(File.read(file_path))
34
+ end
35
+
26
36
  def self.prepare_text(text)
27
37
  text.scan(/[\p{Alnum}'-]+|[[:punct:]]+/).map do |string|
28
38
  if word?(string)
@@ -38,31 +48,47 @@ class MedievalLatina
38
48
  end
39
49
 
40
50
  def self.adjective?(word)
41
- ADJECTIVES.key?(prepare_word(word))
51
+ adjectives.key?(prepare_word(word))
42
52
  end
43
53
 
44
54
  def self.adverb?(word)
45
- ADVERBS.key?(prepare_word(word))
55
+ adverbs.key?(prepare_word(word))
46
56
  end
47
57
 
48
58
  def self.noun?(word)
49
- NOUNS.key?(prepare_word(word))
59
+ nouns.key?(prepare_word(word))
50
60
  end
51
61
 
52
62
  def self.verb?(word)
53
- VERBS.key?(prepare_word(word))
63
+ verbs.key?(prepare_word(word))
54
64
  end
55
65
 
56
66
  def self.adjectives
57
- ADJECTIVES.keys
67
+ DICTIONARY.select do |word, metadata|
68
+ metadata["part"] == "Adjective"
69
+ end
58
70
  end
59
71
 
60
72
  def self.adverbs
61
- ADVERBS.keys
73
+ DICTIONARY.select do |word, metadata|
74
+ metadata["part"] == "Adverb"
75
+ end
62
76
  end
63
77
 
64
78
  def self.nouns
65
- NOUNS.keys
79
+ DICTIONARY.select do |word, metadata|
80
+ metadata["part"] == "Noun"
81
+ end
82
+ end
83
+
84
+ def self.pronunciations_for(words)
85
+ words.map(&:downcase).each_with_object({}) do |word, hash|
86
+ metadata = DICTIONARY[word]
87
+
88
+ if metadata && metadata["ipa"]
89
+ hash[word] = metadata["ipa"]
90
+ end
91
+ end
66
92
  end
67
93
 
68
94
  def self.rejoin_words(array)
@@ -75,7 +101,9 @@ class MedievalLatina
75
101
  end
76
102
 
77
103
  def self.verbs
78
- VERBS.keys
104
+ DICTIONARY.select do |word, metadata|
105
+ metadata["part"] == "Verb"
106
+ end
79
107
  end
80
108
 
81
109
  def self.word?(string)
@@ -83,13 +111,7 @@ class MedievalLatina
83
111
  end
84
112
 
85
113
  def self.words
86
- [
87
- ADJECTIVES,
88
- ADVERBS,
89
- DICTIONARY,
90
- NOUNS,
91
- VERBS
92
- ].flat_map(&:keys).each_with_object(Set.new) { |word, set| set.add(word) }
114
+ DICTIONARY.keys.to_set
93
115
  end
94
116
 
95
117
  def initialize(word)
@@ -123,6 +145,15 @@ class MedievalLatina
123
145
  x: ->(rest) { "ks" }
124
146
  }
125
147
  CONSONENT_TEAMS = {gn: "n-y", qu: "kw"}.freeze
148
+ PARTS_OF_SPEECH = [
149
+ "Adjective",
150
+ "Adverb",
151
+ "Conjunction",
152
+ "Noun",
153
+ "Preposition",
154
+ "Pronoun",
155
+ "Verb"
156
+ ].to_set.freeze
126
157
  SOFT_C = ["e", "i", "ae", "oe"].freeze
127
158
  SOFT_G = SOFT_C
128
159
  SOFT_T = ["i"].freeze
@@ -164,13 +195,13 @@ class MedievalLatina
164
195
  end
165
196
 
166
197
  def to_team
167
- "#{character}#{rest[0]}".intern
198
+ :"#{character}#{rest[0]}"
168
199
  end
169
200
  end
170
201
 
171
202
  class Error < StandardError; end
172
203
 
173
- DICTIONARY = FREQUENCY_LIST.each_with_object({}) do |(word, metadata), hash|
204
+ DICTIONARY = dictionary.each_with_object({}) do |(word, metadata), hash|
174
205
  hash[word] = metadata
175
206
 
176
207
  sanitized_word = I18n.transliterate(word)
@@ -15,7 +15,7 @@ Gem::Specification.new do |spec|
15
15
  spec.description = description
16
16
  spec.homepage = "https://github.com/jaysonvirissimo/medieval_latina"
17
17
  spec.license = "MIT"
18
- spec.required_ruby_version = Gem::Requirement.new(">= 2.7.0")
18
+ spec.required_ruby_version = Gem::Requirement.new(">= 3.0.0")
19
19
  spec.metadata["allowed_push_host"] = "https://rubygems.org/"
20
20
  spec.metadata["homepage_uri"] = spec.homepage
21
21
  spec.metadata["source_code_uri"] = github_uri
@@ -29,4 +29,8 @@ Gem::Specification.new do |spec|
29
29
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
30
30
  spec.require_paths = ["lib"]
31
31
  spec.add_dependency "i18n"
32
+ spec.add_development_dependency "rake", "~> 12.0"
33
+ spec.add_development_dependency "rspec", "~> 3.0"
34
+ spec.add_development_dependency "nokogiri"
35
+ spec.add_development_dependency "standard"
32
36
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: medieval_latina
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.1.1
4
+ version: 3.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jayson Virissimo
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-11-30 00:00:00.000000000 Z
11
+ date: 2024-04-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: i18n
@@ -24,6 +24,62 @@ dependencies:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '12.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '12.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '3.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '3.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: nokogiri
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: standard
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
27
83
  description: |
28
84
  There are good text-to-speech engines for English and classical Latin, but none for medieval Latin.
29
85
  MedievalLatina converts Latin text to a kind of phonetic spelling that can be read by English text-to-speech engines.
@@ -33,6 +89,7 @@ executables: []
33
89
  extensions: []
34
90
  extra_rdoc_files: []
35
91
  files:
92
+ - ".DS_Store"
36
93
  - ".github/workflows/tests.yml"
37
94
  - ".gitignore"
38
95
  - ".rspec"
@@ -45,11 +102,22 @@ files:
45
102
  - Rakefile
46
103
  - bin/build
47
104
  - bin/console
105
+ - bin/lint
48
106
  - bin/setup
49
- - lexicon.pls
107
+ - data/dictionary.json
108
+ - lexicons/Latin00.pls
109
+ - lexicons/Latin01.pls
110
+ - lexicons/Latin02.pls
111
+ - lexicons/Latin03.pls
112
+ - lexicons/Latin04.pls
113
+ - lexicons/Latin05.pls
114
+ - lexicons/Latin06.pls
115
+ - lexicons/Latin07.pls
116
+ - lexicons/Latin08.pls
117
+ - lexicons/Latin09.pls
50
118
  - lib/medieval_latina.rb
51
- - lib/medieval_latina/dictionary.rb
52
119
  - lib/medieval_latina/initializer.rb
120
+ - lib/medieval_latina/lexicon.rb
53
121
  - lib/medieval_latina/lexicon_builder.rb
54
122
  - lib/medieval_latina/version.rb
55
123
  - medieval_latina.gemspec
@@ -69,14 +137,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
69
137
  requirements:
70
138
  - - ">="
71
139
  - !ruby/object:Gem::Version
72
- version: 2.7.0
140
+ version: 3.0.0
73
141
  required_rubygems_version: !ruby/object:Gem::Requirement
74
142
  requirements:
75
143
  - - ">="
76
144
  - !ruby/object:Gem::Version
77
145
  version: '0'
78
146
  requirements: []
79
- rubygems_version: 3.4.1
147
+ rubygems_version: 3.5.3
80
148
  signing_key:
81
149
  specification_version: 4
82
150
  summary: Transform medieval Latin text into phonetic English