medieval_latina 2.0.5 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,10 @@
1
+ <lexicon version='1.0' xmlns='http://www.w3.org/2005/01/pronunciation-lexicon' alphabet='ipa' xml:lang='en-US'>
2
+ <lexeme>
3
+ <grapheme>
4
+ vulnero
5
+ </grapheme>
6
+ <phoneme>
7
+ ˈvul.ne.ro
8
+ </phoneme>
9
+ </lexeme>
10
+ </lexicon>
@@ -0,0 +1,27 @@
1
class MedievalLatina
  # Read-only helpers for locating and loading the pronunciation lexicon
  # (".pls") files stored in LEXICON_DIR.
  class Lexicon
    # Absolute path of the "lexicons" directory, resolved two levels above
    # the directory containing this file.
    LEXICON_DIR = File.expand_path("../../lexicons", __dir__)

    # @return [Array<String>] paths of every "*.pls" file directly inside
    #   LEXICON_DIR
    def self.list_files
      Dir.glob(File.join(LEXICON_DIR, "*.pls"))
    end

    # Resolves a lexicon file name to its path inside LEXICON_DIR.
    #
    # @param file_name [String] file name including extension, e.g. "Latin00.pls"
    # @return [String] the joined path, when something exists at that path
    # @raise [RuntimeError] when nothing exists at the joined path
    def self.get_file_path(file_name)
      path = File.join(LEXICON_DIR, file_name)
      return path if File.exist?(path)

      raise "Lexicon file not found: #{file_name}"
    end

    # @param file_name [String] file name including extension
    # @return [String] the full content of the lexicon file
    # @raise [RuntimeError] when the file cannot be found (see get_file_path)
    def self.get_file_content(file_name)
      File.read(get_file_path(file_name))
    end

    # Maps each lexicon's name (file name without its final extension) to the
    # content of that file.
    #
    # @return [Hash{String => String}]
    def self.file_names_with_contents
      list_files.each_with_object({}) do |file_path, files_content|
        # Strip only the last extension: splitting on every "." would truncate
        # names that themselves contain dots (e.g. "Latin.v2.pls") and the
        # rebuilt file name would then point at a file that does not exist.
        files_content[File.basename(file_path, ".*")] = File.read(file_path)
      end
    end
  end
end
@@ -0,0 +1,44 @@
1
+ require "cgi"
2
+ require "rexml/document"
3
+
4
class MedievalLatina
  # Builds an REXML document with a "lexicon" root element and one "lexeme"
  # child (holding a "grapheme" and a "phoneme") per word/pronunciation pair.
  class LexiconBuilder
    include REXML

    # Attributes placed on the root "lexicon" element. Declared at the top of
    # the class because `private` has no effect on constants.
    SPECIFICATION = {
      "version" => "1.0",
      "xmlns" => "http://www.w3.org/2005/01/pronunciation-lexicon",
      "alphabet" => "ipa",
      "xml:lang" => "en-US"
    }.freeze

    # @param words [#each] yields (word, pronunciation) pairs, e.g. a Hash
    #   mapping each word to its pronunciation string
    def initialize(words)
      @document = Document.new
      @words = words
    end

    # Populates the document on the first call and returns it; later calls
    # return the same, already-built document.
    #
    # @return [REXML::Document]
    def call
      # A document accepts a single root element, so building twice on the
      # same document would raise; hand back what was built the first time.
      return document if document.root

      root = document.add_element("lexicon", SPECIFICATION)

      words.each do |word, pronunciation|
        lexeme = root.add_element("lexeme")
        # HTML entities in the word are decoded before being stored as text.
        lexeme.add_element("grapheme").text = CGI.unescapeHTML(word)
        lexeme.add_element("phoneme").text = pronunciation
      end

      document
    end

    private

    attr_reader :document, :words
  end
end
@@ -1,3 +1,3 @@
1
1
  class MedievalLatina
2
- VERSION = "2.0.5".freeze
2
+ VERSION = "3.0.0".freeze
3
3
  end
@@ -1,5 +1,7 @@
1
- require "medieval_latina/dictionary"
1
+ require "json"
2
2
  require "medieval_latina/initializer"
3
+ require "medieval_latina/lexicon"
4
+ require "medieval_latina/lexicon_builder"
3
5
  require "medieval_latina/version"
4
6
  require "set"
5
7
 
@@ -9,8 +11,8 @@ class MedievalLatina
9
11
  if word?(string)
10
12
  metadata = DICTIONARY.fetch(string, {})
11
13
 
12
- if metadata.key?(:pronunciation)
13
- metadata[:pronunciation]
14
+ if metadata.key?("pronunciation")
15
+ metadata["pronunciation"]
14
16
  else
15
17
  new(string).call
16
18
  end
@@ -22,6 +24,15 @@ class MedievalLatina
22
24
  rejoin_words(prepared_words)
23
25
  end
24
26
 
27
+ def self.dictionary
28
+ @data ||= load_data
29
+ end
30
+
31
+ def self.load_data
32
+ file_path = File.join(File.dirname(__FILE__), "../data/dictionary.json")
33
+ JSON.parse(File.read(file_path))
34
+ end
35
+
25
36
  def self.prepare_text(text)
26
37
  text.scan(/[\p{Alnum}'-]+|[[:punct:]]+/).map do |string|
27
38
  if word?(string)
@@ -37,31 +48,47 @@ class MedievalLatina
37
48
  end
38
49
 
39
50
  def self.adjective?(word)
40
- ADJECTIVES.key?(prepare_word(word))
51
+ adjectives.key?(prepare_word(word))
41
52
  end
42
53
 
43
54
  def self.adverb?(word)
44
- ADVERBS.key?(prepare_word(word))
55
+ adverbs.key?(prepare_word(word))
45
56
  end
46
57
 
47
58
  def self.noun?(word)
48
- NOUNS.key?(prepare_word(word))
59
+ nouns.key?(prepare_word(word))
49
60
  end
50
61
 
51
62
  def self.verb?(word)
52
- VERBS.key?(prepare_word(word))
63
+ verbs.key?(prepare_word(word))
53
64
  end
54
65
 
55
66
  def self.adjectives
56
- ADJECTIVES.keys
67
+ DICTIONARY.select do |word, metadata|
68
+ metadata["part"] == "Adjective"
69
+ end
57
70
  end
58
71
 
59
72
  def self.adverbs
60
- ADVERBS.keys
73
+ DICTIONARY.select do |word, metadata|
74
+ metadata["part"] == "Adverb"
75
+ end
61
76
  end
62
77
 
63
78
  def self.nouns
64
- NOUNS.keys
79
+ DICTIONARY.select do |word, metadata|
80
+ metadata["part"] == "Noun"
81
+ end
82
+ end
83
+
84
+ def self.pronunciations_for(words)
85
+ words.map(&:downcase).each_with_object({}) do |word, hash|
86
+ metadata = DICTIONARY[word]
87
+
88
+ if metadata && metadata["ipa"]
89
+ hash[word] = metadata["ipa"]
90
+ end
91
+ end
65
92
  end
66
93
 
67
94
  def self.rejoin_words(array)
@@ -74,7 +101,9 @@ class MedievalLatina
74
101
  end
75
102
 
76
103
  def self.verbs
77
- VERBS.keys
104
+ DICTIONARY.select do |word, metadata|
105
+ metadata["part"] == "Verb"
106
+ end
78
107
  end
79
108
 
80
109
  def self.word?(string)
@@ -82,13 +111,7 @@ class MedievalLatina
82
111
  end
83
112
 
84
113
  def self.words
85
- [
86
- ADJECTIVES,
87
- ADVERBS,
88
- DICTIONARY,
89
- NOUNS,
90
- VERBS
91
- ].flat_map(&:keys).each_with_object(Set.new) { |word, set| set.add(word) }
114
+ DICTIONARY.keys.to_set
92
115
  end
93
116
 
94
117
  def initialize(word)
@@ -122,6 +145,15 @@ class MedievalLatina
122
145
  x: ->(rest) { "ks" }
123
146
  }
124
147
  CONSONENT_TEAMS = {gn: "n-y", qu: "kw"}.freeze
148
+ PARTS_OF_SPEECH = [
149
+ "Adjective",
150
+ "Adverb",
151
+ "Conjunction",
152
+ "Noun",
153
+ "Preposition",
154
+ "Pronoun",
155
+ "Verb"
156
+ ].to_set.freeze
125
157
  SOFT_C = ["e", "i", "ae", "oe"].freeze
126
158
  SOFT_G = SOFT_C
127
159
  SOFT_T = ["i"].freeze
@@ -163,13 +195,13 @@ class MedievalLatina
163
195
  end
164
196
 
165
197
  def to_team
166
- "#{character}#{rest[0]}".intern
198
+ :"#{character}#{rest[0]}"
167
199
  end
168
200
  end
169
201
 
170
202
  class Error < StandardError; end
171
203
 
172
- DICTIONARY = FREQUENCY_LIST.each_with_object({}) do |(word, metadata), hash|
204
+ DICTIONARY = dictionary.each_with_object({}) do |(word, metadata), hash|
173
205
  hash[word] = metadata
174
206
 
175
207
  sanitized_word = I18n.transliterate(word)
@@ -15,7 +15,7 @@ Gem::Specification.new do |spec|
15
15
  spec.description = description
16
16
  spec.homepage = "https://github.com/jaysonvirissimo/medieval_latina"
17
17
  spec.license = "MIT"
18
- spec.required_ruby_version = Gem::Requirement.new(">= 2.7.0")
18
+ spec.required_ruby_version = Gem::Requirement.new(">= 3.0.0")
19
19
  spec.metadata["allowed_push_host"] = "https://rubygems.org/"
20
20
  spec.metadata["homepage_uri"] = spec.homepage
21
21
  spec.metadata["source_code_uri"] = github_uri
@@ -29,4 +29,8 @@ Gem::Specification.new do |spec|
29
29
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
30
30
  spec.require_paths = ["lib"]
31
31
  spec.add_dependency "i18n"
32
+ spec.add_development_dependency "rake", "~> 12.0"
33
+ spec.add_development_dependency "rspec", "~> 3.0"
34
+ spec.add_development_dependency "nokogiri"
35
+ spec.add_development_dependency "standard"
32
36
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: medieval_latina
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.5
4
+ version: 3.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jayson Virissimo
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-11-28 00:00:00.000000000 Z
11
+ date: 2024-04-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: i18n
@@ -24,6 +24,62 @@ dependencies:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '12.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '12.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '3.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '3.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: nokogiri
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: standard
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
27
83
  description: |
28
84
  There are good text-to-speech engines for English and classical Latin, but none for medieval Latin.
29
85
  MedievalLatina converts Latin text to a kind of phonetic spelling that can be read by English text-to-speech engines.
@@ -33,6 +89,7 @@ executables: []
33
89
  extensions: []
34
90
  extra_rdoc_files: []
35
91
  files:
92
+ - ".DS_Store"
36
93
  - ".github/workflows/tests.yml"
37
94
  - ".gitignore"
38
95
  - ".rspec"
@@ -43,11 +100,25 @@ files:
43
100
  - LICENSE.txt
44
101
  - README.md
45
102
  - Rakefile
103
+ - bin/build
46
104
  - bin/console
105
+ - bin/lint
47
106
  - bin/setup
107
+ - data/dictionary.json
108
+ - lexicons/Latin00.pls
109
+ - lexicons/Latin01.pls
110
+ - lexicons/Latin02.pls
111
+ - lexicons/Latin03.pls
112
+ - lexicons/Latin04.pls
113
+ - lexicons/Latin05.pls
114
+ - lexicons/Latin06.pls
115
+ - lexicons/Latin07.pls
116
+ - lexicons/Latin08.pls
117
+ - lexicons/Latin09.pls
48
118
  - lib/medieval_latina.rb
49
- - lib/medieval_latina/dictionary.rb
50
119
  - lib/medieval_latina/initializer.rb
120
+ - lib/medieval_latina/lexicon.rb
121
+ - lib/medieval_latina/lexicon_builder.rb
51
122
  - lib/medieval_latina/version.rb
52
123
  - medieval_latina.gemspec
53
124
  homepage: https://github.com/jaysonvirissimo/medieval_latina
@@ -66,14 +137,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
66
137
  requirements:
67
138
  - - ">="
68
139
  - !ruby/object:Gem::Version
69
- version: 2.7.0
140
+ version: 3.0.0
70
141
  required_rubygems_version: !ruby/object:Gem::Requirement
71
142
  requirements:
72
143
  - - ">="
73
144
  - !ruby/object:Gem::Version
74
145
  version: '0'
75
146
  requirements: []
76
- rubygems_version: 3.4.1
147
+ rubygems_version: 3.5.3
77
148
  signing_key:
78
149
  specification_version: 4
79
150
  summary: Transform medieval Latin text into phonetic English