medieval_latina 2.1.1 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.DS_Store +0 -0
- data/.github/workflows/tests.yml +1 -5
- data/.standard.yml +1 -1
- data/.tool-versions +1 -1
- data/Gemfile +0 -6
- data/Gemfile.lock +7 -4
- data/README.md +47 -1
- data/bin/build +28 -1
- data/bin/lint +6 -0
- data/data/dictionary.json +4982 -0
- data/lexicons/Latin00.pls +2210 -0
- data/lexicons/Latin01.pls +2210 -0
- data/lexicons/Latin02.pls +2210 -0
- data/lexicons/Latin03.pls +2210 -0
- data/lexicons/Latin04.pls +2210 -0
- data/lexicons/Latin05.pls +2210 -0
- data/lexicons/Latin06.pls +2210 -0
- data/lexicons/Latin07.pls +2210 -0
- data/lexicons/Latin08.pls +2210 -0
- data/lexicons/Latin09.pls +10 -0
- data/lib/medieval_latina/lexicon.rb +27 -0
- data/lib/medieval_latina/lexicon_builder.rb +27 -44
- data/lib/medieval_latina/version.rb +1 -1
- data/lib/medieval_latina.rb +51 -20
- data/medieval_latina.gemspec +5 -1
- metadata +74 -6
- data/lexicon.pls +0 -5718
- data/lib/medieval_latina/dictionary.rb +0 -2928
@@ -0,0 +1,27 @@
|
|
1
|
+
class MedievalLatina
|
2
|
+
class Lexicon
|
3
|
+
LEXICON_DIR = File.expand_path("../../lexicons", __dir__)
|
4
|
+
|
5
|
+
def self.list_files
|
6
|
+
Dir.glob(File.join(LEXICON_DIR, "*.pls"))
|
7
|
+
end
|
8
|
+
|
9
|
+
def self.get_file_path(file_name)
|
10
|
+
path = File.join(LEXICON_DIR, file_name)
|
11
|
+
return path if File.exist?(path)
|
12
|
+
raise "Lexicon file not found: #{file_name}"
|
13
|
+
end
|
14
|
+
|
15
|
+
def self.get_file_content(file_name)
|
16
|
+
path = get_file_path(file_name)
|
17
|
+
File.read(path)
|
18
|
+
end
|
19
|
+
|
20
|
+
def self.file_names_with_contents
|
21
|
+
list_files.each_with_object({}) do |file_path, files_content|
|
22
|
+
file_name, extension = File.basename(file_path).split(".")
|
23
|
+
files_content[file_name] = get_file_content("#{file_name}.#{extension}")
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
@@ -1,61 +1,44 @@
|
|
1
|
-
require "
|
1
|
+
require "cgi"
|
2
|
+
require "rexml/document"
|
2
3
|
|
3
4
|
class MedievalLatina
|
4
5
|
class LexiconBuilder
|
5
|
-
|
6
|
-
new.call.write
|
7
|
-
end
|
8
|
-
|
9
|
-
def initialize
|
10
|
-
@hash = parts.each_with_object({}) do |part, hash|
|
11
|
-
hash.merge(part)
|
12
|
-
end
|
13
|
-
|
14
|
-
FREQUENCY_LIST.each do |word, metadata|
|
15
|
-
if metadata.key?(:ipa)
|
16
|
-
@hash[word] = metadata[:ipa]
|
17
|
-
end
|
18
|
-
end
|
6
|
+
include REXML
|
19
7
|
|
20
|
-
|
8
|
+
def initialize(words)
|
9
|
+
@document = Document.new
|
10
|
+
@words = words
|
21
11
|
end
|
22
12
|
|
23
13
|
def call
|
24
|
-
|
25
|
-
xml.lexicon(xmlns: URL, version: "1.0") do
|
26
|
-
grouped_hash.each do |phonetics, words|
|
27
|
-
xml.lexeme do
|
28
|
-
words.each { |word| xml.grapheme word }
|
29
|
-
xml.phoneme phonetics
|
30
|
-
end
|
31
|
-
end
|
32
|
-
end
|
14
|
+
document.add_element "lexicon", SPECIFICATION
|
33
15
|
|
34
|
-
|
35
|
-
|
16
|
+
words.each do |word, pronunciation|
|
17
|
+
lexeme = Element.new("lexeme")
|
18
|
+
grapheme = Element.new("grapheme")
|
19
|
+
phoneme = Element.new("phoneme")
|
36
20
|
|
37
|
-
|
38
|
-
|
39
|
-
file.write(xml.target!)
|
40
|
-
end
|
41
|
-
end
|
42
|
-
|
43
|
-
private
|
21
|
+
grapheme.text = CGI.unescapeHTML(word)
|
22
|
+
phoneme.text = pronunciation
|
44
23
|
|
45
|
-
|
24
|
+
lexeme.add_element(grapheme)
|
25
|
+
lexeme.add_element(phoneme)
|
46
26
|
|
47
|
-
|
48
|
-
hash.group_by do |_, phonetics|
|
49
|
-
phonetics
|
50
|
-
end.transform_values do |pairs|
|
51
|
-
pairs.map(&:first)
|
27
|
+
document.root.add_element(lexeme)
|
52
28
|
end
|
53
|
-
end
|
54
29
|
|
55
|
-
|
56
|
-
[ADJECTIVES, ADVERBS, NOUNS, VERBS]
|
30
|
+
document
|
57
31
|
end
|
58
32
|
|
59
|
-
|
33
|
+
private
|
34
|
+
|
35
|
+
attr_reader :document, :words
|
36
|
+
|
37
|
+
SPECIFICATION = {
|
38
|
+
"version" => "1.0",
|
39
|
+
"xmlns" => "http://www.w3.org/2005/01/pronunciation-lexicon",
|
40
|
+
"alphabet" => "ipa",
|
41
|
+
"xml:lang" => "en-US"
|
42
|
+
}.freeze
|
60
43
|
end
|
61
44
|
end
|
data/lib/medieval_latina.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
|
-
require "
|
1
|
+
require "json"
|
2
2
|
require "medieval_latina/initializer"
|
3
|
+
require "medieval_latina/lexicon"
|
3
4
|
require "medieval_latina/lexicon_builder"
|
4
5
|
require "medieval_latina/version"
|
5
6
|
require "set"
|
@@ -10,8 +11,8 @@ class MedievalLatina
|
|
10
11
|
if word?(string)
|
11
12
|
metadata = DICTIONARY.fetch(string, {})
|
12
13
|
|
13
|
-
if metadata.key?(
|
14
|
-
metadata[
|
14
|
+
if metadata.key?("pronunciation")
|
15
|
+
metadata["pronunciation"]
|
15
16
|
else
|
16
17
|
new(string).call
|
17
18
|
end
|
@@ -23,6 +24,15 @@ class MedievalLatina
|
|
23
24
|
rejoin_words(prepared_words)
|
24
25
|
end
|
25
26
|
|
27
|
+
def self.dictionary
|
28
|
+
@data ||= load_data
|
29
|
+
end
|
30
|
+
|
31
|
+
def self.load_data
|
32
|
+
file_path = File.join(File.dirname(__FILE__), "../data/dictionary.json")
|
33
|
+
JSON.parse(File.read(file_path))
|
34
|
+
end
|
35
|
+
|
26
36
|
def self.prepare_text(text)
|
27
37
|
text.scan(/[\p{Alnum}'-]+|[[:punct:]]+/).map do |string|
|
28
38
|
if word?(string)
|
@@ -38,31 +48,47 @@ class MedievalLatina
|
|
38
48
|
end
|
39
49
|
|
40
50
|
def self.adjective?(word)
|
41
|
-
|
51
|
+
adjectives.key?(prepare_word(word))
|
42
52
|
end
|
43
53
|
|
44
54
|
def self.adverb?(word)
|
45
|
-
|
55
|
+
adverbs.key?(prepare_word(word))
|
46
56
|
end
|
47
57
|
|
48
58
|
def self.noun?(word)
|
49
|
-
|
59
|
+
nouns.key?(prepare_word(word))
|
50
60
|
end
|
51
61
|
|
52
62
|
def self.verb?(word)
|
53
|
-
|
63
|
+
verbs.key?(prepare_word(word))
|
54
64
|
end
|
55
65
|
|
56
66
|
def self.adjectives
|
57
|
-
|
67
|
+
DICTIONARY.select do |word, metadata|
|
68
|
+
metadata["part"] == "Adjective"
|
69
|
+
end
|
58
70
|
end
|
59
71
|
|
60
72
|
def self.adverbs
|
61
|
-
|
73
|
+
DICTIONARY.select do |word, metadata|
|
74
|
+
metadata["part"] == "Adverb"
|
75
|
+
end
|
62
76
|
end
|
63
77
|
|
64
78
|
def self.nouns
|
65
|
-
|
79
|
+
DICTIONARY.select do |word, metadata|
|
80
|
+
metadata["part"] == "Noun"
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
def self.pronunciations_for(words)
|
85
|
+
words.map(&:downcase).each_with_object({}) do |word, hash|
|
86
|
+
metadata = DICTIONARY[word]
|
87
|
+
|
88
|
+
if metadata && metadata["ipa"]
|
89
|
+
hash[word] = metadata["ipa"]
|
90
|
+
end
|
91
|
+
end
|
66
92
|
end
|
67
93
|
|
68
94
|
def self.rejoin_words(array)
|
@@ -75,7 +101,9 @@ class MedievalLatina
|
|
75
101
|
end
|
76
102
|
|
77
103
|
def self.verbs
|
78
|
-
|
104
|
+
DICTIONARY.select do |word, metadata|
|
105
|
+
metadata["part"] == "Verb"
|
106
|
+
end
|
79
107
|
end
|
80
108
|
|
81
109
|
def self.word?(string)
|
@@ -83,13 +111,7 @@ class MedievalLatina
|
|
83
111
|
end
|
84
112
|
|
85
113
|
def self.words
|
86
|
-
|
87
|
-
ADJECTIVES,
|
88
|
-
ADVERBS,
|
89
|
-
DICTIONARY,
|
90
|
-
NOUNS,
|
91
|
-
VERBS
|
92
|
-
].flat_map(&:keys).each_with_object(Set.new) { |word, set| set.add(word) }
|
114
|
+
DICTIONARY.keys.to_set
|
93
115
|
end
|
94
116
|
|
95
117
|
def initialize(word)
|
@@ -123,6 +145,15 @@ class MedievalLatina
|
|
123
145
|
x: ->(rest) { "ks" }
|
124
146
|
}
|
125
147
|
CONSONENT_TEAMS = {gn: "n-y", qu: "kw"}.freeze
|
148
|
+
PARTS_OF_SPEECH = [
|
149
|
+
"Adjective",
|
150
|
+
"Adverb",
|
151
|
+
"Conjunction",
|
152
|
+
"Noun",
|
153
|
+
"Preposition",
|
154
|
+
"Pronoun",
|
155
|
+
"Verb"
|
156
|
+
].to_set.freeze
|
126
157
|
SOFT_C = ["e", "i", "ae", "oe"].freeze
|
127
158
|
SOFT_G = SOFT_C
|
128
159
|
SOFT_T = ["i"].freeze
|
@@ -164,13 +195,13 @@ class MedievalLatina
|
|
164
195
|
end
|
165
196
|
|
166
197
|
def to_team
|
167
|
-
"#{character}#{rest[0]}"
|
198
|
+
:"#{character}#{rest[0]}"
|
168
199
|
end
|
169
200
|
end
|
170
201
|
|
171
202
|
class Error < StandardError; end
|
172
203
|
|
173
|
-
DICTIONARY =
|
204
|
+
DICTIONARY = dictionary.each_with_object({}) do |(word, metadata), hash|
|
174
205
|
hash[word] = metadata
|
175
206
|
|
176
207
|
sanitized_word = I18n.transliterate(word)
|
data/medieval_latina.gemspec
CHANGED
@@ -15,7 +15,7 @@ Gem::Specification.new do |spec|
|
|
15
15
|
spec.description = description
|
16
16
|
spec.homepage = "https://github.com/jaysonvirissimo/medieval_latina"
|
17
17
|
spec.license = "MIT"
|
18
|
-
spec.required_ruby_version = Gem::Requirement.new(">=
|
18
|
+
spec.required_ruby_version = Gem::Requirement.new(">= 3.0.0")
|
19
19
|
spec.metadata["allowed_push_host"] = "https://rubygems.org/"
|
20
20
|
spec.metadata["homepage_uri"] = spec.homepage
|
21
21
|
spec.metadata["source_code_uri"] = github_uri
|
@@ -29,4 +29,8 @@ Gem::Specification.new do |spec|
|
|
29
29
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
30
30
|
spec.require_paths = ["lib"]
|
31
31
|
spec.add_dependency "i18n"
|
32
|
+
spec.add_development_dependency "rake", "~> 12.0"
|
33
|
+
spec.add_development_dependency "rspec", "~> 3.0"
|
34
|
+
spec.add_development_dependency "nokogiri"
|
35
|
+
spec.add_development_dependency "standard"
|
32
36
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: medieval_latina
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 3.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jayson Virissimo
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-04-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: i18n
|
@@ -24,6 +24,62 @@ dependencies:
|
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '12.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '12.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '3.0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '3.0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: nokogiri
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: standard
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
27
83
|
description: |
|
28
84
|
There are good text-to-speech engines for English and classical Latin, but none for medieval Latin.
|
29
85
|
MedievalLatina converts Latin text to a kind of phonetic spelling that can be read by English text-to-speech engines.
|
@@ -33,6 +89,7 @@ executables: []
|
|
33
89
|
extensions: []
|
34
90
|
extra_rdoc_files: []
|
35
91
|
files:
|
92
|
+
- ".DS_Store"
|
36
93
|
- ".github/workflows/tests.yml"
|
37
94
|
- ".gitignore"
|
38
95
|
- ".rspec"
|
@@ -45,11 +102,22 @@ files:
|
|
45
102
|
- Rakefile
|
46
103
|
- bin/build
|
47
104
|
- bin/console
|
105
|
+
- bin/lint
|
48
106
|
- bin/setup
|
49
|
-
-
|
107
|
+
- data/dictionary.json
|
108
|
+
- lexicons/Latin00.pls
|
109
|
+
- lexicons/Latin01.pls
|
110
|
+
- lexicons/Latin02.pls
|
111
|
+
- lexicons/Latin03.pls
|
112
|
+
- lexicons/Latin04.pls
|
113
|
+
- lexicons/Latin05.pls
|
114
|
+
- lexicons/Latin06.pls
|
115
|
+
- lexicons/Latin07.pls
|
116
|
+
- lexicons/Latin08.pls
|
117
|
+
- lexicons/Latin09.pls
|
50
118
|
- lib/medieval_latina.rb
|
51
|
-
- lib/medieval_latina/dictionary.rb
|
52
119
|
- lib/medieval_latina/initializer.rb
|
120
|
+
- lib/medieval_latina/lexicon.rb
|
53
121
|
- lib/medieval_latina/lexicon_builder.rb
|
54
122
|
- lib/medieval_latina/version.rb
|
55
123
|
- medieval_latina.gemspec
|
@@ -69,14 +137,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
69
137
|
requirements:
|
70
138
|
- - ">="
|
71
139
|
- !ruby/object:Gem::Version
|
72
|
-
version:
|
140
|
+
version: 3.0.0
|
73
141
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
74
142
|
requirements:
|
75
143
|
- - ">="
|
76
144
|
- !ruby/object:Gem::Version
|
77
145
|
version: '0'
|
78
146
|
requirements: []
|
79
|
-
rubygems_version: 3.
|
147
|
+
rubygems_version: 3.5.3
|
80
148
|
signing_key:
|
81
149
|
specification_version: 4
|
82
150
|
summary: Transform medieval Latin text into phonetic English
|