phonetics 3.0.5 → 3.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/test.yml +1 -0
- data/.gitignore +1 -0
- data/.rubocop.yml +1 -0
- data/VERSION +1 -1
- data/ext/c_levenshtein/phonetic_cost.c +11858 -11858
- data/lib/phonetics/code_generator.rb +1 -1
- data/lib/phonetics/distances.rb +14 -8
- data/lib/phonetics/levenshtein.rb +2 -0
- data/lib/phonetics/ruby_levenshtein.rb +6 -4
- data/lib/phonetics/transcriptions.rb +14 -6
- data/phonetics.gemspec +2 -0
- metadata +4 -4
data/lib/phonetics/distances.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require 'delegate'
|
4
|
+
require 'set'
|
4
5
|
|
5
6
|
module Phonetics
|
6
7
|
extend self
|
@@ -131,6 +132,8 @@ module Phonetics
|
|
131
132
|
)
|
132
133
|
# rubocop:enable Layout/TrailingWhitespace
|
133
134
|
|
135
|
+
# rubocop:disable Metrics/CyclomaticComplexity
|
136
|
+
# rubocop:disable Metrics/PerceivedComplexity
|
134
137
|
# Parse the ChartData into a lookup table where we can retrieve attributes
|
135
138
|
# for each phoneme
|
136
139
|
def features
|
@@ -176,6 +179,8 @@ module Phonetics
|
|
176
179
|
end
|
177
180
|
end
|
178
181
|
end
|
182
|
+
# rubocop:enable Metrics/CyclomaticComplexity
|
183
|
+
# rubocop:enable Metrics/PerceivedComplexity
|
179
184
|
|
180
185
|
def phonemes
|
181
186
|
@phonemes ||= features.keys
|
@@ -183,20 +188,20 @@ module Phonetics
|
|
183
188
|
|
184
189
|
# Given two consonants, calculate their difference by summing the
|
185
190
|
# following:
|
186
|
-
# * 0.
|
191
|
+
# * 0.3 if they are not voiced the same
|
187
192
|
# * 0.3 if they are different manners
|
188
|
-
# * Up to 0.
|
193
|
+
# * Up to 0.4 if they are the maximum position difference
|
189
194
|
def distance(phoneme1, phoneme2)
|
190
195
|
features1 = features[phoneme1]
|
191
196
|
features2 = features[phoneme2]
|
192
197
|
|
193
198
|
penalty = 0
|
194
|
-
penalty += 0.
|
199
|
+
penalty += 0.3 if features1[:voiced] != features2[:voiced]
|
195
200
|
|
196
201
|
penalty += 0.3 if features1[:manner] != features2[:manner]
|
197
202
|
|
198
|
-
# Use up to the remaining 0.
|
199
|
-
penalty += 0.
|
203
|
+
# Use up to the remaining 0.4 for penalizing differences in position
|
204
|
+
penalty += 0.4 * ((features1[:position_index] - features2[:position_index]).abs / @position_count.to_f)
|
200
205
|
penalty
|
201
206
|
end
|
202
207
|
end
|
@@ -228,11 +233,12 @@ module Phonetics
|
|
228
233
|
|
229
234
|
def _distance(phoneme1, phoneme2)
|
230
235
|
types = [Symbols.fetch(phoneme1), Symbols.fetch(phoneme2)].sort
|
231
|
-
|
236
|
+
case types
|
237
|
+
when %i[consonant vowel]
|
232
238
|
1.0
|
233
|
-
|
239
|
+
when %i[vowel vowel]
|
234
240
|
Vowels.distance(phoneme1, phoneme2)
|
235
|
-
|
241
|
+
when %i[consonant consonant]
|
236
242
|
Consonants.distance(phoneme1, phoneme2)
|
237
243
|
end
|
238
244
|
end
|
@@ -16,10 +16,12 @@ module Phonetics
|
|
16
16
|
module Levenshtein
|
17
17
|
extend ::PhoneticsLevenshteinCBinding
|
18
18
|
|
19
|
+
# rubocop:disable Style/OptionalBooleanParameter
|
19
20
|
def self.distance(str1, str2, verbose = false)
|
20
21
|
return if str1.nil? || str2.nil?
|
21
22
|
|
22
23
|
internal_phonetic_distance(str1, str2, verbose)
|
23
24
|
end
|
25
|
+
# rubocop:enable Style/OptionalBooleanParameter
|
24
26
|
end
|
25
27
|
end
|
@@ -16,6 +16,7 @@ module Phonetics
|
|
16
16
|
class RubyLevenshtein
|
17
17
|
attr_reader :str1, :str2, :len1, :len2, :matrix
|
18
18
|
|
19
|
+
# rubocop:disable Style/OptionalBooleanParameter
|
19
20
|
def initialize(ipa_str1, ipa_str2, verbose = false)
|
20
21
|
@str1 = ipa_str1.each_char.select { |c| Phonetics.phonemes.include?(c) }.join
|
21
22
|
@str2 = ipa_str2.each_char.select { |c| Phonetics.phonemes.include?(c) }.join
|
@@ -26,6 +27,11 @@ module Phonetics
|
|
26
27
|
set_edit_distances(@str1, @str2)
|
27
28
|
end
|
28
29
|
|
30
|
+
def self.distance(str1, str2, verbose = false)
|
31
|
+
new(str1, str2, verbose).distance
|
32
|
+
end
|
33
|
+
# rubocop:enable Style/OptionalBooleanParameter
|
34
|
+
|
29
35
|
def distance
|
30
36
|
return 0 if walk.empty?
|
31
37
|
|
@@ -33,10 +39,6 @@ module Phonetics
|
|
33
39
|
walk.last[:distance]
|
34
40
|
end
|
35
41
|
|
36
|
-
def self.distance(str1, str2, verbose = false)
|
37
|
-
new(str1, str2, verbose).distance
|
38
|
-
end
|
39
|
-
|
40
42
|
private
|
41
43
|
|
42
44
|
def walk
|
@@ -10,7 +10,8 @@ module Phonetics
|
|
10
10
|
|
11
11
|
module Transcriptions
|
12
12
|
extend self
|
13
|
-
|
13
|
+
|
14
|
+
TranscriptionFile = File.join(__dir__, '..', 'common_ipa_transcriptions.json')
|
14
15
|
TranscriptionsURL = 'https://jackdanger.com/common_ipa_transcriptions.json'
|
15
16
|
|
16
17
|
SourcesByPreference = [/wiktionary/, /cmu/, /phonemicchart.com/].freeze
|
@@ -21,7 +22,7 @@ module Phonetics
|
|
21
22
|
return unless entry['ipa']
|
22
23
|
|
23
24
|
SourcesByPreference.each do |preferred_source|
|
24
|
-
entry['ipa'].
|
25
|
+
entry['ipa'].each_key do |source|
|
25
26
|
return entry['ipa'][source] if source =~ preferred_source
|
26
27
|
end
|
27
28
|
end
|
@@ -30,21 +31,24 @@ module Phonetics
|
|
30
31
|
|
31
32
|
def transcriptions
|
32
33
|
@transcriptions ||= begin
|
33
|
-
download! unless File.exist?(
|
34
|
+
download! unless File.exist?(TranscriptionFile)
|
34
35
|
load_from_disk!
|
35
36
|
end
|
36
37
|
end
|
37
38
|
|
38
39
|
# Lazily loaded from JSON file on disk
|
39
40
|
def load_from_disk!
|
40
|
-
@transcriptions = JSON.parse(File.read(
|
41
|
+
@transcriptions = JSON.parse(File.read(TranscriptionFile))
|
41
42
|
end
|
42
43
|
|
44
|
+
# rubocop:disable Security/Open
|
43
45
|
def download!
|
44
46
|
# Write the downloaded JSON to TranscriptionFile, the path that `transcriptions` checks and `load_from_disk!` reads; passing the Transcriptions module itself as the path raises TypeError.
File.open(TranscriptionFile, 'w') { |f| f.write(URI.open(TranscriptionsURL).read) }
|
45
47
|
end
|
48
|
+
# rubocop:enable Security/Open
|
46
49
|
|
47
|
-
|
50
|
+
# rubocop:disable Metrics/CyclomaticComplexity
|
51
|
+
def trie(max_rarity = nil)
|
48
52
|
# Let's turn this:
|
49
53
|
#
|
50
54
|
# "century": {
|
@@ -109,9 +113,12 @@ module Phonetics
|
|
109
113
|
# },
|
110
114
|
# },
|
111
115
|
#
|
112
|
-
@
|
116
|
+
@tries ||= {}
|
117
|
+
@tries[max_rarity] ||= begin
|
113
118
|
base_trie = {}
|
114
119
|
transcriptions.each do |key, entry|
|
120
|
+
next if max_rarity && (entry['rarity'].nil? || entry['rarity'] > max_rarity)
|
121
|
+
|
115
122
|
entry_data = {
|
116
123
|
word: key,
|
117
124
|
rarity: entry['rarity'],
|
@@ -123,6 +130,7 @@ module Phonetics
|
|
123
130
|
base_trie.freeze
|
124
131
|
end
|
125
132
|
end
|
133
|
+
# rubocop:enable Metrics/CyclomaticComplexity
|
126
134
|
|
127
135
|
def walk(ipa)
|
128
136
|
ipa.each_char.reduce(trie) { |acc, char| acc[char] }
|
data/phonetics.gemspec
CHANGED
@@ -11,6 +11,8 @@ Gem::Specification.new do |spec|
|
|
11
11
|
spec.homepage = 'https://github.com/JackDanger/phonetics'
|
12
12
|
spec.license = 'MIT'
|
13
13
|
|
14
|
+
spec.required_ruby_version = '>= 2.5'
|
15
|
+
|
14
16
|
spec.extensions = ['ext/c_levenshtein/extconf.rb']
|
15
17
|
|
16
18
|
# Specify which files should be added to the gem when it is released.
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: phonetics
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.
|
4
|
+
version: 3.0.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jack Danger
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-05-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -149,14 +149,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
149
149
|
requirements:
|
150
150
|
- - ">="
|
151
151
|
- !ruby/object:Gem::Version
|
152
|
-
version: '
|
152
|
+
version: '2.5'
|
153
153
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
154
154
|
requirements:
|
155
155
|
- - ">="
|
156
156
|
- !ruby/object:Gem::Version
|
157
157
|
version: '0'
|
158
158
|
requirements: []
|
159
|
-
rubygems_version: 3.0.3
|
159
|
+
rubygems_version: 3.0.3.1
|
160
160
|
signing_key:
|
161
161
|
specification_version: 4
|
162
162
|
summary: tools for linguistic code using the International Phonetic Alphabet
|