phonetics 3.0.5 → 3.0.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/test.yml +1 -0
- data/.gitignore +1 -0
- data/.rubocop.yml +1 -0
- data/VERSION +1 -1
- data/ext/c_levenshtein/phonetic_cost.c +11858 -11858
- data/lib/phonetics/code_generator.rb +1 -1
- data/lib/phonetics/distances.rb +14 -8
- data/lib/phonetics/levenshtein.rb +2 -0
- data/lib/phonetics/ruby_levenshtein.rb +6 -4
- data/lib/phonetics/transcriptions.rb +14 -6
- data/phonetics.gemspec +2 -0
- metadata +4 -4
data/lib/phonetics/distances.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require 'delegate'
|
4
|
+
require 'set'
|
4
5
|
|
5
6
|
module Phonetics
|
6
7
|
extend self
|
@@ -131,6 +132,8 @@ module Phonetics
|
|
131
132
|
)
|
132
133
|
# rubocop:enable Layout/TrailingWhitespace
|
133
134
|
|
135
|
+
# rubocop:disable Metrics/CyclomaticComplexity
|
136
|
+
# rubocop:disable Metrics/PerceivedComplexity
|
134
137
|
# Parse the ChartData into a lookup table where we can retrieve attributes
|
135
138
|
# for each phoneme
|
136
139
|
def features
|
@@ -176,6 +179,8 @@ module Phonetics
|
|
176
179
|
end
|
177
180
|
end
|
178
181
|
end
|
182
|
+
# rubocop:enable Metrics/CyclomaticComplexity
|
183
|
+
# rubocop:enable Metrics/PerceivedComplexity
|
179
184
|
|
180
185
|
def phonemes
|
181
186
|
@phonemes ||= features.keys
|
@@ -183,20 +188,20 @@ module Phonetics
|
|
183
188
|
|
184
189
|
# Given two consonants, calculate their difference by summing the
|
185
190
|
# following:
|
186
|
-
# * 0.
|
191
|
+
# * 0.3 if they are not voiced the same
|
187
192
|
# * 0.3 if they are different manners
|
188
|
-
# * Up to 0.
|
193
|
+
# * Up to 0.4 if they are the maximum position difference
|
189
194
|
def distance(phoneme1, phoneme2)
|
190
195
|
features1 = features[phoneme1]
|
191
196
|
features2 = features[phoneme2]
|
192
197
|
|
193
198
|
penalty = 0
|
194
|
-
penalty += 0.
|
199
|
+
penalty += 0.3 if features1[:voiced] != features2[:voiced]
|
195
200
|
|
196
201
|
penalty += 0.3 if features1[:manner] != features2[:manner]
|
197
202
|
|
198
|
-
# Use up to the remaining 0.
|
199
|
-
penalty += 0.
|
203
|
+
# Use up to the remaining 0.4 for penalizing differences in position
|
204
|
+
penalty += 0.4 * ((features1[:position_index] - features2[:position_index]).abs / @position_count.to_f)
|
200
205
|
penalty
|
201
206
|
end
|
202
207
|
end
|
@@ -228,11 +233,12 @@ module Phonetics
|
|
228
233
|
|
229
234
|
def _distance(phoneme1, phoneme2)
|
230
235
|
types = [Symbols.fetch(phoneme1), Symbols.fetch(phoneme2)].sort
|
231
|
-
|
236
|
+
case types
|
237
|
+
when %i[consonant vowel]
|
232
238
|
1.0
|
233
|
-
|
239
|
+
when %i[vowel vowel]
|
234
240
|
Vowels.distance(phoneme1, phoneme2)
|
235
|
-
|
241
|
+
when %i[consonant consonant]
|
236
242
|
Consonants.distance(phoneme1, phoneme2)
|
237
243
|
end
|
238
244
|
end
|
@@ -16,10 +16,12 @@ module Phonetics
|
|
16
16
|
module Levenshtein
|
17
17
|
extend ::PhoneticsLevenshteinCBinding
|
18
18
|
|
19
|
+
# rubocop:disable Style/OptionalBooleanParameter
|
19
20
|
def self.distance(str1, str2, verbose = false)
|
20
21
|
return if str1.nil? || str2.nil?
|
21
22
|
|
22
23
|
internal_phonetic_distance(str1, str2, verbose)
|
23
24
|
end
|
25
|
+
# rubocop:enable Style/OptionalBooleanParameter
|
24
26
|
end
|
25
27
|
end
|
@@ -16,6 +16,7 @@ module Phonetics
|
|
16
16
|
class RubyLevenshtein
|
17
17
|
attr_reader :str1, :str2, :len1, :len2, :matrix
|
18
18
|
|
19
|
+
# rubocop:disable Style/OptionalBooleanParameter
|
19
20
|
def initialize(ipa_str1, ipa_str2, verbose = false)
|
20
21
|
@str1 = ipa_str1.each_char.select { |c| Phonetics.phonemes.include?(c) }.join
|
21
22
|
@str2 = ipa_str2.each_char.select { |c| Phonetics.phonemes.include?(c) }.join
|
@@ -26,6 +27,11 @@ module Phonetics
|
|
26
27
|
set_edit_distances(@str1, @str2)
|
27
28
|
end
|
28
29
|
|
30
|
+
def self.distance(str1, str2, verbose = false)
|
31
|
+
new(str1, str2, verbose).distance
|
32
|
+
end
|
33
|
+
# rubocop:enable Style/OptionalBooleanParameter
|
34
|
+
|
29
35
|
def distance
|
30
36
|
return 0 if walk.empty?
|
31
37
|
|
@@ -33,10 +39,6 @@ module Phonetics
|
|
33
39
|
walk.last[:distance]
|
34
40
|
end
|
35
41
|
|
36
|
-
def self.distance(str1, str2, verbose = false)
|
37
|
-
new(str1, str2, verbose).distance
|
38
|
-
end
|
39
|
-
|
40
42
|
private
|
41
43
|
|
42
44
|
def walk
|
@@ -10,7 +10,8 @@ module Phonetics
|
|
10
10
|
|
11
11
|
module Transcriptions
|
12
12
|
extend self
|
13
|
-
|
13
|
+
|
14
|
+
TranscriptionFile = File.join(__dir__, '..', 'common_ipa_transcriptions.json')
|
14
15
|
TranscriptionsURL = 'https://jackdanger.com/common_ipa_transcriptions.json'
|
15
16
|
|
16
17
|
SourcesByPreference = [/wiktionary/, /cmu/, /phonemicchart.com/].freeze
|
@@ -21,7 +22,7 @@ module Phonetics
|
|
21
22
|
return unless entry['ipa']
|
22
23
|
|
23
24
|
SourcesByPreference.each do |preferred_source|
|
24
|
-
entry['ipa'].
|
25
|
+
entry['ipa'].each_key do |source|
|
25
26
|
return entry['ipa'][source] if source =~ preferred_source
|
26
27
|
end
|
27
28
|
end
|
@@ -30,21 +31,24 @@ module Phonetics
|
|
30
31
|
|
31
32
|
def transcriptions
|
32
33
|
@transcriptions ||= begin
|
33
|
-
download! unless File.exist?(
|
34
|
+
download! unless File.exist?(TranscriptionFile)
|
34
35
|
load_from_disk!
|
35
36
|
end
|
36
37
|
end
|
37
38
|
|
38
39
|
# Lazily loaded from JSON file on disk
|
39
40
|
def load_from_disk!
|
40
|
-
@transcriptions = JSON.parse(File.read(
|
41
|
+
@transcriptions = JSON.parse(File.read(TranscriptionFile))
|
41
42
|
end
|
42
43
|
|
44
|
+
# rubocop:disable Security/Open
|
43
45
|
def download!
|
44
46
|
File.open(Transcriptions, 'w') { |f| f.write(URI.open(TranscriptionsURL).read) }
|
45
47
|
end
|
48
|
+
# rubocop:enable Security/Open
|
46
49
|
|
47
|
-
|
50
|
+
# rubocop:disable Metrics/CyclomaticComplexity
|
51
|
+
def trie(max_rarity = nil)
|
48
52
|
# Let's turn this:
|
49
53
|
#
|
50
54
|
# "century": {
|
@@ -109,9 +113,12 @@ module Phonetics
|
|
109
113
|
# },
|
110
114
|
# },
|
111
115
|
#
|
112
|
-
@
|
116
|
+
@tries ||= {}
|
117
|
+
@tries[max_rarity] ||= begin
|
113
118
|
base_trie = {}
|
114
119
|
transcriptions.each do |key, entry|
|
120
|
+
next if max_rarity && (entry['rarity'].nil? || entry['rarity'] > max_rarity)
|
121
|
+
|
115
122
|
entry_data = {
|
116
123
|
word: key,
|
117
124
|
rarity: entry['rarity'],
|
@@ -123,6 +130,7 @@ module Phonetics
|
|
123
130
|
base_trie.freeze
|
124
131
|
end
|
125
132
|
end
|
133
|
+
# rubocop:enable Metrics/CyclomaticComplexity
|
126
134
|
|
127
135
|
def walk(ipa)
|
128
136
|
ipa.each_char.reduce(trie) { |acc, char| acc[char] }
|
data/phonetics.gemspec
CHANGED
@@ -11,6 +11,8 @@ Gem::Specification.new do |spec|
|
|
11
11
|
spec.homepage = 'https://github.com/JackDanger/phonetics'
|
12
12
|
spec.license = 'MIT'
|
13
13
|
|
14
|
+
spec.required_ruby_version = '>= 2.5'
|
15
|
+
|
14
16
|
spec.extensions = ['ext/c_levenshtein/extconf.rb']
|
15
17
|
|
16
18
|
# Specify which files should be added to the gem when it is released.
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: phonetics
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.5
|
4
|
+
version: 3.0.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jack Danger
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-05-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -149,14 +149,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
149
149
|
requirements:
|
150
150
|
- - ">="
|
151
151
|
- !ruby/object:Gem::Version
|
152
|
-
version: '0'
|
152
|
+
version: '2.5'
|
153
153
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
154
154
|
requirements:
|
155
155
|
- - ">="
|
156
156
|
- !ruby/object:Gem::Version
|
157
157
|
version: '0'
|
158
158
|
requirements: []
|
159
|
-
rubygems_version: 3.0.3
|
159
|
+
rubygems_version: 3.0.3.1
|
160
160
|
signing_key:
|
161
161
|
specification_version: 4
|
162
162
|
summary: tools for linguistic code using the International Phonetic Alphabet
|