phonetics 3.0.5 → 3.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,7 +7,7 @@ module Phonetics
7
7
  class CodeGenerator
8
8
  attr_reader :writer
9
9
 
10
- def initialize(writer = STDOUT)
10
+ def initialize(writer = $stdout)
11
11
  @writer = writer
12
12
  end
13
13
 
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'delegate'
4
+ require 'set'
4
5
 
5
6
  module Phonetics
6
7
  extend self
@@ -131,6 +132,8 @@ module Phonetics
131
132
  )
132
133
  # rubocop:enable Layout/TrailingWhitespace
133
134
 
135
+ # rubocop:disable Metrics/CyclomaticComplexity
136
+ # rubocop:disable Metrics/PerceivedComplexity
134
137
  # Parse the ChartData into a lookup table where we can retrieve attributes
135
138
  # for each phoneme
136
139
  def features
@@ -176,6 +179,8 @@ module Phonetics
176
179
  end
177
180
  end
178
181
  end
182
+ # rubocop:enable Metrics/CyclomaticComplexity
183
+ # rubocop:enable Metrics/PerceivedComplexity
179
184
 
180
185
  def phonemes
181
186
  @phonemes ||= features.keys
@@ -183,20 +188,20 @@ module Phonetics
183
188
 
184
189
  # Given two consonants, calculate their difference by summing the
185
190
  # following:
186
- # * 0.1 if they are not voiced the same
191
+ # * 0.3 if they are not voiced the same
187
192
  # * 0.3 if they are different manners
188
- # * Up to 0.6 if they are the maximum position difference
193
+ # * Up to 0.4 if they are the maximum position difference
189
194
  def distance(phoneme1, phoneme2)
190
195
  features1 = features[phoneme1]
191
196
  features2 = features[phoneme2]
192
197
 
193
198
  penalty = 0
194
- penalty += 0.1 if features1[:voiced] != features2[:voiced]
199
+ penalty += 0.3 if features1[:voiced] != features2[:voiced]
195
200
 
196
201
  penalty += 0.3 if features1[:manner] != features2[:manner]
197
202
 
198
- # Use up to the remaining 0.6 for penalizing differences in manner
199
- penalty += 0.6 * ((features1[:position_index] - features2[:position_index]).abs / @position_count.to_f)
203
+ # Use up to the remaining 0.4 for penalizing differences in position
204
+ penalty += 0.4 * ((features1[:position_index] - features2[:position_index]).abs / @position_count.to_f)
200
205
  penalty
201
206
  end
202
207
  end
@@ -228,11 +233,12 @@ module Phonetics
228
233
 
229
234
  def _distance(phoneme1, phoneme2)
230
235
  types = [Symbols.fetch(phoneme1), Symbols.fetch(phoneme2)].sort
231
- if types == %i[consonant vowel]
236
+ case types
237
+ when %i[consonant vowel]
232
238
  1.0
233
- elsif types == %i[vowel vowel]
239
+ when %i[vowel vowel]
234
240
  Vowels.distance(phoneme1, phoneme2)
235
- elsif types == %i[consonant consonant]
241
+ when %i[consonant consonant]
236
242
  Consonants.distance(phoneme1, phoneme2)
237
243
  end
238
244
  end
@@ -16,10 +16,12 @@ module Phonetics
16
16
  module Levenshtein
17
17
  extend ::PhoneticsLevenshteinCBinding
18
18
 
19
+ # rubocop:disable Style/OptionalBooleanParameter
19
20
  def self.distance(str1, str2, verbose = false)
20
21
  return if str1.nil? || str2.nil?
21
22
 
22
23
  internal_phonetic_distance(str1, str2, verbose)
23
24
  end
25
+ # rubocop:enable Style/OptionalBooleanParameter
24
26
  end
25
27
  end
@@ -16,6 +16,7 @@ module Phonetics
16
16
  class RubyLevenshtein
17
17
  attr_reader :str1, :str2, :len1, :len2, :matrix
18
18
 
19
+ # rubocop:disable Style/OptionalBooleanParameter
19
20
  def initialize(ipa_str1, ipa_str2, verbose = false)
20
21
  @str1 = ipa_str1.each_char.select { |c| Phonetics.phonemes.include?(c) }.join
21
22
  @str2 = ipa_str2.each_char.select { |c| Phonetics.phonemes.include?(c) }.join
@@ -26,6 +27,11 @@ module Phonetics
26
27
  set_edit_distances(@str1, @str2)
27
28
  end
28
29
 
30
+ def self.distance(str1, str2, verbose = false)
31
+ new(str1, str2, verbose).distance
32
+ end
33
+ # rubocop:enable Style/OptionalBooleanParameter
34
+
29
35
  def distance
30
36
  return 0 if walk.empty?
31
37
 
@@ -33,10 +39,6 @@ module Phonetics
33
39
  walk.last[:distance]
34
40
  end
35
41
 
36
- def self.distance(str1, str2, verbose = false)
37
- new(str1, str2, verbose).distance
38
- end
39
-
40
42
  private
41
43
 
42
44
  def walk
@@ -10,7 +10,8 @@ module Phonetics
10
10
 
11
11
  module Transcriptions
12
12
  extend self
13
- Transcriptions = File.join(__dir__, '..', 'common_ipa_transcriptions.json')
13
+
14
+ TranscriptionFile = File.join(__dir__, '..', 'common_ipa_transcriptions.json')
14
15
  TranscriptionsURL = 'https://jackdanger.com/common_ipa_transcriptions.json'
15
16
 
16
17
  SourcesByPreference = [/wiktionary/, /cmu/, /phonemicchart.com/].freeze
@@ -21,7 +22,7 @@ module Phonetics
21
22
  return unless entry['ipa']
22
23
 
23
24
  SourcesByPreference.each do |preferred_source|
24
- entry['ipa'].keys.each do |source|
25
+ entry['ipa'].each_key do |source|
25
26
  return entry['ipa'][source] if source =~ preferred_source
26
27
  end
27
28
  end
@@ -30,21 +31,24 @@ module Phonetics
30
31
 
31
32
  def transcriptions
32
33
  @transcriptions ||= begin
33
- download! unless File.exist?(Transcriptions)
34
+ download! unless File.exist?(TranscriptionFile)
34
35
  load_from_disk!
35
36
  end
36
37
  end
37
38
 
38
39
  # Lazily loaded from JSON file on disk
39
40
  def load_from_disk!
40
- @transcriptions = JSON.parse(File.read(Transcriptions))
41
+ @transcriptions = JSON.parse(File.read(TranscriptionFile))
41
42
  end
42
43
 
44
+ # rubocop:disable Security/Open
43
45
  def download!
44
46
  File.open(Transcriptions, 'w') { |f| f.write(URI.open(TranscriptionsURL).read) }
45
47
  end
48
+ # rubocop:enable Security/Open
46
49
 
47
- def trie
50
+ # rubocop:disable Metrics/CyclomaticComplexity
51
+ def trie(max_rarity = nil)
48
52
  # Let's turn this:
49
53
  #
50
54
  # "century": {
@@ -109,9 +113,12 @@ module Phonetics
109
113
  # },
110
114
  # },
111
115
  #
112
- @trie ||= begin
116
+ @tries ||= {}
117
+ @tries[max_rarity] ||= begin
113
118
  base_trie = {}
114
119
  transcriptions.each do |key, entry|
120
+ next if max_rarity && (entry['rarity'].nil? || entry['rarity'] > max_rarity)
121
+
115
122
  entry_data = {
116
123
  word: key,
117
124
  rarity: entry['rarity'],
@@ -123,6 +130,7 @@ module Phonetics
123
130
  base_trie.freeze
124
131
  end
125
132
  end
133
+ # rubocop:enable Metrics/CyclomaticComplexity
126
134
 
127
135
  def walk(ipa)
128
136
  ipa.each_char.reduce(trie) { |acc, char| acc[char] }
data/phonetics.gemspec CHANGED
@@ -11,6 +11,8 @@ Gem::Specification.new do |spec|
11
11
  spec.homepage = 'https://github.com/JackDanger/phonetics'
12
12
  spec.license = 'MIT'
13
13
 
14
+ spec.required_ruby_version = '>= 2.5'
15
+
14
16
  spec.extensions = ['ext/c_levenshtein/extconf.rb']
15
17
 
16
18
  # Specify which files should be added to the gem when it is released.
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: phonetics
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.5
4
+ version: 3.0.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jack Danger
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-12-27 00:00:00.000000000 Z
11
+ date: 2022-05-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -149,14 +149,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
149
149
  requirements:
150
150
  - - ">="
151
151
  - !ruby/object:Gem::Version
152
- version: '0'
152
+ version: '2.5'
153
153
  required_rubygems_version: !ruby/object:Gem::Requirement
154
154
  requirements:
155
155
  - - ">="
156
156
  - !ruby/object:Gem::Version
157
157
  version: '0'
158
158
  requirements: []
159
- rubygems_version: 3.0.3
159
+ rubygems_version: 3.0.3.1
160
160
  signing_key:
161
161
  specification_version: 4
162
162
  summary: tools for linguistic code using the International Phonetic Alphabet