phonetics 3.0.5 → 3.0.8

Sign up to get free protection for your applications and to get access to all the features.
@@ -7,7 +7,7 @@ module Phonetics
7
7
  class CodeGenerator
8
8
  attr_reader :writer
9
9
 
10
- def initialize(writer = STDOUT)
10
+ def initialize(writer = $stdout)
11
11
  @writer = writer
12
12
  end
13
13
 
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'delegate'
4
+ require 'set'
4
5
 
5
6
  module Phonetics
6
7
  extend self
@@ -131,6 +132,8 @@ module Phonetics
131
132
  )
132
133
  # rubocop:enable Layout/TrailingWhitespace
133
134
 
135
+ # rubocop:disable Metrics/CyclomaticComplexity
136
+ # rubocop:disable Metrics/PerceivedComplexity
134
137
  # Parse the ChartData into a lookup table where we can retrieve attributes
135
138
  # for each phoneme
136
139
  def features
@@ -176,6 +179,8 @@ module Phonetics
176
179
  end
177
180
  end
178
181
  end
182
+ # rubocop:enable Metrics/CyclomaticComplexity
183
+ # rubocop:enable Metrics/PerceivedComplexity
179
184
 
180
185
  def phonemes
181
186
  @phonemes ||= features.keys
@@ -183,20 +188,20 @@ module Phonetics
183
188
 
184
189
  # Given two consonants, calculate their difference by summing the
185
190
  # following:
186
- # * 0.1 if they are not voiced the same
191
+ # * 0.3 if they are not voiced the same
187
192
  # * 0.3 if they are different manners
188
- # * Up to 0.6 if they are the maximum position difference
193
+ # * Up to 0.4 if they are the maximum position difference
189
194
  def distance(phoneme1, phoneme2)
190
195
  features1 = features[phoneme1]
191
196
  features2 = features[phoneme2]
192
197
 
193
198
  penalty = 0
194
- penalty += 0.1 if features1[:voiced] != features2[:voiced]
199
+ penalty += 0.3 if features1[:voiced] != features2[:voiced]
195
200
 
196
201
  penalty += 0.3 if features1[:manner] != features2[:manner]
197
202
 
198
- # Use up to the remaining 0.6 for penalizing differences in manner
199
- penalty += 0.6 * ((features1[:position_index] - features2[:position_index]).abs / @position_count.to_f)
203
+ # Use up to the remaining 0.4 for penalizing differences in position
204
+ penalty += 0.4 * ((features1[:position_index] - features2[:position_index]).abs / @position_count.to_f)
200
205
  penalty
201
206
  end
202
207
  end
@@ -228,11 +233,12 @@ module Phonetics
228
233
 
229
234
  def _distance(phoneme1, phoneme2)
230
235
  types = [Symbols.fetch(phoneme1), Symbols.fetch(phoneme2)].sort
231
- if types == %i[consonant vowel]
236
+ case types
237
+ when %i[consonant vowel]
232
238
  1.0
233
- elsif types == %i[vowel vowel]
239
+ when %i[vowel vowel]
234
240
  Vowels.distance(phoneme1, phoneme2)
235
- elsif types == %i[consonant consonant]
241
+ when %i[consonant consonant]
236
242
  Consonants.distance(phoneme1, phoneme2)
237
243
  end
238
244
  end
@@ -16,10 +16,12 @@ module Phonetics
16
16
  module Levenshtein
17
17
  extend ::PhoneticsLevenshteinCBinding
18
18
 
19
+ # rubocop:disable Style/OptionalBooleanParameter
19
20
  def self.distance(str1, str2, verbose = false)
20
21
  return if str1.nil? || str2.nil?
21
22
 
22
23
  internal_phonetic_distance(str1, str2, verbose)
23
24
  end
25
+ # rubocop:enable Style/OptionalBooleanParameter
24
26
  end
25
27
  end
@@ -16,6 +16,7 @@ module Phonetics
16
16
  class RubyLevenshtein
17
17
  attr_reader :str1, :str2, :len1, :len2, :matrix
18
18
 
19
+ # rubocop:disable Style/OptionalBooleanParameter
19
20
  def initialize(ipa_str1, ipa_str2, verbose = false)
20
21
  @str1 = ipa_str1.each_char.select { |c| Phonetics.phonemes.include?(c) }.join
21
22
  @str2 = ipa_str2.each_char.select { |c| Phonetics.phonemes.include?(c) }.join
@@ -26,6 +27,11 @@ module Phonetics
26
27
  set_edit_distances(@str1, @str2)
27
28
  end
28
29
 
30
+ def self.distance(str1, str2, verbose = false)
31
+ new(str1, str2, verbose).distance
32
+ end
33
+ # rubocop:enable Style/OptionalBooleanParameter
34
+
29
35
  def distance
30
36
  return 0 if walk.empty?
31
37
 
@@ -33,10 +39,6 @@ module Phonetics
33
39
  walk.last[:distance]
34
40
  end
35
41
 
36
- def self.distance(str1, str2, verbose = false)
37
- new(str1, str2, verbose).distance
38
- end
39
-
40
42
  private
41
43
 
42
44
  def walk
@@ -10,7 +10,8 @@ module Phonetics
10
10
 
11
11
  module Transcriptions
12
12
  extend self
13
- Transcriptions = File.join(__dir__, '..', 'common_ipa_transcriptions.json')
13
+
14
+ TranscriptionFile = File.join(__dir__, '..', 'common_ipa_transcriptions.json')
14
15
  TranscriptionsURL = 'https://jackdanger.com/common_ipa_transcriptions.json'
15
16
 
16
17
  SourcesByPreference = [/wiktionary/, /cmu/, /phonemicchart.com/].freeze
@@ -21,7 +22,7 @@ module Phonetics
21
22
  return unless entry['ipa']
22
23
 
23
24
  SourcesByPreference.each do |preferred_source|
24
- entry['ipa'].keys.each do |source|
25
+ entry['ipa'].each_key do |source|
25
26
  return entry['ipa'][source] if source =~ preferred_source
26
27
  end
27
28
  end
@@ -30,21 +31,24 @@ module Phonetics
30
31
 
31
32
  def transcriptions
32
33
  @transcriptions ||= begin
33
- download! unless File.exist?(Transcriptions)
34
+ download! unless File.exist?(TranscriptionFile)
34
35
  load_from_disk!
35
36
  end
36
37
  end
37
38
 
38
39
  # Lazily loaded from JSON file on disk
39
40
  def load_from_disk!
40
- @transcriptions = JSON.parse(File.read(Transcriptions))
41
+ @transcriptions = JSON.parse(File.read(TranscriptionFile))
41
42
  end
42
43
 
44
+ # rubocop:disable Security/Open
43
45
  def download!
44
46
  File.open(Transcriptions, 'w') { |f| f.write(URI.open(TranscriptionsURL).read) }
45
47
  end
48
+ # rubocop:enable Security/Open
46
49
 
47
- def trie
50
+ # rubocop:disable Metrics/CyclomaticComplexity
51
+ def trie(max_rarity = nil)
48
52
  # Let's turn this:
49
53
  #
50
54
  # "century": {
@@ -109,9 +113,12 @@ module Phonetics
109
113
  # },
110
114
  # },
111
115
  #
112
- @trie ||= begin
116
+ @tries ||= {}
117
+ @tries[max_rarity] ||= begin
113
118
  base_trie = {}
114
119
  transcriptions.each do |key, entry|
120
+ next if max_rarity && (entry['rarity'].nil? || entry['rarity'] > max_rarity)
121
+
115
122
  entry_data = {
116
123
  word: key,
117
124
  rarity: entry['rarity'],
@@ -123,6 +130,7 @@ module Phonetics
123
130
  base_trie.freeze
124
131
  end
125
132
  end
133
+ # rubocop:enable Metrics/CyclomaticComplexity
126
134
 
127
135
  def walk(ipa)
128
136
  ipa.each_char.reduce(trie) { |acc, char| acc[char] }
data/phonetics.gemspec CHANGED
@@ -11,6 +11,8 @@ Gem::Specification.new do |spec|
11
11
  spec.homepage = 'https://github.com/JackDanger/phonetics'
12
12
  spec.license = 'MIT'
13
13
 
14
+ spec.required_ruby_version = '>= 2.5'
15
+
14
16
  spec.extensions = ['ext/c_levenshtein/extconf.rb']
15
17
 
16
18
  # Specify which files should be added to the gem when it is released.
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: phonetics
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.5
4
+ version: 3.0.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jack Danger
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-12-27 00:00:00.000000000 Z
11
+ date: 2022-05-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -149,14 +149,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
149
149
  requirements:
150
150
  - - ">="
151
151
  - !ruby/object:Gem::Version
152
- version: '0'
152
+ version: '2.5'
153
153
  required_rubygems_version: !ruby/object:Gem::Requirement
154
154
  requirements:
155
155
  - - ">="
156
156
  - !ruby/object:Gem::Version
157
157
  version: '0'
158
158
  requirements: []
159
- rubygems_version: 3.0.3
159
+ rubygems_version: 3.0.3.1
160
160
  signing_key:
161
161
  specification_version: 4
162
162
  summary: tools for linguistic code using the International Phonetic Alphabet