phonetics 2.0.1 → 3.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/lib/phonetics.rb CHANGED
@@ -1,239 +1,4 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'delegate'
4
-
5
- module Phonetics
6
- extend self
7
-
8
- # This subclass of the stdlib's String allows us to iterate over each phoneme
9
- # in a string without monkeypatching
10
- #
11
- # Usage:
12
- # Phonetics::String.new("wətɛvɝ").each_phoneme.to_a
13
- # => ["w", "ə", "t", "ɛ", "v", "ɝ"]
14
- class String < SimpleDelegator
15
- # Group all phonemes by how many characters they have. Use this to walk
16
- # through a string finding phonemes (looking for longest ones first)
17
- def self.phonemes_by_length
18
- @phonemes_by_length ||= Phonetics.phonemes.each_with_object(
19
- # This relies on the implicit stable key ordering of Hash objects in Ruby
20
- # 2+ to keep the keys in descending order.
21
- 4 => Set.new, 3 => Set.new, 2 => Set.new, 1 => Set.new
22
- ) do |str, acc|
23
- acc[str.chars.size] << str
24
- end
25
- end
26
-
27
- def each_phoneme
28
- idx = 0
29
- Enumerator.new do |y|
30
- while idx < chars.length
31
- found = false
32
- self.class.phonemes_by_length.each do |size, phonemes|
33
- next unless idx + size <= chars.length
34
-
35
- candidate = chars[idx..idx + size - 1].join
36
- next unless phonemes.include?(candidate)
37
-
38
- y.yield candidate
39
- idx += size
40
- found = true
41
- break
42
- end
43
- idx += 1 unless found
44
- end
45
- end
46
- end
47
- end
48
-
49
- module Vowels
50
- extend self
51
-
52
- FormantFrequencies = {
53
- # https://en.wikipedia.org/wiki/Formant#Phonetics
54
- 'i' => { F1: 240, F2: 2400, rounded: false },
55
- 'y' => { F1: 235, F2: 2100, rounded: false },
56
- 'ɪ' => { F1: 300, F2: 2100, rounded: false }, # Guessing From other vowels
57
- 'e' => { F1: 390, F2: 2300, rounded: false },
58
- 'ø' => { F1: 370, F2: 1900, rounded: true },
59
- 'ɛ' => { F1: 610, F2: 1900, rounded: false },
60
- 'œ' => { F1: 585, F2: 1710, rounded: true },
61
- 'a' => { F1: 850, F2: 1610, rounded: false },
62
- 'ɶ' => { F1: 820, F2: 1530, rounded: true },
63
- 'ɑ' => { F1: 750, F2: 940, rounded: false },
64
- 'ɒ' => { F1: 700, F2: 760, rounded: true },
65
-
66
- 'ʌ' => { F1: 600, F2: 1170, rounded: false },
67
- # copying 'ʌ' for other mid-vowel formants
68
- 'ə' => { F1: 600, F2: 1170, rounded: false },
69
- 'ɝ' => { F1: 600, F2: 1170, rounded: false, rhotic: true },
70
-
71
- 'ɔ' => { F1: 500, F2: 700, rounded: true },
72
- 'ɤ' => { F1: 460, F2: 1310, rounded: false },
73
- 'o' => { F1: 360, F2: 640, rounded: true },
74
- 'ɯ' => { F1: 300, F2: 1390, rounded: false },
75
- 'æ' => { F1: 800, F2: 1900, rounded: false }, # Guessing From other vowels
76
- 'u' => { F1: 350, F2: 650, rounded: true }, # Guessing From other vowels
77
- 'ʊ' => { F1: 350, F2: 650, rounded: true },
78
- # Frequencies from http://videoweb.nie.edu.sg/phonetic/vowels/measurements.html
79
- }.freeze
80
-
81
- def phonemes
82
- @phonemes ||= FormantFrequencies.keys
83
- end
84
-
85
- # Given two vowels, calculate the (pythagorean) distance between them using
86
- # their F1 and F2 frequencies as x/y coordinates.
87
- # The return value is scaled to a value between 0 and 1
88
- # TODO: account for rhoticity (F3)
89
- def distance(phoneme1, phoneme2)
90
- formants1 = FormantFrequencies.fetch(phoneme1)
91
- formants2 = FormantFrequencies.fetch(phoneme2)
92
-
93
- @minmax_f1 ||= FormantFrequencies.values.minmax { |a, b| a[:F1] <=> b[:F1] }.map { |h| h[:F1] }
94
- @minmax_f2 ||= FormantFrequencies.values.minmax { |a, b| a[:F2] <=> b[:F2] }.map { |h| h[:F2] }
95
-
96
- # Get an x and y value for each input phoneme scaled between 0.0 and 1.0
97
- # We'll use the scaled f1 as the 'x' and the scaled f2 as the 'y'
98
- scaled_phoneme1_f1 = (formants1[:F1] - @minmax_f1[0]) / @minmax_f1[1].to_f
99
- scaled_phoneme1_f2 = (formants1[:F2] - @minmax_f2[0]) / @minmax_f2[1].to_f
100
- scaled_phoneme2_f1 = (formants2[:F1] - @minmax_f1[0]) / @minmax_f1[1].to_f
101
- scaled_phoneme2_f2 = (formants2[:F2] - @minmax_f2[0]) / @minmax_f2[1].to_f
102
-
103
- f1_distance = (scaled_phoneme1_f1 - scaled_phoneme2_f1).abs
104
- f2_distance = (scaled_phoneme1_f2 - scaled_phoneme2_f2).abs
105
-
106
- # When we have four values we can use the pythagorean theorem on them
107
- # (order doesn't matter)
108
- Math.sqrt((f1_distance**2) + (f2_distance**2))
109
- end
110
- end
111
-
112
- module Consonants
113
- extend self
114
-
115
- # This chart (columns 2 through the end, anyway) is a direct port of
116
- # https://en.wikipedia.org/wiki/International_Phonetic_Alphabet#Letters
117
- # We store the consonant table in this format to make updating it easier.
118
- #
119
- # rubocop:disable Layout/TrailingWhitespace
120
- ChartData = %( | Labio-velar | Bi-labial | Labio-dental | Linguo-labial | Dental | Alveolar | Post-alveolar | Retro-flex | Palatal | Velar | Uvular | Pharyngeal | Glottal
121
- Nasal | | m̥ m | ɱ | n̼ | | n̥ n | | ɳ̊ ɳ | ɲ̊ ɲ | ŋ̊ ŋ | ɴ | |
122
- Stop | | p b | p̪ b̪ | t̼ d̼ | | t d | | ʈ ɖ | c ɟ | k g | q ɢ | ʡ | ʔ
123
- Sibilant fricative | | | | | | s z | ʃ ʒ | ʂ ʐ | ɕ ʑ | | | |
124
- Non-sibilant fricative | | ɸ β | f v | θ̼ ð̼ | θ ð | θ̠ ð̠ | ɹ̠̊˔ ɹ̠˔ | ɻ˔ | ç ʝ | x ɣ | χ ʁ | ħ ʕ | h ɦ
125
- Approximant | w | | ʋ̥ ʋ | | | ɹ̥ ɹ | | ɻ̊ ɻ | j̊ j | ɰ̊ ɰ | | | ʔ̞
126
- Tap/flap | | ⱱ̟ | ⱱ | ɾ̼ | | ɾ̥ ɾ | | ɽ̊ ɽ | | | ɢ̆ | ʡ̆ |
127
- Trill | | ʙ̥ ʙ | | | | r̥ r | | | | | ʀ̥ ʀ | ʜ ʢ |
128
- Lateral fricative | | | | | | ɬ ɮ | | ɭ̊˔ ɭ˔ | ʎ̝̊ ʎ̝ | ʟ̝̊ ʟ̝ | | |
129
- Lateral approximant | | | | | | l̥ l | | ɭ̊ ɭ | ʎ̥ ʎ | ʟ̥ ʟ | ʟ̠ | |
130
- Lateral tap/flap | | | | | | ɺ | | ɭ̆ | ʎ̆ | ʟ̆ | | |
131
- )
132
- # rubocop:enable Layout/TrailingWhitespace
133
-
134
- # Parse the ChartData into a lookup table where we can retrieve attributes
135
- # for each phoneme
136
- def features
137
- @features ||= begin
138
- header, *manners = ChartData.lines
139
-
140
- _, *positions = header.chomp.split(' | ')
141
- positions.map(&:strip!)
142
-
143
- # Remove any trailing blank lines
144
- manners.pop while manners.last.to_s.strip.empty?
145
-
146
- position_indexes = Hash[*positions.each_with_index.to_a.flatten]
147
-
148
- @position_count = positions.size
149
-
150
- manners.each_with_object({}) do |row, phonemes|
151
- manner, *columns = row.chomp.split(' | ')
152
- manner.strip!
153
- positions.zip(columns).each do |position, phoneme_text|
154
- data = {
155
- position: position,
156
- position_index: position_indexes[position],
157
- manner: manner,
158
- }
159
- # If there is a character in the first byte then this articulation
160
- # has a voiceless phoneme. The symbol may use additional characters
161
- # as part of the phoneme symbol.
162
- unless phoneme_text[0] == ' '
163
- # Take the first non-blank character string
164
- symbol = phoneme_text.chars.take_while { |char| char != ' ' }.join
165
- phoneme_text = phoneme_text[symbol.chars.size..-1]
166
-
167
- phonemes[symbol] = data.merge(voiced: false)
168
- end
169
- # If there's a character anywhere left in the string then this
170
- # articulation has a voiced phoneme
171
- unless phoneme_text.strip.empty?
172
- symbol = phoneme_text.strip
173
- phonemes[symbol] = data.merge(voiced: true)
174
- end
175
- end
176
- end
177
- end
178
- end
179
-
180
- def phonemes
181
- @phonemes ||= features.keys
182
- end
183
-
184
- # Given two consonants, calculate their difference by summing the
185
- # following:
186
- # * 0.1 if they are not voiced the same
187
- # * 0.3 if they are different manners
188
- # * Up to 0.6 if they are the maximum position difference
189
- def distance(phoneme1, phoneme2)
190
- features1 = features[phoneme1]
191
- features2 = features[phoneme2]
192
-
193
- penalty = 0
194
- penalty += 0.1 if features1[:voiced] != features2[:voiced]
195
-
196
- penalty += 0.3 if features1[:manner] != features2[:manner]
197
-
198
- # Use up to the remaining 0.6 for penalizing differences in position
199
- penalty += 0.6 * ((features1[:position_index] - features2[:position_index]).abs / @position_count.to_f)
200
- penalty
201
- end
202
- end
203
-
204
- def phonemes
205
- Vowels.phonemes + Consonants.phonemes
206
- end
207
-
208
- Symbols = Consonants.phonemes.reduce({}) { |acc, p| acc.update p => :consonant }.merge(
209
- Vowels.phonemes.reduce({}) { |acc, p| acc.update p => :vowel }
210
- )
211
-
212
- def distance(phoneme1, phoneme2)
213
- return 0 if phoneme1 == phoneme2
214
-
215
- distance_map.fetch(phoneme1).fetch(phoneme2)
216
- end
217
-
218
- def distance_map
219
- @distance_map ||= phonemes.permutation(2).each_with_object(Hash.new { |h, k| h[k] = {} }) do |pair, scores|
220
- p1, p2 = *pair
221
- score = _distance(p1, p2)
222
- scores[p1][p2] = score
223
- scores[p2][p1] = score
224
- end
225
- end
226
-
227
- private
228
-
229
- def _distance(phoneme1, phoneme2)
230
- types = [Symbols.fetch(phoneme1), Symbols.fetch(phoneme2)].sort
231
- if types == %i[consonant vowel]
232
- 1.0
233
- elsif types == %i[vowel vowel]
234
- Vowels.distance(phoneme1, phoneme2)
235
- elsif types == %i[consonant consonant]
236
- Consonants.distance(phoneme1, phoneme2)
237
- end
238
- end
239
- end
3
+ require 'phonetics/distances'
4
+ require 'phonetics/transcriptions'
@@ -0,0 +1,239 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'delegate'
4
+
5
+ module Phonetics
6
+ extend self
7
+
8
+ # This subclass of the stdlib's String allows us to iterate over each phoneme
9
+ # in a string without monkeypatching
10
+ #
11
+ # Usage:
12
+ # Phonetics::String.new("wətɛvɝ").each_phoneme.to_a
13
+ # => ["w", "ə", "t", "ɛ", "v", "ɝ"]
14
+ class String < SimpleDelegator
15
+ # Group all phonemes by how many characters they have. Use this to walk
16
+ # through a string finding phonemes (looking for longest ones first)
17
+ def self.phonemes_by_length
18
+ @phonemes_by_length ||= Phonetics.phonemes.each_with_object(
19
+ # This relies on the implicit stable key ordering of Hash objects in Ruby
20
+ # 2+ to keep the keys in descending order.
21
+ 4 => Set.new, 3 => Set.new, 2 => Set.new, 1 => Set.new
22
+ ) do |str, acc|
23
+ acc[str.chars.size] << str
24
+ end
25
+ end
26
+
27
+ def each_phoneme
28
+ idx = 0
29
+ Enumerator.new do |y|
30
+ while idx < chars.length
31
+ found = false
32
+ self.class.phonemes_by_length.each do |size, phonemes|
33
+ next unless idx + size <= chars.length
34
+
35
+ candidate = chars[idx..idx + size - 1].join
36
+ next unless phonemes.include?(candidate)
37
+
38
+ y.yield candidate
39
+ idx += size
40
+ found = true
41
+ break
42
+ end
43
+ idx += 1 unless found
44
+ end
45
+ end
46
+ end
47
+ end
48
+
49
+ module Vowels
50
+ extend self
51
+
52
+ FormantFrequencies = {
53
+ # https://en.wikipedia.org/wiki/Formant#Phonetics
54
+ 'i' => { F1: 240, F2: 2400, rounded: false },
55
+ 'y' => { F1: 235, F2: 2100, rounded: false },
56
+ 'ɪ' => { F1: 300, F2: 2100, rounded: false }, # Guessing From other vowels
57
+ 'e' => { F1: 390, F2: 2300, rounded: false },
58
+ 'ø' => { F1: 370, F2: 1900, rounded: true },
59
+ 'ɛ' => { F1: 610, F2: 1900, rounded: false },
60
+ 'œ' => { F1: 585, F2: 1710, rounded: true },
61
+ 'a' => { F1: 850, F2: 1610, rounded: false },
62
+ 'ɶ' => { F1: 820, F2: 1530, rounded: true },
63
+ 'ɑ' => { F1: 750, F2: 940, rounded: false },
64
+ 'ɒ' => { F1: 700, F2: 760, rounded: true },
65
+
66
+ 'ʌ' => { F1: 600, F2: 1170, rounded: false },
67
+ # copying 'ʌ' for other mid-vowel formants
68
+ 'ə' => { F1: 600, F2: 1170, rounded: false },
69
+ 'ɝ' => { F1: 600, F2: 1170, rounded: false, rhotic: true },
70
+
71
+ 'ɔ' => { F1: 500, F2: 700, rounded: true },
72
+ 'ɤ' => { F1: 460, F2: 1310, rounded: false },
73
+ 'o' => { F1: 360, F2: 640, rounded: true },
74
+ 'ɯ' => { F1: 300, F2: 1390, rounded: false },
75
+ 'æ' => { F1: 800, F2: 1900, rounded: false }, # Guessing From other vowels
76
+ 'u' => { F1: 350, F2: 650, rounded: true }, # Guessing From other vowels
77
+ 'ʊ' => { F1: 350, F2: 650, rounded: true },
78
+ # Frequencies from http://videoweb.nie.edu.sg/phonetic/vowels/measurements.html
79
+ }.freeze
80
+
81
+ def phonemes
82
+ @phonemes ||= FormantFrequencies.keys
83
+ end
84
+
85
+ # Given two vowels, calculate the (pythagorean) distance between them using
86
+ # their F1 and F2 frequencies as x/y coordinates.
87
+ # The return value is scaled to a value between 0 and 1
88
+ # TODO: account for rhoticity (F3)
89
+ def distance(phoneme1, phoneme2)
90
+ formants1 = FormantFrequencies.fetch(phoneme1)
91
+ formants2 = FormantFrequencies.fetch(phoneme2)
92
+
93
+ @minmax_f1 ||= FormantFrequencies.values.minmax { |a, b| a[:F1] <=> b[:F1] }.map { |h| h[:F1] }
94
+ @minmax_f2 ||= FormantFrequencies.values.minmax { |a, b| a[:F2] <=> b[:F2] }.map { |h| h[:F2] }
95
+
96
+ # Get an x and y value for each input phoneme scaled between 0.0 and 1.0
97
+ # We'll use the scaled f1 as the 'x' and the scaled f2 as the 'y'
98
+ scaled_phoneme1_f1 = (formants1[:F1] - @minmax_f1[0]) / @minmax_f1[1].to_f
99
+ scaled_phoneme1_f2 = (formants1[:F2] - @minmax_f2[0]) / @minmax_f2[1].to_f
100
+ scaled_phoneme2_f1 = (formants2[:F1] - @minmax_f1[0]) / @minmax_f1[1].to_f
101
+ scaled_phoneme2_f2 = (formants2[:F2] - @minmax_f2[0]) / @minmax_f2[1].to_f
102
+
103
+ f1_distance = (scaled_phoneme1_f1 - scaled_phoneme2_f1).abs
104
+ f2_distance = (scaled_phoneme1_f2 - scaled_phoneme2_f2).abs
105
+
106
+ # When we have four values we can use the pythagorean theorem on them
107
+ # (order doesn't matter)
108
+ Math.sqrt((f1_distance**2) + (f2_distance**2))
109
+ end
110
+ end
111
+
112
+ module Consonants
113
+ extend self
114
+
115
+ # This chart (columns 2 through the end, anyway) is a direct port of
116
+ # https://en.wikipedia.org/wiki/International_Phonetic_Alphabet#Letters
117
+ # We store the consonant table in this format to make updating it easier.
118
+ #
119
+ # rubocop:disable Layout/TrailingWhitespace
120
+ ChartData = %( | Labio-velar | Bi-labial | Labio-dental | Linguo-labial | Dental | Alveolar | Post-alveolar | Retro-flex | Palatal | Velar | Uvular | Pharyngeal | Glottal
121
+ Nasal | | m̥ m | ɱ | n̼ | | n̥ n | | ɳ̊ ɳ | ɲ̊ ɲ | ŋ̊ ŋ | ɴ | |
122
+ Stop | | p b | p̪ b̪ | t̼ d̼ | | t d | | ʈ ɖ | c ɟ | k g | q ɢ | ʡ | ʔ
123
+ Sibilant fricative | | | | | | s z | ʃ ʒ | ʂ ʐ | ɕ ʑ | | | |
124
+ Non-sibilant fricative | | ɸ β | f v | θ̼ ð̼ | θ ð | θ̠ ð̠ | ɹ̠̊˔ ɹ̠˔ | ɻ˔ | ç ʝ | x ɣ | χ ʁ | ħ ʕ | h ɦ
125
+ Approximant | w | | ʋ̥ ʋ | | | ɹ̥ ɹ | | ɻ̊ ɻ | j̊ j | ɰ̊ ɰ | | | ʔ̞
126
+ Tap/flap | | ⱱ̟ | ⱱ | ɾ̼ | | ɾ̥ ɾ | | ɽ̊ ɽ | | | ɢ̆ | ʡ̆ |
127
+ Trill | | ʙ̥ ʙ | | | | r̥ r | | | | | ʀ̥ ʀ | ʜ ʢ |
128
+ Lateral fricative | | | | | | ɬ ɮ | | ɭ̊˔ ɭ˔ | ʎ̝̊ ʎ̝ | ʟ̝̊ ʟ̝ | | |
129
+ Lateral approximant | | | | | | l̥ l | | ɭ̊ ɭ | ʎ̥ ʎ | ʟ̥ ʟ | ʟ̠ | |
130
+ Lateral tap/flap | | | | | | ɺ | | ɭ̆ | ʎ̆ | ʟ̆ | | |
131
+ )
132
+ # rubocop:enable Layout/TrailingWhitespace
133
+
134
+ # Parse the ChartData into a lookup table where we can retrieve attributes
135
+ # for each phoneme
136
+ def features
137
+ @features ||= begin
138
+ header, *manners = ChartData.lines
139
+
140
+ _, *positions = header.chomp.split(' | ')
141
+ positions.map(&:strip!)
142
+
143
+ # Remove any trailing blank lines
144
+ manners.pop while manners.last.to_s.strip.empty?
145
+
146
+ position_indexes = Hash[*positions.each_with_index.to_a.flatten]
147
+
148
+ @position_count = positions.size
149
+
150
+ manners.each_with_object({}) do |row, phonemes|
151
+ manner, *columns = row.chomp.split(' | ')
152
+ manner.strip!
153
+ positions.zip(columns).each do |position, phoneme_text|
154
+ data = {
155
+ position: position,
156
+ position_index: position_indexes[position],
157
+ manner: manner,
158
+ }
159
+ # If there is a character in the first byte then this articulation
160
+ # has a voiceless phoneme. The symbol may use additional characters
161
+ # as part of the phoneme symbol.
162
+ unless phoneme_text[0] == ' '
163
+ # Take the first non-blank character string
164
+ symbol = phoneme_text.chars.take_while { |char| char != ' ' }.join
165
+ phoneme_text = phoneme_text[symbol.chars.size..-1]
166
+
167
+ phonemes[symbol] = data.merge(voiced: false)
168
+ end
169
+ # If there's a character anywhere left in the string then this
170
+ # articulation has a voiced phoneme
171
+ unless phoneme_text.strip.empty?
172
+ symbol = phoneme_text.strip
173
+ phonemes[symbol] = data.merge(voiced: true)
174
+ end
175
+ end
176
+ end
177
+ end
178
+ end
179
+
180
+ def phonemes
181
+ @phonemes ||= features.keys
182
+ end
183
+
184
+ # Given two consonants, calculate their difference by summing the
185
+ # following:
186
+ # * 0.1 if they are not voiced the same
187
+ # * 0.3 if they are different manners
188
+ # * Up to 0.6 if they are the maximum position difference
189
+ def distance(phoneme1, phoneme2)
190
+ features1 = features[phoneme1]
191
+ features2 = features[phoneme2]
192
+
193
+ penalty = 0
194
+ penalty += 0.1 if features1[:voiced] != features2[:voiced]
195
+
196
+ penalty += 0.3 if features1[:manner] != features2[:manner]
197
+
198
+ # Use up to the remaining 0.6 for penalizing differences in position
199
+ penalty += 0.6 * ((features1[:position_index] - features2[:position_index]).abs / @position_count.to_f)
200
+ penalty
201
+ end
202
+ end
203
+
204
+ def phonemes
205
+ Vowels.phonemes + Consonants.phonemes
206
+ end
207
+
208
+ Symbols = Consonants.phonemes.reduce({}) { |acc, p| acc.update p => :consonant }.merge(
209
+ Vowels.phonemes.reduce({}) { |acc, p| acc.update p => :vowel }
210
+ )
211
+
212
+ def distance(phoneme1, phoneme2)
213
+ return 0 if phoneme1 == phoneme2
214
+
215
+ distance_map.fetch(phoneme1).fetch(phoneme2)
216
+ end
217
+
218
+ def distance_map
219
+ @distance_map ||= phonemes.permutation(2).each_with_object(Hash.new { |h, k| h[k] = {} }) do |pair, scores|
220
+ p1, p2 = *pair
221
+ score = _distance(p1, p2)
222
+ scores[p1][p2] = score
223
+ scores[p2][p1] = score
224
+ end
225
+ end
226
+
227
+ private
228
+
229
+ def _distance(phoneme1, phoneme2)
230
+ types = [Symbols.fetch(phoneme1), Symbols.fetch(phoneme2)].sort
231
+ if types == %i[consonant vowel]
232
+ 1.0
233
+ elsif types == %i[vowel vowel]
234
+ Vowels.distance(phoneme1, phoneme2)
235
+ elsif types == %i[consonant consonant]
236
+ Consonants.distance(phoneme1, phoneme2)
237
+ end
238
+ end
239
+ end