phonetics 3.0.8 → 3.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/gempush.yml +4 -4
- data/.github/workflows/test.yml +4 -6
- data/.rubocop.yml +3 -1
- data/CHANGELOG +4 -0
- data/Makefile +3 -0
- data/VERSION +1 -1
- data/ext/c_levenshtein/phonetic_cost.c +412 -412
- data/lib/phonetics/code_generator.rb +1 -1
- data/lib/phonetics/distances.rb +8 -4
- data/lib/phonetics/transcriptions.rb +7 -3
- metadata +4 -3
data/lib/phonetics/distances.rb
CHANGED
@@ -106,7 +106,11 @@ module Phonetics
|
|
106
106
|
|
107
107
|
# When we have four values we can use the pythagorean theorem on them
|
108
108
|
# (order doesn't matter)
|
109
|
-
Math.sqrt((f1_distance**2) + (f2_distance**2))
|
109
|
+
sqrt = Math.sqrt((f1_distance**2) + (f2_distance**2))
|
110
|
+
|
111
|
+
# Vowels are more similar to each other than consonants, so we apply a
|
112
|
+
# penalty softening here
|
113
|
+
sqrt / 2.0
|
110
114
|
end
|
111
115
|
end
|
112
116
|
|
@@ -155,9 +159,9 @@ module Phonetics
|
|
155
159
|
manner.strip!
|
156
160
|
positions.zip(columns).each do |position, phoneme_text|
|
157
161
|
data = {
|
158
|
-
|
162
|
+
position: position,
|
159
163
|
position_index: position_indexes[position],
|
160
|
-
|
164
|
+
manner: manner,
|
161
165
|
}
|
162
166
|
# If there is a character in the first byte then this articulation
|
163
167
|
# has a voiceless phoneme. The symbol may use additional characters
|
@@ -165,7 +169,7 @@ module Phonetics
|
|
165
169
|
unless phoneme_text[0] == ' '
|
166
170
|
# Take the first non-blank character string
|
167
171
|
symbol = phoneme_text.chars.take_while { |char| char != ' ' }.join
|
168
|
-
phoneme_text = phoneme_text[symbol.chars.size
|
172
|
+
phoneme_text = phoneme_text[symbol.chars.size..]
|
169
173
|
|
170
174
|
phonemes[symbol] = data.merge(voiced: false)
|
171
175
|
end
|
@@ -29,6 +29,10 @@ module Phonetics
|
|
29
29
|
nil
|
30
30
|
end
|
31
31
|
|
32
|
+
def words
|
33
|
+
transcriptions.keys
|
34
|
+
end
|
35
|
+
|
32
36
|
def transcriptions
|
33
37
|
@transcriptions ||= begin
|
34
38
|
download! unless File.exist?(TranscriptionFile)
|
@@ -120,10 +124,10 @@ module Phonetics
|
|
120
124
|
next if max_rarity && (entry['rarity'].nil? || entry['rarity'] > max_rarity)
|
121
125
|
|
122
126
|
entry_data = {
|
123
|
-
|
127
|
+
word: key,
|
124
128
|
rarity: entry['rarity'],
|
125
129
|
}
|
126
|
-
entry.fetch('ipa', []).
|
130
|
+
entry.fetch('ipa', []).each_value do |transcription|
|
127
131
|
base_trie = construct_trie(base_trie, transcription, entry_data)
|
128
132
|
end
|
129
133
|
end
|
@@ -155,7 +159,7 @@ module Phonetics
|
|
155
159
|
next_char = chars_remaining[0]
|
156
160
|
subtrie[next_char] ||= {}
|
157
161
|
subtrie[next_char][:path] ||= subtrie[:path].to_s + next_char
|
158
|
-
subtrie[next_char] = construct_trie(subtrie[next_char], chars_remaining[1
|
162
|
+
subtrie[next_char] = construct_trie(subtrie[next_char], chars_remaining[1..], entry_data, depth + 1)
|
159
163
|
end
|
160
164
|
subtrie
|
161
165
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: phonetics
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.8
|
4
|
+
version: 3.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jack Danger
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-05-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -107,6 +107,7 @@ files:
|
|
107
107
|
- ".gitignore"
|
108
108
|
- ".rspec"
|
109
109
|
- ".rubocop.yml"
|
110
|
+
- CHANGELOG
|
110
111
|
- CODE_OF_CONDUCT.md
|
111
112
|
- Dockerfile
|
112
113
|
- Gemfile
|
@@ -156,7 +157,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
156
157
|
- !ruby/object:Gem::Version
|
157
158
|
version: '0'
|
158
159
|
requirements: []
|
159
|
-
rubygems_version: 3.
|
160
|
+
rubygems_version: 3.3.7
|
160
161
|
signing_key:
|
161
162
|
specification_version: 4
|
162
163
|
summary: tools for linguistic code using the International Phonetic Alphabet
|