phonetics 3.0.8 → 3.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/gempush.yml +4 -4
- data/.github/workflows/test.yml +4 -6
- data/.rubocop.yml +3 -1
- data/CHANGELOG +4 -0
- data/Makefile +3 -0
- data/VERSION +1 -1
- data/ext/c_levenshtein/phonetic_cost.c +412 -412
- data/lib/phonetics/code_generator.rb +1 -1
- data/lib/phonetics/distances.rb +8 -4
- data/lib/phonetics/transcriptions.rb +7 -3
- metadata +4 -3
data/lib/phonetics/distances.rb
CHANGED
@@ -106,7 +106,11 @@ module Phonetics
|
|
106
106
|
|
107
107
|
# When we have four values we can use the pythagorean theorem on them
|
108
108
|
# (order doesn't matter)
|
109
|
-
Math.sqrt((f1_distance**2) + (f2_distance**2))
|
109
|
+
sqrt = Math.sqrt((f1_distance**2) + (f2_distance**2))
|
110
|
+
|
111
|
+
# Vowels are more similar to each other than consonants, so we apply a
|
112
|
+
# penalty softening here
|
113
|
+
sqrt / 2.0
|
110
114
|
end
|
111
115
|
end
|
112
116
|
|
@@ -155,9 +159,9 @@ module Phonetics
|
|
155
159
|
manner.strip!
|
156
160
|
positions.zip(columns).each do |position, phoneme_text|
|
157
161
|
data = {
|
158
|
-
|
162
|
+
position: position,
|
159
163
|
position_index: position_indexes[position],
|
160
|
-
|
164
|
+
manner: manner,
|
161
165
|
}
|
162
166
|
# If there is a character in the first byte then this articulation
|
163
167
|
# has a voiceless phoneme. The symbol may use additional characters
|
@@ -165,7 +169,7 @@ module Phonetics
|
|
165
169
|
unless phoneme_text[0] == ' '
|
166
170
|
# Take the first non-blank character string
|
167
171
|
symbol = phoneme_text.chars.take_while { |char| char != ' ' }.join
|
168
|
-
phoneme_text = phoneme_text[symbol.chars.size..-1]
|
172
|
+
phoneme_text = phoneme_text[symbol.chars.size..]
|
169
173
|
|
170
174
|
phonemes[symbol] = data.merge(voiced: false)
|
171
175
|
end
|
@@ -29,6 +29,10 @@ module Phonetics
|
|
29
29
|
nil
|
30
30
|
end
|
31
31
|
|
32
|
+
def words
|
33
|
+
transcriptions.keys
|
34
|
+
end
|
35
|
+
|
32
36
|
def transcriptions
|
33
37
|
@transcriptions ||= begin
|
34
38
|
download! unless File.exist?(TranscriptionFile)
|
@@ -120,10 +124,10 @@ module Phonetics
|
|
120
124
|
next if max_rarity && (entry['rarity'].nil? || entry['rarity'] > max_rarity)
|
121
125
|
|
122
126
|
entry_data = {
|
123
|
-
|
127
|
+
word: key,
|
124
128
|
rarity: entry['rarity'],
|
125
129
|
}
|
126
|
-
entry.fetch('ipa', []).
|
130
|
+
entry.fetch('ipa', []).each_value do |transcription|
|
127
131
|
base_trie = construct_trie(base_trie, transcription, entry_data)
|
128
132
|
end
|
129
133
|
end
|
@@ -155,7 +159,7 @@ module Phonetics
|
|
155
159
|
next_char = chars_remaining[0]
|
156
160
|
subtrie[next_char] ||= {}
|
157
161
|
subtrie[next_char][:path] ||= subtrie[:path].to_s + next_char
|
158
|
-
subtrie[next_char] = construct_trie(subtrie[next_char], chars_remaining[1..-1], entry_data, depth + 1)
|
162
|
+
subtrie[next_char] = construct_trie(subtrie[next_char], chars_remaining[1..], entry_data, depth + 1)
|
159
163
|
end
|
160
164
|
subtrie
|
161
165
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: phonetics
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.8
|
4
|
+
version: 3.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jack Danger
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-05-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -107,6 +107,7 @@ files:
|
|
107
107
|
- ".gitignore"
|
108
108
|
- ".rspec"
|
109
109
|
- ".rubocop.yml"
|
110
|
+
- CHANGELOG
|
110
111
|
- CODE_OF_CONDUCT.md
|
111
112
|
- Dockerfile
|
112
113
|
- Gemfile
|
@@ -156,7 +157,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
156
157
|
- !ruby/object:Gem::Version
|
157
158
|
version: '0'
|
158
159
|
requirements: []
|
159
|
-
rubygems_version: 3.
|
160
|
+
rubygems_version: 3.3.7
|
160
161
|
signing_key:
|
161
162
|
specification_version: 4
|
162
163
|
summary: tools for linguistic code using the International Phonetic Alphabet
|