phonetics 3.0.7 → 3.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/gempush.yml +4 -4
- data/.github/workflows/test.yml +4 -5
- data/.rubocop.yml +4 -1
- data/VERSION +1 -1
- data/lib/phonetics/code_generator.rb +2 -2
- data/lib/phonetics/distances.rb +12 -6
- data/lib/phonetics/levenshtein.rb +2 -0
- data/lib/phonetics/ruby_levenshtein.rb +6 -4
- data/lib/phonetics/transcriptions.rb +13 -7
- data/phonetics.gemspec +2 -0
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1e47d71b000fa07bd5cdfdf7e2321512370020280e1d9b227547f21673f8f533
|
4
|
+
data.tar.gz: f917449d2c0f4276b3fd214506e99fd12621f4af1fedfebc7f5bcaff62c22ee1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f804627ad6de25b5c9cadef635abd957fb29eea9f7a7a9be7701b98c07c34afd1d6938135b66887a853a50e88d78a2680f9314ad7ee513513f598209ce703015
|
7
|
+
data.tar.gz: c8300d8122b3539da7fe1b687edd0c7a2d4afc98bd908568925a0153d378bb4c73daf6c8a39b5f12524645da942311dc4f469a1650999c90b1c4e34fb7e203d7
|
data/.github/workflows/gempush.yml
CHANGED
@@ -3,7 +3,7 @@ name: Ruby Gem
|
|
3
3
|
on:
|
4
4
|
push:
|
5
5
|
branches:
|
6
|
-
-
|
6
|
+
- main
|
7
7
|
|
8
8
|
jobs:
|
9
9
|
build:
|
@@ -12,10 +12,10 @@ jobs:
|
|
12
12
|
|
13
13
|
steps:
|
14
14
|
- uses: actions/checkout@master
|
15
|
-
- name: Set up Ruby
|
16
|
-
uses:
|
15
|
+
- name: Set up Ruby 3.1
|
16
|
+
uses: ruby/setup-ruby@v1
|
17
17
|
with:
|
18
|
-
ruby-version:
|
18
|
+
ruby-version: 3.1.1
|
19
19
|
|
20
20
|
- name: Publish to RubyGems
|
21
21
|
run: |
|
data/.github/workflows/test.yml
CHANGED
@@ -9,12 +9,11 @@ jobs:
|
|
9
9
|
strategy:
|
10
10
|
matrix:
|
11
11
|
ruby:
|
12
|
-
- '
|
13
|
-
- '
|
12
|
+
- '3.1.5'
|
13
|
+
- '3.3.1'
|
14
14
|
steps:
|
15
|
-
- uses: actions/checkout@
|
16
|
-
-
|
17
|
-
uses: actions/setup-ruby@v1
|
15
|
+
- uses: actions/checkout@v4
|
16
|
+
- uses: ruby/setup-ruby@v1
|
18
17
|
with:
|
19
18
|
ruby-version: ${{ matrix.ruby }}
|
20
19
|
architecture: 'x64'
|
data/.rubocop.yml
CHANGED
@@ -2,6 +2,7 @@
|
|
2
2
|
AllCops:
|
3
3
|
Exclude:
|
4
4
|
- Rakefile
|
5
|
+
- tmp/**/*
|
5
6
|
|
6
7
|
Layout/LineLength:
|
7
8
|
Enabled: false
|
@@ -44,4 +45,6 @@ Style/TrailingCommaInHashLiteral:
|
|
44
45
|
|
45
46
|
Layout/HashAlignment:
|
46
47
|
EnforcedHashRocketStyle: separator
|
47
|
-
|
48
|
+
|
49
|
+
Gemspec/RequiredRubyVersion:
|
50
|
+
Enabled: false
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
3.0.7
|
1
|
+
3.0.9
|
data/lib/phonetics/code_generator.rb
CHANGED
@@ -7,7 +7,7 @@ module Phonetics
|
|
7
7
|
class CodeGenerator
|
8
8
|
attr_reader :writer
|
9
9
|
|
10
|
-
def initialize(writer = STDOUT)
|
10
|
+
def initialize(writer = $stdout)
|
11
11
|
@writer = writer
|
12
12
|
end
|
13
13
|
|
@@ -67,7 +67,7 @@ module Phonetics
|
|
67
67
|
|
68
68
|
def ruby_source
|
69
69
|
location = caller_locations.first
|
70
|
-
"#{location.path.split('/')[-4..-1].join('/')}:#{location.lineno}"
|
70
|
+
"#{location.path.split('/')[-4..].join('/')}:#{location.lineno}"
|
71
71
|
end
|
72
72
|
|
73
73
|
def indent(depth, line)
|
data/lib/phonetics/distances.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require 'delegate'
|
4
|
+
require 'set'
|
4
5
|
|
5
6
|
module Phonetics
|
6
7
|
extend self
|
@@ -131,6 +132,8 @@ module Phonetics
|
|
131
132
|
)
|
132
133
|
# rubocop:enable Layout/TrailingWhitespace
|
133
134
|
|
135
|
+
# rubocop:disable Metrics/CyclomaticComplexity
|
136
|
+
# rubocop:disable Metrics/PerceivedComplexity
|
134
137
|
# Parse the ChartData into a lookup table where we can retrieve attributes
|
135
138
|
# for each phoneme
|
136
139
|
def features
|
@@ -152,9 +155,9 @@ module Phonetics
|
|
152
155
|
manner.strip!
|
153
156
|
positions.zip(columns).each do |position, phoneme_text|
|
154
157
|
data = {
|
155
|
-
|
158
|
+
position: position,
|
156
159
|
position_index: position_indexes[position],
|
157
|
-
|
160
|
+
manner: manner,
|
158
161
|
}
|
159
162
|
# If there is a character in the first byte then this articulation
|
160
163
|
# has a voiceless phoneme. The symbol may use additional characters
|
@@ -162,7 +165,7 @@ module Phonetics
|
|
162
165
|
unless phoneme_text[0] == ' '
|
163
166
|
# Take the first non-blank character string
|
164
167
|
symbol = phoneme_text.chars.take_while { |char| char != ' ' }.join
|
165
|
-
phoneme_text = phoneme_text[symbol.chars.size..-1]
|
168
|
+
phoneme_text = phoneme_text[symbol.chars.size..]
|
166
169
|
|
167
170
|
phonemes[symbol] = data.merge(voiced: false)
|
168
171
|
end
|
@@ -176,6 +179,8 @@ module Phonetics
|
|
176
179
|
end
|
177
180
|
end
|
178
181
|
end
|
182
|
+
# rubocop:enable Metrics/CyclomaticComplexity
|
183
|
+
# rubocop:enable Metrics/PerceivedComplexity
|
179
184
|
|
180
185
|
def phonemes
|
181
186
|
@phonemes ||= features.keys
|
@@ -228,11 +233,12 @@ module Phonetics
|
|
228
233
|
|
229
234
|
def _distance(phoneme1, phoneme2)
|
230
235
|
types = [Symbols.fetch(phoneme1), Symbols.fetch(phoneme2)].sort
|
231
|
-
|
236
|
+
case types
|
237
|
+
when %i[consonant vowel]
|
232
238
|
1.0
|
233
|
-
|
239
|
+
when %i[vowel vowel]
|
234
240
|
Vowels.distance(phoneme1, phoneme2)
|
235
|
-
|
241
|
+
when %i[consonant consonant]
|
236
242
|
Consonants.distance(phoneme1, phoneme2)
|
237
243
|
end
|
238
244
|
end
|
data/lib/phonetics/levenshtein.rb
CHANGED
@@ -16,10 +16,12 @@ module Phonetics
|
|
16
16
|
module Levenshtein
|
17
17
|
extend ::PhoneticsLevenshteinCBinding
|
18
18
|
|
19
|
+
# rubocop:disable Style/OptionalBooleanParameter
|
19
20
|
def self.distance(str1, str2, verbose = false)
|
20
21
|
return if str1.nil? || str2.nil?
|
21
22
|
|
22
23
|
internal_phonetic_distance(str1, str2, verbose)
|
23
24
|
end
|
25
|
+
# rubocop:enable Style/OptionalBooleanParameter
|
24
26
|
end
|
25
27
|
end
|
data/lib/phonetics/ruby_levenshtein.rb
CHANGED
@@ -16,6 +16,7 @@ module Phonetics
|
|
16
16
|
class RubyLevenshtein
|
17
17
|
attr_reader :str1, :str2, :len1, :len2, :matrix
|
18
18
|
|
19
|
+
# rubocop:disable Style/OptionalBooleanParameter
|
19
20
|
def initialize(ipa_str1, ipa_str2, verbose = false)
|
20
21
|
@str1 = ipa_str1.each_char.select { |c| Phonetics.phonemes.include?(c) }.join
|
21
22
|
@str2 = ipa_str2.each_char.select { |c| Phonetics.phonemes.include?(c) }.join
|
@@ -26,6 +27,11 @@ module Phonetics
|
|
26
27
|
set_edit_distances(@str1, @str2)
|
27
28
|
end
|
28
29
|
|
30
|
+
def self.distance(str1, str2, verbose = false)
|
31
|
+
new(str1, str2, verbose).distance
|
32
|
+
end
|
33
|
+
# rubocop:enable Style/OptionalBooleanParameter
|
34
|
+
|
29
35
|
def distance
|
30
36
|
return 0 if walk.empty?
|
31
37
|
|
@@ -33,10 +39,6 @@ module Phonetics
|
|
33
39
|
walk.last[:distance]
|
34
40
|
end
|
35
41
|
|
36
|
-
def self.distance(str1, str2, verbose = false)
|
37
|
-
new(str1, str2, verbose).distance
|
38
|
-
end
|
39
|
-
|
40
42
|
private
|
41
43
|
|
42
44
|
def walk
|
data/lib/phonetics/transcriptions.rb
CHANGED
@@ -22,13 +22,17 @@ module Phonetics
|
|
22
22
|
return unless entry['ipa']
|
23
23
|
|
24
24
|
SourcesByPreference.each do |preferred_source|
|
25
|
-
entry['ipa'].keys.each do |source|
|
25
|
+
entry['ipa'].each_key do |source|
|
26
26
|
return entry['ipa'][source] if source =~ preferred_source
|
27
27
|
end
|
28
28
|
end
|
29
29
|
nil
|
30
30
|
end
|
31
31
|
|
32
|
+
def words
|
33
|
+
transcriptions.keys
|
34
|
+
end
|
35
|
+
|
32
36
|
def transcriptions
|
33
37
|
@transcriptions ||= begin
|
34
38
|
download! unless File.exist?(TranscriptionFile)
|
@@ -41,10 +45,13 @@ module Phonetics
|
|
41
45
|
@transcriptions = JSON.parse(File.read(TranscriptionFile))
|
42
46
|
end
|
43
47
|
|
48
|
+
# rubocop:disable Security/Open
|
44
49
|
def download!
|
45
50
|
File.open(Transcriptions, 'w') { |f| f.write(URI.open(TranscriptionsURL).read) }
|
46
51
|
end
|
52
|
+
# rubocop:enable Security/Open
|
47
53
|
|
54
|
+
# rubocop:disable Metrics/CyclomaticComplexity
|
48
55
|
def trie(max_rarity = nil)
|
49
56
|
# Let's turn this:
|
50
57
|
#
|
@@ -114,21 +121,20 @@ module Phonetics
|
|
114
121
|
@tries[max_rarity] ||= begin
|
115
122
|
base_trie = {}
|
116
123
|
transcriptions.each do |key, entry|
|
117
|
-
if max_rarity
|
118
|
-
next if entry['rarity'].nil? || entry['rarity'] > max_rarity
|
119
|
-
end
|
124
|
+
next if max_rarity && (entry['rarity'].nil? || entry['rarity'] > max_rarity)
|
120
125
|
|
121
126
|
entry_data = {
|
122
|
-
|
127
|
+
word: key,
|
123
128
|
rarity: entry['rarity'],
|
124
129
|
}
|
125
|
-
entry.fetch('ipa', []).values.each do |transcription|
|
130
|
+
entry.fetch('ipa', []).each_value do |transcription|
|
126
131
|
base_trie = construct_trie(base_trie, transcription, entry_data)
|
127
132
|
end
|
128
133
|
end
|
129
134
|
base_trie.freeze
|
130
135
|
end
|
131
136
|
end
|
137
|
+
# rubocop:enable Metrics/CyclomaticComplexity
|
132
138
|
|
133
139
|
def walk(ipa)
|
134
140
|
ipa.each_char.reduce(trie) { |acc, char| acc[char] }
|
@@ -153,7 +159,7 @@ module Phonetics
|
|
153
159
|
next_char = chars_remaining[0]
|
154
160
|
subtrie[next_char] ||= {}
|
155
161
|
subtrie[next_char][:path] ||= subtrie[:path].to_s + next_char
|
156
|
-
subtrie[next_char] = construct_trie(subtrie[next_char], chars_remaining[1..-1], entry_data, depth + 1)
|
162
|
+
subtrie[next_char] = construct_trie(subtrie[next_char], chars_remaining[1..], entry_data, depth + 1)
|
157
163
|
end
|
158
164
|
subtrie
|
159
165
|
end
|
data/phonetics.gemspec
CHANGED
@@ -11,6 +11,8 @@ Gem::Specification.new do |spec|
|
|
11
11
|
spec.homepage = 'https://github.com/JackDanger/phonetics'
|
12
12
|
spec.license = 'MIT'
|
13
13
|
|
14
|
+
spec.required_ruby_version = '>= 2.5'
|
15
|
+
|
14
16
|
spec.extensions = ['ext/c_levenshtein/extconf.rb']
|
15
17
|
|
16
18
|
# Specify which files should be added to the gem when it is released.
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: phonetics
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.7
|
4
|
+
version: 3.0.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jack Danger
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-04-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -149,14 +149,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
149
149
|
requirements:
|
150
150
|
- - ">="
|
151
151
|
- !ruby/object:Gem::Version
|
152
|
-
version: '0'
|
152
|
+
version: '2.5'
|
153
153
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
154
154
|
requirements:
|
155
155
|
- - ">="
|
156
156
|
- !ruby/object:Gem::Version
|
157
157
|
version: '0'
|
158
158
|
requirements: []
|
159
|
-
rubygems_version: 3.
|
159
|
+
rubygems_version: 3.3.7
|
160
160
|
signing_key:
|
161
161
|
specification_version: 4
|
162
162
|
summary: tools for linguistic code using the International Phonetic Alphabet
|