phonetics 3.0.7 → 3.0.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/gempush.yml +4 -4
- data/.github/workflows/test.yml +4 -5
- data/.rubocop.yml +4 -1
- data/VERSION +1 -1
- data/lib/phonetics/code_generator.rb +2 -2
- data/lib/phonetics/distances.rb +12 -6
- data/lib/phonetics/levenshtein.rb +2 -0
- data/lib/phonetics/ruby_levenshtein.rb +6 -4
- data/lib/phonetics/transcriptions.rb +13 -7
- data/phonetics.gemspec +2 -0
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1e47d71b000fa07bd5cdfdf7e2321512370020280e1d9b227547f21673f8f533
|
4
|
+
data.tar.gz: f917449d2c0f4276b3fd214506e99fd12621f4af1fedfebc7f5bcaff62c22ee1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f804627ad6de25b5c9cadef635abd957fb29eea9f7a7a9be7701b98c07c34afd1d6938135b66887a853a50e88d78a2680f9314ad7ee513513f598209ce703015
|
7
|
+
data.tar.gz: c8300d8122b3539da7fe1b687edd0c7a2d4afc98bd908568925a0153d378bb4c73daf6c8a39b5f12524645da942311dc4f469a1650999c90b1c4e34fb7e203d7
|
data/.github/workflows/gempush.yml
CHANGED
@@ -3,7 +3,7 @@ name: Ruby Gem
|
|
3
3
|
on:
|
4
4
|
push:
|
5
5
|
branches:
|
6
|
-
-
|
6
|
+
- main
|
7
7
|
|
8
8
|
jobs:
|
9
9
|
build:
|
@@ -12,10 +12,10 @@ jobs:
|
|
12
12
|
|
13
13
|
steps:
|
14
14
|
- uses: actions/checkout@master
|
15
|
-
- name: Set up Ruby
|
16
|
-
uses:
|
15
|
+
- name: Set up Ruby 3.1
|
16
|
+
uses: ruby/setup-ruby@v1
|
17
17
|
with:
|
18
|
-
ruby-version:
|
18
|
+
ruby-version: 3.1.1
|
19
19
|
|
20
20
|
- name: Publish to RubyGems
|
21
21
|
run: |
|
data/.github/workflows/test.yml
CHANGED
@@ -9,12 +9,11 @@ jobs:
|
|
9
9
|
strategy:
|
10
10
|
matrix:
|
11
11
|
ruby:
|
12
|
-
- '
|
13
|
-
- '
|
12
|
+
- '3.1.5'
|
13
|
+
- '3.3.1'
|
14
14
|
steps:
|
15
|
-
- uses: actions/checkout@
|
16
|
-
-
|
17
|
-
uses: actions/setup-ruby@v1
|
15
|
+
- uses: actions/checkout@v4
|
16
|
+
- uses: ruby/setup-ruby@v1
|
18
17
|
with:
|
19
18
|
ruby-version: ${{ matrix.ruby }}
|
20
19
|
architecture: 'x64'
|
data/.rubocop.yml
CHANGED
@@ -2,6 +2,7 @@
|
|
2
2
|
AllCops:
|
3
3
|
Exclude:
|
4
4
|
- Rakefile
|
5
|
+
- tmp/**/*
|
5
6
|
|
6
7
|
Layout/LineLength:
|
7
8
|
Enabled: false
|
@@ -44,4 +45,6 @@ Style/TrailingCommaInHashLiteral:
|
|
44
45
|
|
45
46
|
Layout/HashAlignment:
|
46
47
|
EnforcedHashRocketStyle: separator
|
47
|
-
|
48
|
+
|
49
|
+
Gemspec/RequiredRubyVersion:
|
50
|
+
Enabled: false
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
3.0.7
|
1
|
+
3.0.9
|
data/lib/phonetics/code_generator.rb
CHANGED
@@ -7,7 +7,7 @@ module Phonetics
|
|
7
7
|
class CodeGenerator
|
8
8
|
attr_reader :writer
|
9
9
|
|
10
|
-
def initialize(writer =
|
10
|
+
def initialize(writer = $stdout)
|
11
11
|
@writer = writer
|
12
12
|
end
|
13
13
|
|
@@ -67,7 +67,7 @@ module Phonetics
|
|
67
67
|
|
68
68
|
def ruby_source
|
69
69
|
location = caller_locations.first
|
70
|
-
"#{location.path.split('/')[-4
|
70
|
+
"#{location.path.split('/')[-4..].join('/')}:#{location.lineno}"
|
71
71
|
end
|
72
72
|
|
73
73
|
def indent(depth, line)
|
data/lib/phonetics/distances.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require 'delegate'
|
4
|
+
require 'set'
|
4
5
|
|
5
6
|
module Phonetics
|
6
7
|
extend self
|
@@ -131,6 +132,8 @@ module Phonetics
|
|
131
132
|
)
|
132
133
|
# rubocop:enable Layout/TrailingWhitespace
|
133
134
|
|
135
|
+
# rubocop:disable Metrics/CyclomaticComplexity
|
136
|
+
# rubocop:disable Metrics/PerceivedComplexity
|
134
137
|
# Parse the ChartData into a lookup table where we can retrieve attributes
|
135
138
|
# for each phoneme
|
136
139
|
def features
|
@@ -152,9 +155,9 @@ module Phonetics
|
|
152
155
|
manner.strip!
|
153
156
|
positions.zip(columns).each do |position, phoneme_text|
|
154
157
|
data = {
|
155
|
-
|
158
|
+
position: position,
|
156
159
|
position_index: position_indexes[position],
|
157
|
-
|
160
|
+
manner: manner,
|
158
161
|
}
|
159
162
|
# If there is a character in the first byte then this articulation
|
160
163
|
# has a voiceless phoneme. The symbol may use additional characters
|
@@ -162,7 +165,7 @@ module Phonetics
|
|
162
165
|
unless phoneme_text[0] == ' '
|
163
166
|
# Take the first non-blank character string
|
164
167
|
symbol = phoneme_text.chars.take_while { |char| char != ' ' }.join
|
165
|
-
phoneme_text = phoneme_text[symbol.chars.size
|
168
|
+
phoneme_text = phoneme_text[symbol.chars.size..]
|
166
169
|
|
167
170
|
phonemes[symbol] = data.merge(voiced: false)
|
168
171
|
end
|
@@ -176,6 +179,8 @@ module Phonetics
|
|
176
179
|
end
|
177
180
|
end
|
178
181
|
end
|
182
|
+
# rubocop:enable Metrics/CyclomaticComplexity
|
183
|
+
# rubocop:enable Metrics/PerceivedComplexity
|
179
184
|
|
180
185
|
def phonemes
|
181
186
|
@phonemes ||= features.keys
|
@@ -228,11 +233,12 @@ module Phonetics
|
|
228
233
|
|
229
234
|
def _distance(phoneme1, phoneme2)
|
230
235
|
types = [Symbols.fetch(phoneme1), Symbols.fetch(phoneme2)].sort
|
231
|
-
|
236
|
+
case types
|
237
|
+
when %i[consonant vowel]
|
232
238
|
1.0
|
233
|
-
|
239
|
+
when %i[vowel vowel]
|
234
240
|
Vowels.distance(phoneme1, phoneme2)
|
235
|
-
|
241
|
+
when %i[consonant consonant]
|
236
242
|
Consonants.distance(phoneme1, phoneme2)
|
237
243
|
end
|
238
244
|
end
|
data/lib/phonetics/levenshtein.rb
CHANGED
@@ -16,10 +16,12 @@ module Phonetics
|
|
16
16
|
module Levenshtein
|
17
17
|
extend ::PhoneticsLevenshteinCBinding
|
18
18
|
|
19
|
+
# rubocop:disable Style/OptionalBooleanParameter
|
19
20
|
def self.distance(str1, str2, verbose = false)
|
20
21
|
return if str1.nil? || str2.nil?
|
21
22
|
|
22
23
|
internal_phonetic_distance(str1, str2, verbose)
|
23
24
|
end
|
25
|
+
# rubocop:enable Style/OptionalBooleanParameter
|
24
26
|
end
|
25
27
|
end
|
data/lib/phonetics/ruby_levenshtein.rb
CHANGED
@@ -16,6 +16,7 @@ module Phonetics
|
|
16
16
|
class RubyLevenshtein
|
17
17
|
attr_reader :str1, :str2, :len1, :len2, :matrix
|
18
18
|
|
19
|
+
# rubocop:disable Style/OptionalBooleanParameter
|
19
20
|
def initialize(ipa_str1, ipa_str2, verbose = false)
|
20
21
|
@str1 = ipa_str1.each_char.select { |c| Phonetics.phonemes.include?(c) }.join
|
21
22
|
@str2 = ipa_str2.each_char.select { |c| Phonetics.phonemes.include?(c) }.join
|
@@ -26,6 +27,11 @@ module Phonetics
|
|
26
27
|
set_edit_distances(@str1, @str2)
|
27
28
|
end
|
28
29
|
|
30
|
+
def self.distance(str1, str2, verbose = false)
|
31
|
+
new(str1, str2, verbose).distance
|
32
|
+
end
|
33
|
+
# rubocop:enable Style/OptionalBooleanParameter
|
34
|
+
|
29
35
|
def distance
|
30
36
|
return 0 if walk.empty?
|
31
37
|
|
@@ -33,10 +39,6 @@ module Phonetics
|
|
33
39
|
walk.last[:distance]
|
34
40
|
end
|
35
41
|
|
36
|
-
def self.distance(str1, str2, verbose = false)
|
37
|
-
new(str1, str2, verbose).distance
|
38
|
-
end
|
39
|
-
|
40
42
|
private
|
41
43
|
|
42
44
|
def walk
|
data/lib/phonetics/transcriptions.rb
CHANGED
@@ -22,13 +22,17 @@ module Phonetics
|
|
22
22
|
return unless entry['ipa']
|
23
23
|
|
24
24
|
SourcesByPreference.each do |preferred_source|
|
25
|
-
entry['ipa'].
|
25
|
+
entry['ipa'].each_key do |source|
|
26
26
|
return entry['ipa'][source] if source =~ preferred_source
|
27
27
|
end
|
28
28
|
end
|
29
29
|
nil
|
30
30
|
end
|
31
31
|
|
32
|
+
def words
|
33
|
+
transcriptions.keys
|
34
|
+
end
|
35
|
+
|
32
36
|
def transcriptions
|
33
37
|
@transcriptions ||= begin
|
34
38
|
download! unless File.exist?(TranscriptionFile)
|
@@ -41,10 +45,13 @@ module Phonetics
|
|
41
45
|
@transcriptions = JSON.parse(File.read(TranscriptionFile))
|
42
46
|
end
|
43
47
|
|
48
|
+
# rubocop:disable Security/Open
|
44
49
|
def download!
|
45
50
|
File.open(Transcriptions, 'w') { |f| f.write(URI.open(TranscriptionsURL).read) }
|
46
51
|
end
|
52
|
+
# rubocop:enable Security/Open
|
47
53
|
|
54
|
+
# rubocop:disable Metrics/CyclomaticComplexity
|
48
55
|
def trie(max_rarity = nil)
|
49
56
|
# Let's turn this:
|
50
57
|
#
|
@@ -114,21 +121,20 @@ module Phonetics
|
|
114
121
|
@tries[max_rarity] ||= begin
|
115
122
|
base_trie = {}
|
116
123
|
transcriptions.each do |key, entry|
|
117
|
-
if max_rarity
|
118
|
-
next if entry['rarity'].nil? || entry['rarity'] > max_rarity
|
119
|
-
end
|
124
|
+
next if max_rarity && (entry['rarity'].nil? || entry['rarity'] > max_rarity)
|
120
125
|
|
121
126
|
entry_data = {
|
122
|
-
|
127
|
+
word: key,
|
123
128
|
rarity: entry['rarity'],
|
124
129
|
}
|
125
|
-
entry.fetch('ipa', []).
|
130
|
+
entry.fetch('ipa', []).each_value do |transcription|
|
126
131
|
base_trie = construct_trie(base_trie, transcription, entry_data)
|
127
132
|
end
|
128
133
|
end
|
129
134
|
base_trie.freeze
|
130
135
|
end
|
131
136
|
end
|
137
|
+
# rubocop:enable Metrics/CyclomaticComplexity
|
132
138
|
|
133
139
|
def walk(ipa)
|
134
140
|
ipa.each_char.reduce(trie) { |acc, char| acc[char] }
|
@@ -153,7 +159,7 @@ module Phonetics
|
|
153
159
|
next_char = chars_remaining[0]
|
154
160
|
subtrie[next_char] ||= {}
|
155
161
|
subtrie[next_char][:path] ||= subtrie[:path].to_s + next_char
|
156
|
-
subtrie[next_char] = construct_trie(subtrie[next_char], chars_remaining[1
|
162
|
+
subtrie[next_char] = construct_trie(subtrie[next_char], chars_remaining[1..], entry_data, depth + 1)
|
157
163
|
end
|
158
164
|
subtrie
|
159
165
|
end
|
data/phonetics.gemspec
CHANGED
@@ -11,6 +11,8 @@ Gem::Specification.new do |spec|
|
|
11
11
|
spec.homepage = 'https://github.com/JackDanger/phonetics'
|
12
12
|
spec.license = 'MIT'
|
13
13
|
|
14
|
+
spec.required_ruby_version = '>= 2.5'
|
15
|
+
|
14
16
|
spec.extensions = ['ext/c_levenshtein/extconf.rb']
|
15
17
|
|
16
18
|
# Specify which files should be added to the gem when it is released.
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: phonetics
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.7
|
4
|
+
version: 3.0.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jack Danger
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-04-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -149,14 +149,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
149
149
|
requirements:
|
150
150
|
- - ">="
|
151
151
|
- !ruby/object:Gem::Version
|
152
|
-
version: '
|
152
|
+
version: '2.5'
|
153
153
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
154
154
|
requirements:
|
155
155
|
- - ">="
|
156
156
|
- !ruby/object:Gem::Version
|
157
157
|
version: '0'
|
158
158
|
requirements: []
|
159
|
-
rubygems_version: 3.
|
159
|
+
rubygems_version: 3.3.7
|
160
160
|
signing_key:
|
161
161
|
specification_version: 4
|
162
162
|
summary: tools for linguistic code using the International Phonetic Alphabet
|