promper 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/CODE_OF_CONDUCT.md +49 -0
- data/Gemfile +4 -0
- data/LICENSE +21 -0
- data/LICENSE.txt +21 -0
- data/README.md +47 -0
- data/Rakefile +2 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/lib/db.rb +10 -0
- data/lib/db/master.yml +416576 -0
- data/lib/gab_tree.rb +165 -0
- data/lib/gab_tree_processor.rb +69 -0
- data/lib/matching.rb +232 -0
- data/lib/promper.rb +39 -0
- data/lib/promper/version.rb +3 -0
- data/promper.gemspec +60 -0
- metadata +152 -0
data/lib/gab_tree.rb
ADDED
@@ -0,0 +1,165 @@
|
|
1
|
+
# Builds a tree of candidate matches for a target phoneme string.
#
# Every node's content is a three element array:
#   [matched_value, remaining_phoneme, { state: <symbol> }]
# where the state is one of :ready_to_continue, :completed or :aborted.
# The root node carries a placeholder label in slot 0 and the whole
# (normalised) target in slot 1.
class GabTree
  require 'tree'
  require 'string_to_ipa'
  # Resolved relative to this file, so loading does not depend on the
  # process's current working directory.
  require_relative 'matching'
  # NOTE(review): nothing in this class references Test::Unit; confirm no
  # other file relies on this require before removing it.
  require 'test/unit'

  # NOTE: the reader keeps its historical (misspelled) name for
  # compatibility; `match_criteria` is provided as a correctly spelled alias.
  attr_reader :match_critera, :phoneme_convert
  attr_accessor :root_node, :node_counter, :target_phoneme

  alias match_criteria match_critera

  # @param target_phoneme [String] text (or phoneme string) to decompose
  # @param args [Hash] optional settings:
  #   :phoneme_convert - when exactly `true`, the target is converted with
  #                      String#to_ipa; otherwise it is downcased and
  #                      stripped of whitespace
  #   :match_criteria  - object responding to `match(remaining_phoneme)`;
  #                      defaults to ExactMatch.new
  def initialize(target_phoneme, args = {})
    @target_phoneme = target_phoneme
    @phoneme_convert = args[:phoneme_convert] || default_phoneme_convert
    @match_critera = args[:match_criteria] || default_match_criteria

    @target_phoneme =
      if @phoneme_convert == true
        target_phoneme_convert
      else
        default_target_phoneme
      end

    @node_counter = 0
    @root_node = Tree::TreeNode.new(
      node_counter_to_string,
      ["NONE IM THE ROOT!", @target_phoneme, { state: :ready_to_continue }]
    )
  end

  # Default for the :phoneme_convert option.
  def default_phoneme_convert
    false
  end

  # Default for the :match_criteria option.
  def default_match_criteria
    ExactMatch.new
  end

  # Converts each whitespace separated word of the target with
  # String#to_ipa, strips the stress marks and joins the results.
  # Returns nil unless phoneme conversion is enabled.
  def target_phoneme_convert
    return unless @phoneme_convert == true

    @target_phoneme.split.map { |word| word.to_ipa.tr('ˈˌ', '') }.join
  end

  # Target normalised without conversion: downcased, whitespace removed.
  def default_target_phoneme
    target_phoneme.downcase.gsub(/\s+/, "")
  end

  # Current counter value as a String; used as the node name.
  def node_counter_to_string
    node_counter.to_s
  end

  # Keeps expanding leaves until every leaf is completed or aborted.
  def iterate_until_finished
    iterate_over_leaves until finished?
  end

  # True when there is at least one leaf and every leaf is either
  # :completed or :aborted.
  def finished?
    leaves = check_leaves.to_a
    return false if leaves.empty?

    leaves.all? do |node|
      state = check_node_state(node)
      state == :completed || state == :aborted
    end
  end

  # Visits every current leaf that is :ready_to_continue. A leaf with
  # phonemes left is expanded first; the node is then marked :completed,
  # and re-marked :aborted if it is still a leaf with phonemes left
  # (i.e. no child could be created for it).
  def iterate_over_leaves
    check_leaves.each do |node|
      next unless check_node_state(node) == :ready_to_continue

      try_to_make_leaves(node) unless checked_all_target_phonemes?(node)
      change_node_state_completed(node)

      change_node_state_aborted(node) if failedToMakeLeaves?(node)
    end
  end

  # Leaves of the tree, as returned by Tree::TreeNode#each_leaf.
  def check_leaves
    root_node.each_leaf
  end

  # State symbol stored in the node's content.
  def check_node_state(node)
    node.content[2][:state]
  end

  # True when the node has no remaining phoneme (nil or empty).
  def checked_all_target_phonemes?(node)
    remaining = node.content[1]
    remaining.nil? || remaining.length.zero?
  end

  def change_node_state_aborted(node)
    node.content[2][:state] = :aborted
  end

  def change_node_state_completed(node)
    node.content[2][:state] = :completed
  end

  # True when the node is still a leaf although phonemes remain.
  def failedToMakeLeaves?(node)
    node.is_leaf? && !checked_all_target_phonemes?(node)
  end

  # Asks the match criteria for matches against the node's remaining
  # phoneme and adds one child per match. Does nothing when the criteria
  # returns nil.
  def try_to_make_leaves(node)
    matches = make_match_array(get_remaining_phoneme_from(node))
    make_child_nodes(matches, node) unless matches.nil?
  end

  # Remaining (not yet matched) phoneme stored on the node.
  def get_remaining_phoneme_from(node)
    node.content[1]
  end

  # Delegates to the configured match criteria.
  def make_match_array(node_target_phoneme) # Consider renaming to remaining_phoneme
    match_critera.match(node_target_phoneme)
  end

  # Increments the node counter through the instance variable. A bare
  # `node_counter += 1` inside a method assigns a new local variable
  # instead of calling the accessor; `self.node_counter += 1` would be
  # the accessor based equivalent.
  def node_counter_increment
    @node_counter += 1
  end

  # Adds one child to `node` per [matched_phoneme, remaining_chunk] pair;
  # each child starts in the :ready_to_continue state.
  def make_child_nodes(match_array, node)
    match_array.each do |matched_phoneme, remaining_phoneme_chunk|
      node_counter_increment
      node << Tree::TreeNode.new(
        node_counter_to_string,
        [matched_phoneme, remaining_phoneme_chunk, { state: :ready_to_continue }]
      )
    end
  end
end
|
161
|
+
|
162
|
+
|
163
|
+
|
164
|
+
|
165
|
+
|
@@ -0,0 +1,69 @@
|
|
1
|
+
# Resolved relative to this file rather than the current working directory.
require_relative 'gab_tree'
|
2
|
+
# Extracts the results from a finished tree: for every :completed leaf,
# the slot-0 content values along the path from just below the root down
# to that leaf.
#
# NOTE(review): this class inherits from GabTree but its initializer does
# not call super, so GabTree's own state (root_node, node_counter, ...)
# is never set up here — confirm the inheritance is intentional.
class GabTreeProcessor < GabTree
  attr_reader :gab_tree

  # @param gab_tree root node of a tree (must respond to `each_leaf`)
  def initialize(gab_tree) # Root node of gab_tree
    @gab_tree = gab_tree
  end

  # @return [Array<Array>] one path per completed leaf
  def result
    leaf_phoneme_array_from_completed_tree
  end

  # Leaves of the wrapped tree (overrides GabTree#check_leaves, which
  # reads root_node instead).
  def check_leaves # This seems to duplicate the GabTree class code.
    gab_tree.each_leaf
  end

  # Pairs each completed leaf's content with its ancestors' content and
  # turns every pair into a root-to-leaf ordered array.
  def leaf_phoneme_array_from_completed_tree
    nodes = check_leaves
    parents_content = conv_node_parents_path_to_parents_content(
      node_parents_path_from_nodes(nodes)
    )
    leafs_content = get_leaf_content(nodes)
    massage_zipped_content(leafs_content.zip(parents_content))
  end

  # Removes, in place, every node whose state is not :completed and
  # returns the same array. Uses delete_if because deleting from an array
  # while iterating it with `each` skips elements.
  def only_keep_completed_nodes(nodes)
    nodes.delete_if { |node| node.content[2][:state] != :completed }
  end

  # Ancestor list (Tree::TreeNode#parentage) of every completed node.
  def node_parents_path_from_nodes(nodes)
    nodes
      .select { |node| node.content[2][:state] == :completed }
      .map { |node| node.parentage }
  end

  # Maps every ancestor list to the list of its slot-0 content values.
  def conv_node_parents_path_to_parents_content(node_parents_path)
    node_parents_path.map { |node_path| content_from_node_path(node_path) }
  end

  # Slot-0 content value of every node on the given path.
  def content_from_node_path(node_path)
    node_path.map { |node| node.content[0] }
  end

  # Slot-0 content value of every completed node.
  def get_leaf_content(nodes)
    nodes
      .select { |node| node.content[2][:state] == :completed }
      .map { |node| node.content[0] }
  end

  # Flattens each [leaf_content, parents_content] pair, drops the last
  # element and reverses what is left.
  # NOTE(review): the dropped element is presumably the root placeholder,
  # assuming parentage lists ancestors from nearest parent to root — confirm.
  def massage_zipped_content(zipped_content)
    zipped_content.map do |match|
      path = match.flatten
      path.pop
      path.reverse
    end
  end
end
|
data/lib/matching.rb
ADDED
@@ -0,0 +1,232 @@
|
|
1
|
+
|
2
|
+
# Resolved relative to this file rather than the current working directory.
require_relative 'db'
|
3
|
+
|
4
|
+
# Base class of the matching strategies; mixes in DataBase.
class BasicMatching
  include DataBase

  # Removes, in place, every candidate that is longer than the target and
  # returns the same array.
  #
  # Uses delete_if: deleting from an array while iterating it with `each`
  # skips the element that follows each deletion, so consecutive too-long
  # candidates used to survive.
  #
  # @param potential_match_array [Array<#length>] candidates (mutated)
  # @param node_target_phoneme [#length] target the candidates must fit in
  # @return [Array] potential_match_array
  def removeTooLongPotentialMatches(
    potential_match_array, node_target_phoneme)
    potential_match_array.delete_if do |word|
      word.length > node_target_phoneme.length
    end
  end
end
|
18
|
+
|
19
|
+
# Matching strategy that accepts a candidate only when every one of its
# symbols equals the symbol at the same position of the target.
#
# NOTE(review): the shape of the values returned by `db[key]` is not
# visible from this file; the code below only assumes they respond to
# `select` and that the first block argument is a string-like candidate.
class ExactMatch < BasicMatching
  # First symbol of the target; used as the lookup key.
  def determine_first_letter(node_target_phoneme)
    node_target_phoneme[0]
  end

  # Candidates stored under `key`.
  def pull_relevant_values_from_dict(key)
    db[key]
  end

  # Finds every candidate that is a prefix of the target.
  #
  # @param node_target_phoneme [String, nil] remaining target
  # @return [Array<Array>] [match, remaining_chunk] pairs; empty when the
  #   target is nil/empty or nothing is stored for its first symbol
  #   (these cases used to raise NoMethodError)
  def match(node_target_phoneme)
    return [] if node_target_phoneme.nil? || node_target_phoneme.empty?

    first_letter = determine_first_letter(node_target_phoneme)
    relevant_values = pull_relevant_values_from_dict(first_letter)
    return [] if relevant_values.nil?

    matches = relevant_values.select do |word, _phoneme_representation|
      passes_tests?(word, node_target_phoneme)
    end
    phoneme_chunks = determine_remaining_phoneme_chunks(matches, node_target_phoneme)
    combine_matches_and_phoneme_chunks(matches, phoneme_chunks)
  end

  # Cheap checks first (first symbol, length), then the full comparison.
  def passes_tests?(word, node_target_phoneme)
    letter_match?(word, node_target_phoneme) &&
      length_okay?(word, node_target_phoneme) &&
      remaining_letter_match?(word, node_target_phoneme)
  end

  # True when both values hold the same symbol at `index`.
  def letter_match?(word, node_target_phoneme, index = 0)
    word[index] == node_target_phoneme[index]
  end

  # True when the candidate is not longer than the target.
  def length_okay?(word, node_target_phoneme)
    word.length <= node_target_phoneme.length
  end

  # True when every position of the candidate matches the target.
  def remaining_letter_match?(word, node_target_phoneme)
    (0...word.length).all? do |index|
      letter_match?(word, node_target_phoneme, index)
    end
  end

  # For every match, the part of the target that follows the first
  # `match.length` symbols.
  def determine_remaining_phoneme_chunks(final_matches, node_target_phoneme)
    final_matches.map do |phonemes|
      node_target_phoneme[phonemes.length..-1]
    end
  end

  # Pairs each match with its remaining chunk.
  def combine_matches_and_phoneme_chunks(final_matches, phoneme_chunks)
    final_matches.zip(phoneme_chunks)
  end
end
|
76
|
+
|
77
|
+
# Matching strategy that, in addition to identical symbols, accepts the
# substitutions listed in `basic_equivalence_dict`, and caches the result
# of every search in `previously_searched_phonemes`.
#
# NOTE(review): `db[key]` is assumed to yield [word, phoneme_representation]
# pairs when enumerated — confirm against the DataBase module.
class LooseMatch < BasicMatching
  attr_reader :basic_equivalence_dict, :prev_switch
  attr_accessor :node_target_phoneme, :previously_searched_phonemes

  def initialize
    super
    # symbol => symbols that are accepted in its place
    @basic_equivalence_dict = {
      "i" => %w[ɪ ə],
      "z" => %w[s],
      "ə" => %w[i ɪ ʌ æ u],
      "u" => %w[ə],
      "ɪ" => %w[i],
      "d" => %w[t],
      "æ" => %w[ə ʌ ɛ],
      "s" => %w[z],
      "ɝ" => %w[r],
      "r" => %w[ɝ],
      "a" => %w[ʌ ə],
      "p" => %w[b],
      "t" => %w[d],
      "g" => %w[k],
      "ʌ" => %w[ə æ i a],
      "ɛ" => %w[æ],
      "b" => %w[p],
      "f" => %w[v],
      "v" => %w[f],
      "k" => %w[g],
      "j" => %w[ʃ],
      "ʃ" => %w[j]
    }
    @previously_searched_phonemes = {}
  end

  # Stores the result of a search in the cache.
  def add_to_prev_phones(node_target_phoneme, matches)
    previously_searched_phonemes[node_target_phoneme] = matches
  end

  # True when the target has exactly one symbol; false for nil and for
  # anything that has no length.
  def one_phoneme_remaining?(node_target_phoneme)
    node_target_phoneme.respond_to?(:length) &&
      node_target_phoneme.length == 1
  end

  # Builds a pair from the `node_target_phoneme` accessor and an empty
  # remainder. Not used by #match.
  def bail
    [node_target_phoneme, ""]
  end

  # True when the target is nil or empty.
  def no_phonemes_remaining?(node_target_phoneme)
    node_target_phoneme.nil? || node_target_phoneme.length == 0
  end

  # Converts each whitespace separated word with String#to_ipa, strips
  # the stress marks and joins the results.
  def target_phoneme_convert(node_target_phoneme)
    node_target_phoneme.split.map { |word| word.to_ipa.tr('ˈˌ', '') }.join
  end

  # Finds the candidates whose phoneme representation loosely matches the
  # beginning of the target.
  #
  # - A cached result is returned as is.
  # - A nil/empty target returns nil and is not cached.
  # - A single-symbol target is never searched: nil is cached and returned.
  # - Otherwise the search result (nil when nothing matches or when the
  #   search raised) is cached and returned.
  #
  # @return [Array<Array>, nil] [word, remaining_chunk] pairs, or nil
  def match(node_target_phoneme)
    if previously_searched_phonemes.key?(node_target_phoneme)
      return previously_searched_phonemes[node_target_phoneme]
    end
    return nil if no_phonemes_remaining?(node_target_phoneme)

    matches =
      if one_phoneme_remaining?(node_target_phoneme)
        nil
      else
        search_matches(node_target_phoneme)
      end

    add_to_prev_phones(node_target_phoneme, matches)
    matches
  end

  # First symbol of the target; used as the lookup key.
  def determine_first_letter(node_target_phoneme)
    node_target_phoneme[0]
  end

  # Entries stored under `key` followed by the entries stored under each
  # of its equivalent symbols.
  #
  # Always returns a new array: the collections returned by `db` are not
  # modified (equivalents used to be appended to `db[key]` itself), and a
  # key without entries contributes nothing instead of raising.
  def pull_relevant_values_from_dict(key)
    lookup_keys = [key] + basic_equivalence_dict.fetch(key, [])
    lookup_keys.flat_map { |lookup_key| db[lookup_key].to_a }
  end

  # Fresh [word, phoneme_representation] pairs for every entry whose
  # representation passes the tests. New pairs are built on purpose:
  # #post_processing overwrites their second slot.
  def get_relevant_matches(relevant_values, node_target_phoneme)
    relevant_values.each_with_object([]) do |(word, phoneme_representation), kept|
      if passes_tests?(phoneme_representation, node_target_phoneme)
        kept << [word, phoneme_representation]
      end
    end
  end

  # Cheap checks first (first symbol, length), then the full comparison.
  def passes_tests?(word, node_target_phoneme)
    letter_match?(word, node_target_phoneme) &&
      length_okay?(word, node_target_phoneme) &&
      remaining_letter_match?(word, node_target_phoneme)
  end

  # True when the symbols at `index` are identical, or when the target's
  # symbol is listed as an equivalent of the candidate's symbol.
  def letter_match?(word, node_target_phoneme, index = 0)
    candidate_symbol = word[index]
    target_symbol = node_target_phoneme[index]

    candidate_symbol == target_symbol ||
      basic_equivalence_dict.fetch(candidate_symbol, []).include?(target_symbol)
  end

  # True when the candidate is not longer than the target.
  def length_okay?(word, node_target_phoneme)
    word.length <= node_target_phoneme.length
  end

  # True when every position of the candidate matches the target.
  def remaining_letter_match?(word, node_target_phoneme)
    (0...word.length).all? do |index|
      letter_match?(word, node_target_phoneme, index)
    end
  end

  # Part of the target that follows the first
  # `phoneme_representation.length` symbols.
  def determine_remaining_phoneme_chunks(phoneme_representation, node_target_phoneme)
    node_target_phoneme[phoneme_representation.length..-1]
  end

  # Replaces the second slot of every match with its remaining chunk and
  # returns the (mutated) matches.
  def post_processing(chunks, matches)
    matches.zip(chunks).each { |pair, chunk| pair[1] = chunk }
    matches
  end

  # Pairs each match with its remaining chunk.
  def combine_matches_and_phoneme_chunks(final_matches, phoneme_chunks)
    final_matches.zip(phoneme_chunks)
  end

  private

  # Runs the actual lookup for #match. Returns the processed matches, or
  # nil when nothing matched. Any StandardError raised during the lookup
  # is printed and turned into nil (best effort, as before).
  def search_matches(node_target_phoneme)
    first_letter = determine_first_letter(node_target_phoneme)
    begin
      relevant_values = pull_relevant_values_from_dict(first_letter)
      matches = get_relevant_matches(relevant_values, node_target_phoneme)
      return nil if matches.empty?

      chunks = matches.map do |_word, phoneme_representation|
        determine_remaining_phoneme_chunks(phoneme_representation, node_target_phoneme)
      end
      post_processing(chunks, matches)
    rescue StandardError => e
      puts "Inner rescue"
      puts e, e.backtrace
      nil
    end
  end
end
|