promper 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/CODE_OF_CONDUCT.md +49 -0
- data/Gemfile +4 -0
- data/LICENSE +21 -0
- data/LICENSE.txt +21 -0
- data/README.md +47 -0
- data/Rakefile +2 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/lib/db.rb +10 -0
- data/lib/db/master.yml +416576 -0
- data/lib/gab_tree.rb +165 -0
- data/lib/gab_tree_processor.rb +69 -0
- data/lib/matching.rb +232 -0
- data/lib/promper.rb +39 -0
- data/lib/promper/version.rb +3 -0
- data/promper.gemspec +60 -0
- metadata +152 -0
data/lib/gab_tree.rb
ADDED
@@ -0,0 +1,165 @@
|
|
1
|
+
# Builds a tree of candidate splits of a target phoneme string.
#
# Every node's content is a three-element array:
#
#   [matched_phoneme, remaining_phoneme, { state: <symbol> }]
#
# where the state is one of :ready_to_continue, :completed or :aborted.
# The root node carries the placeholder "NONE IM THE ROOT!" as its matched
# phoneme and the whole (normalised) target as its remaining phoneme.
class GabTree
  # NOTE(review): these requires run when the class body is evaluated.
  # `require './matching'` is resolved against the process working directory,
  # and nothing in this class appears to use test/unit -- confirm with the
  # gem's entry point before changing either of them.
  require 'tree'
  require 'string_to_ipa'
  require './matching'
  require 'test/unit'

  # States in which a leaf needs no further processing.
  TERMINAL_STATES = [:completed, :aborted].freeze

  # NOTE: the reader is spelled `match_critera` (sic); the name is kept
  # because it is part of the public interface.
  attr_reader :match_critera, :phoneme_convert
  attr_accessor :root_node, :node_counter, :target_phoneme

  # target_phoneme - String to split into words.
  # args           - optional Hash:
  #                  :phoneme_convert - only the literal `true` enables IPA
  #                                     conversion of the target (default false).
  #                  :match_criteria  - object responding to `match(string)`
  #                                     (default: ExactMatch.new).
  def initialize(target_phoneme, args={})
    @target_phoneme = target_phoneme
    @phoneme_convert = args[:phoneme_convert] || default_phoneme_convert
    @match_critera = args[:match_criteria] || default_match_criteria

    # Both normalisers read @target_phoneme, so it must already hold the raw
    # input at this point.
    @target_phoneme =
      if @phoneme_convert == true
        target_phoneme_convert
      else
        default_target_phoneme
      end

    @node_counter = 0
    @root_node = Tree::TreeNode.new(
      node_counter_to_string,
      ["NONE IM THE ROOT!", @target_phoneme, state: :ready_to_continue]
    )
  end

  # Default for the :phoneme_convert option.
  def default_phoneme_convert
    false
  end

  # Default for the :match_criteria option.
  def default_match_criteria
    ExactMatch.new
  end

  # Converts each whitespace-separated word of the target to IPA (via
  # String#to_ipa), strips the stress marks and joins the results.
  # Returns nil unless phoneme conversion is enabled.
  def target_phoneme_convert
    return unless @phoneme_convert == true

    @target_phoneme.split.map { |word| word.to_ipa.tr('ˈˌ', '') }.join
  end

  # Lower-cases the target and removes all whitespace.
  def default_target_phoneme
    target_phoneme.downcase.gsub(/\s+/, "")
  end

  # The node counter as a String; used as the node name.
  def node_counter_to_string
    node_counter.to_s
  end

  # Keeps expanding the tree until every leaf is completed or aborted.
  def iterate_until_finished
    iterate_over_leaves until finished?
  end

  # True when there is at least one leaf and every leaf is in a terminal
  # state (:completed or :aborted).
  def finished?
    leaves = check_leaves
    leaves.any? && leaves.all? do |leaf|
      TERMINAL_STATES.include?(check_node_state(leaf))
    end
  end

  # Processes every leaf that is still :ready_to_continue.
  #
  # A leaf with nothing left to match is marked :completed. Otherwise child
  # nodes are created for each match and the node is marked :completed; if no
  # children could be created (it is still a leaf with phonemes left) it is
  # marked :aborted instead.
  def iterate_over_leaves
    check_leaves.each do |node|
      next unless check_node_state(node) == :ready_to_continue

      try_to_make_leaves(node) unless checked_all_target_phonemes?(node)
      change_node_state_completed(node)

      change_node_state_aborted(node) if failedToMakeLeaves?(node)
    end
  end

  # The current leaves of the tree.
  def check_leaves
    root_node.each_leaf
  end

  # The state symbol stored in the node's content.
  def check_node_state(node)
    node.content[2][:state]
  end

  # True when the node has no remaining phoneme (nil or empty).
  def checked_all_target_phonemes?(node)
    remaining = node.content[1]
    remaining.nil? || remaining.empty?
  end

  def change_node_state_aborted(node)
    node.content[2][:state] = :aborted
  end

  def change_node_state_completed(node)
    node.content[2][:state] = :completed
  end

  # True when the node is still a leaf although phonemes remain, i.e. no
  # child could be created for it.
  def failedToMakeLeaves?(node)
    node.is_leaf? && !checked_all_target_phonemes?(node)
  end

  # Asks the match criteria for matches against the node's remaining phoneme
  # and adds one child per match. A nil match result adds nothing.
  def try_to_make_leaves(node)
    match_array = make_match_array(get_remaining_phoneme_from(node))
    make_child_nodes(match_array, node) unless match_array.nil?
  end

  # The part of the target this node still has to match.
  def get_remaining_phoneme_from(node)
    node.content[1]
  end

  # Delegates to the configured match criteria.
  def make_match_array(node_target_phoneme) # Consider renaming to remaining_phoneme
    match_critera.match(node_target_phoneme)
  end

  # Increments the node counter through the instance variable.
  #
  # A bare `node_counter += 1` does not work here: Ruby parses it as an
  # assignment to a new local variable named node_counter (which starts out
  # nil), not as a call to the setter. `self.node_counter += 1` would be the
  # accessor-based equivalent.
  def node_counter_increment
    @node_counter += 1
  end

  # Adds one child to `node` per [matched_phoneme, remaining_chunk] entry of
  # `match_array`; each child starts in the :ready_to_continue state.
  def make_child_nodes(match_array, node)
    match_array.each do |matched_phoneme, remaining_phoneme_chunk|
      node_counter_increment
      node << Tree::TreeNode.new(
        node_counter_to_string,
        [matched_phoneme, remaining_phoneme_chunk, state: :ready_to_continue]
      )
    end
  end
end
|
161
|
+
|
162
|
+
|
163
|
+
|
164
|
+
|
165
|
+
|
@@ -0,0 +1,69 @@
|
|
1
|
+
require './gab_tree'
|
2
|
+
# Extracts the word sequences represented by the completed leaves of a tree
# built by GabTree.
#
# NOTE(review): this class inherits from GabTree but its constructor does not
# call super, so the inherited state (root_node, match criteria, ...) is never
# initialised; only the methods defined here are safe to rely on.
class GabTreeProcessor < GabTree
  attr_reader :gab_tree

  def initialize(gab_tree) # Root node of gab_tree
    @gab_tree = gab_tree
  end

  # One array per completed leaf, listing the matched content from just below
  # the root down to that leaf.
  def result
    leaf_phoneme_array_from_completed_tree
  end

  # The leaves below the stored root node.
  def check_leaves # This seems to duplicate the GabTree class code.
    gab_tree.each_leaf
  end

  # Pairs each completed leaf's content with the content of its ancestors and
  # turns every pair into a root-to-leaf sequence.
  def leaf_phoneme_array_from_completed_tree
    leaves = check_leaves
    ancestor_paths = node_parents_path_from_nodes(leaves)
    ancestor_content = conv_node_parents_path_to_parents_content(ancestor_paths)
    leaf_content = get_leaf_content(leaves)
    massage_zipped_content(leaf_content.zip(ancestor_content))
  end

  # Removes, in place, every node whose state is not :completed and returns
  # the same array.
  #
  # delete_if is used instead of deleting inside `each`: removing elements
  # from an array while iterating over it makes the iteration skip the
  # element that follows each removal.
  def only_keep_completed_nodes(nodes)
    nodes.delete_if { |node| node.content[2][:state] != :completed }
  end

  # For each completed node, its ancestors as returned by `parentage`.
  def node_parents_path_from_nodes(nodes)
    completed = nodes.select { |node| node.content[2][:state] == :completed }
    completed.map { |node| node.parentage }
  end

  # Maps every ancestor path to the matched content of its nodes.
  def conv_node_parents_path_to_parents_content(node_parents_path)
    node_parents_path.map { |node_path| content_from_node_path(node_path) }
  end

  # The matched content (content[0]) of every node on the path.
  #
  # A nil path is treated as empty: Tree::TreeNode#parentage returns nil for
  # a root node, which happens when the root itself is a completed leaf.
  def content_from_node_path(node_path)
    Array(node_path).map { |node| node.content[0] }
  end

  # The matched content (content[0]) of every completed node.
  def get_leaf_content(nodes)
    completed = nodes.select { |node| node.content[2][:state] == :completed }
    completed.map { |node| node.content[0] }
  end

  # Turns each [leaf_content, ancestor_content] pair into a flat sequence:
  # flatten it, drop the last element (the outermost ancestor, i.e. the
  # root's placeholder content) and reverse so the leaf comes last.
  def massage_zipped_content(zipped_content)
    zipped_content.map do |match|
      sequence = match.flatten
      sequence.pop
      sequence.reverse
    end
  end
end
|
data/lib/matching.rb
ADDED
@@ -0,0 +1,232 @@
|
|
1
|
+
|
2
|
+
require './db'
|
3
|
+
|
4
|
+
# Base class for the matching strategies; mixes in DataBase, which is
# defined in ./db.
class BasicMatching
  include DataBase

  # Removes, in place, every entry of potential_match_array that is longer
  # than node_target_phoneme and returns the same array.
  #
  # delete_if is used instead of calling `delete` inside `each`: deleting
  # while iterating shifts the remaining elements and makes the loop skip the
  # entry that follows each removal, so adjacent over-long entries survived.
  def removeTooLongPotentialMatches(
    potential_match_array, node_target_phoneme)
    limit = node_target_phoneme.length
    potential_match_array.delete_if { |word| word.length > limit }
  end
end
|
18
|
+
|
19
|
+
# Matching strategy that accepts a dictionary entry only when every one of
# its characters equals the character at the same position of the target.
#
# The constructor is inherited unchanged from BasicMatching.
class ExactMatch < BasicMatching
  # First character of the target; used as the dictionary key.
  def determine_first_letter(node_target_phoneme)
    node_target_phoneme[0]
  end

  # The dictionary entries stored under `key`.
  def pull_relevant_values_from_dict(key)
    db[key]
  end

  # Returns the entries that are a prefix of node_target_phoneme, each paired
  # with the part of the target that is left over: [[entry, rest], ...].
  #
  # Returns an empty array when the dictionary lookup yields nil, so a target
  # whose first character has no dictionary bucket is reported as "no match"
  # instead of raising NoMethodError.
  def match(node_target_phoneme)
    first_letter = determine_first_letter(node_target_phoneme)
    relevant_values = pull_relevant_values_from_dict(first_letter)
    return [] if relevant_values.nil?

    matches = relevant_values.select do |word, _phoneme_representation|
      passes_tests?(word, node_target_phoneme)
    end
    phoneme_chunks = determine_remaining_phoneme_chunks(matches, node_target_phoneme)
    combine_matches_and_phoneme_chunks(matches, phoneme_chunks)
  end

  # True when `word` is non-empty-compatible with the target's first
  # character, is not longer than the target and matches it at every index.
  def passes_tests?(word, node_target_phoneme)
    letter_match?(word, node_target_phoneme) &&
      length_okay?(word, node_target_phoneme) &&
      remaining_letter_match?(word, node_target_phoneme)
  end

  # True when both strings have the same character at `index`.
  def letter_match?(word, node_target_phoneme, index=0)
    word[index] == node_target_phoneme[index]
  end

  # True when `word` is not longer than the target.
  def length_okay?(word, node_target_phoneme)
    word.length <= node_target_phoneme.length
  end

  # True when every character of `word` matches the target at its position.
  def remaining_letter_match?(word, node_target_phoneme)
    (0...word.length).all? do |position|
      letter_match?(word, node_target_phoneme, position)
    end
  end

  # For every match, the part of the target that follows the first
  # `match.length` characters.
  #
  # NOTE(review): `length` is taken from each element of final_matches as-is.
  # If dictionary entries are [word, phoneme] pairs rather than strings, this
  # is the pair size, not the word length -- confirm the entry shape in db.
  def determine_remaining_phoneme_chunks(final_matches, node_target_phoneme)
    final_matches.map do |phonemes|
      node_target_phoneme[phonemes.length..-1]
    end
  end

  # Pairs each match with its leftover chunk.
  def combine_matches_and_phoneme_chunks(final_matches,phoneme_chunks)
    final_matches.zip(phoneme_chunks)
  end
end
|
76
|
+
|
77
|
+
# Matching strategy that also accepts "equivalent" characters: a dictionary
# character matches a target character when they are equal or when the target
# character is listed for it in basic_equivalence_dict.
#
# Results are cached per target string in previously_searched_phonemes.
class LooseMatch < BasicMatching
  attr_reader :basic_equivalence_dict, :prev_switch
  attr_accessor :node_target_phoneme, :previously_searched_phonemes

  def initialize
    super
    @basic_equivalence_dict = {
      "i" => %w[ɪ ə],
      "z" => %w[s],
      "ə" => %w[i ɪ ʌ æ u],
      "u" => %w[ə],
      "ɪ" => %w[i],
      "d" => %w[t],
      "æ" => %w[ə ʌ ɛ],
      "s" => %w[z],
      "ɝ" => %w[r],
      "r" => %w[ɝ],
      "a" => %w[ʌ ə],
      "p" => %w[b],
      "t" => %w[d],
      "g" => %w[k],
      "ʌ" => %w[ə æ i a],
      "ɛ" => %w[æ],
      "b" => %w[p],
      "f" => %w[v],
      "v" => %w[f],
      "k" => %w[g],
      "j" => %w[ʃ],
      "ʃ" => %w[j]
    }
    @previously_searched_phonemes = {}
  end

  # Stores `matches` (possibly nil) as the cached result for the target.
  def add_to_prev_phones(node_target_phoneme, matches)
    previously_searched_phonemes[node_target_phoneme] = matches
  end

  # True when the target has exactly one character; false for nil.
  def one_phoneme_remaining?(node_target_phoneme)
    node_target_phoneme.respond_to?(:length) && node_target_phoneme.length == 1
  end

  # Pair of the `node_target_phoneme` attribute and an empty remainder.
  #
  # NOTE(review): nothing in this class assigns that attribute, and `match`
  # does not use this method's result -- confirm the intended behaviour.
  def bail
    [node_target_phoneme, ""]
  end

  # True when the target is nil or empty.
  def no_phonemes_remaining?(node_target_phoneme)
    node_target_phoneme.nil? || node_target_phoneme.empty?
  end

  # Converts each whitespace-separated word to IPA (via String#to_ipa),
  # strips the stress marks and joins the results.
  def target_phoneme_convert(node_target_phoneme)
    node_target_phoneme.split.map { |word| word.to_ipa.tr('ˈˌ', '') }.join
  end

  # Returns [[word, remaining_chunk], ...] for every dictionary entry whose
  # phoneme representation loosely matches the start of the target, or nil
  # when there is none.
  #
  # A cached result is returned if the target was searched before. A nil or
  # empty target yields nil without being cached. A target with exactly one
  # character is not searched: it yields (and caches) nil.
  # NOTE(review): the original called `bail` for the one-character case but
  # discarded its value; that outcome is preserved here -- confirm intent.
  def match(node_target_phoneme)
    if previously_searched_phonemes.key?(node_target_phoneme)
      return previously_searched_phonemes[node_target_phoneme]
    end
    return nil if no_phonemes_remaining?(node_target_phoneme)

    matches =
      if one_phoneme_remaining?(node_target_phoneme)
        nil
      else
        search_dictionary(node_target_phoneme)
      end

    add_to_prev_phones(node_target_phoneme, matches)
    matches
  end

  # First character of the target; used as the dictionary key.
  def determine_first_letter(node_target_phoneme)
    node_target_phoneme[0]
  end

  # The dictionary entries stored under `key` followed by the entries stored
  # under each of its equivalent keys, as a new array.
  #
  # The result is built without touching the arrays returned by `db`:
  # appending the equivalent entries to `db[key]` itself would grow that
  # bucket on every lookup if `db` hands out the same objects each time.
  # Keys without a bucket contribute nothing.
  def pull_relevant_values_from_dict(key)
    lookup_keys = [key] + basic_equivalence_dict.fetch(key, [])
    lookup_keys.flat_map { |lookup_key| Array(db[lookup_key]) }
  end

  # Fresh [word, phoneme_representation] pairs for every entry whose phoneme
  # representation passes the tests. New arrays are created on purpose:
  # post_processing overwrites the second element of each pair.
  def get_relevant_matches(relevant_values, node_target_phoneme)
    selected = relevant_values.map do |word, phoneme_representation|
      next unless passes_tests?(phoneme_representation, node_target_phoneme)

      [word, phoneme_representation]
    end
    selected.compact
  end

  # True when `word` matches the target's first character, is not longer
  # than the target and matches it at every index.
  def passes_tests?(word, node_target_phoneme)
    letter_match?(word, node_target_phoneme) &&
      length_okay?(word, node_target_phoneme) &&
      remaining_letter_match?(word, node_target_phoneme)
  end

  # True when the characters at `index` are equal, or when the target's
  # character is listed as an equivalent of the word's character.
  def letter_match?(word, node_target_phoneme, index=0)
    candidate = word[index]
    wanted = node_target_phoneme[index]
    candidate == wanted ||
      basic_equivalence_dict.fetch(candidate, []).include?(wanted)
  end

  # True when `word` is not longer than the target.
  def length_okay?(word, node_target_phoneme)
    word.length <= node_target_phoneme.length
  end

  # True when every character of `word` matches the target at its position.
  def remaining_letter_match?(word, node_target_phoneme)
    (0...word.length).all? do |position|
      letter_match?(word, node_target_phoneme, position)
    end
  end

  # The part of the target that follows the first
  # `phoneme_representation.length` characters.
  def determine_remaining_phoneme_chunks(phoneme_representation, node_target_phoneme)
    node_target_phoneme[phoneme_representation.length..-1]
  end

  # Replaces the second element of every match with its remaining chunk and
  # returns `matches`.
  def post_processing(chunks, matches)
    matches.zip(chunks).each { |pair, chunk| pair[1] = chunk }
    matches
  end

  # Pairs each match with its leftover chunk.
  def combine_matches_and_phoneme_chunks(final_matches,phoneme_chunks)
    final_matches.zip(phoneme_chunks)
  end

  private

  # Looks the target up in the dictionary. Returns the post-processed
  # matches, or nil when nothing matched or an error was raised; errors are
  # printed to standard output rather than propagated.
  def search_dictionary(node_target_phoneme)
    first_letter = determine_first_letter(node_target_phoneme)
    relevant_values = pull_relevant_values_from_dict(first_letter)

    matches = get_relevant_matches(relevant_values, node_target_phoneme)
    return nil if matches.empty?

    chunks = matches.map do |_word, phoneme_representation|
      determine_remaining_phoneme_chunks(phoneme_representation, node_target_phoneme)
    end
    post_processing(chunks, matches)
  rescue StandardError => error
    puts "Inner rescue"
    puts error, error.backtrace
    nil
  end
end
|