poefy 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,208 @@
1
+ #!/usr/bin/env ruby
2
+ # Encoding: UTF-8
3
+
4
+ ################################################################################
5
+ # Two methods for assessing permutations of an input array versus an
6
+ # array of conditions for each element.
7
+ # Both methods return an output array consisting of samples from an
8
+ # input array, for which output[0] satisfies condition[0], etc.
9
+ ################################################################################
10
+ # '#conditional_permutation' returns a complete permutation of an array.
11
+ # i.e. output length == array length
12
+ # Any elements in the array that are extra to the number of conditions will
13
+ # be assumed valid.
14
+ # array = [1,2,3,4,5].shuffle
15
+ # conditions = [
16
+ # proc { |arr, elem| elem < 2},
17
+ # proc { |arr, elem| elem > 2},
18
+ # proc { |arr, elem| elem > 1}
19
+ # ]
20
+ # possible output = [1,3,4,5,2]
21
+ #
22
+ #
23
+ # ToDo: This is now not used! Need to add 'current_array' argument.
24
+ #
25
+ #
26
+ ################################################################################
27
+ # '#conditional_selection' returns an array that satisfies only the conditions.
28
+ # i.e. output length == conditions length
29
+ # array = [1,2,3,4,5].shuffle
30
+ # conditions = [
31
+ # proc { |arr, elem| elem < 2},
32
+ # proc { |arr, elem| elem > 2},
33
+ # proc { |arr, elem| elem > 1}
34
+ # ]
35
+ # possible output = [1,5,3]
36
+ ################################################################################
37
+ # Condition array:
38
+ # Must contain boolean procs using args |arr, elem|
39
+ # 'arr' is a reference to the current array that has been built up
40
+ # through the recursion chain.
41
+ # 'elem' is a reference to the current element.
42
+ ################################################################################
43
+
44
module Poefy

  # Helpers for choosing elements of an array so that the element placed at
  # each output index satisfies the condition proc at the same index.
  module ConditionalSatisfaction

    # Delete the first element of 'array' that equals 'value', in place.
    # Returns the deleted element, or nil when 'value' is not present
    # (Array#delete_at with index == length deletes nothing).
    def delete_first array, value
      array.delete_at(array.index(value) || array.length)
    end

    # Make sure each line ends with a different word.
    # This is intended to be used in 'conditions' procs.
    # 'arr' is the array of line hashes chosen so far, 'elem' the candidate.
    # Returns true when no hash in 'arr' has the same 'final_word' as 'elem'.
    def diff_end arr, elem
      arr.none? { |i| i['final_word'] == elem['final_word'] }
    end

    # See if a line matches a particular 'poetic_form'.
    # 'line' is a hash read through the keys 'syllables' and 'line'.
    # 'poetic_form' may carry :syllable (a single value or a list of
    # allowed values; nil or 0 means "no constraint") and :regex.
    # Returns true only if every constraint that is present is satisfied.
    def validate_line line, poetic_form
      syllable = poetic_form[:syllable]
      if syllable && syllable != 0
        return false unless [*syllable].include?(line['syllables'])
      end

      regex = poetic_form[:regex]
      return false if regex && !line['line'].match(regex)

      true
    end

    # Input a rhyme array and a poetic_form hash.
    # Create a line by line array of conditions.
    # This will be used to analyse the validity of corpus lines.
    # Each output hash holds the 1-based :line number, the :rhyme token and
    # its downcased first character as :rhyme_letter. For every
    # 'poetic_form' entry whose value is a Hash, the value stored under the
    # line number is copied across (nil when that line has no entry).
    def conditions_by_line tokenised_rhyme, poetic_form
      tokenised_rhyme.each_with_index.map do |rhyme, index|
        line_no = index + 1
        line_hash = {
          line: line_no,
          rhyme: rhyme,
          rhyme_letter: rhyme[0].downcase
        }
        poetic_form.each do |key, value|
          line_hash[key] = value[line_no] if value.is_a? Hash
        end
        line_hash
      end
    end

    # Count how many times each rhyme token occurs, ignoring ' ' tokens,
    # then fold tokens containing an uppercase character into their
    # lowercase letter: each distinct uppercase token adds ONE to the count
    # of its downcased first character (whatever its own count was) and is
    # then removed from the result.
    #   e.g. counts {"A1"=>4, "b"=>6, "A2"=>4, "a"=>5}
    #        =>     {"b"=>6, "a"=>7}
    def unique_rhymes tokenised_rhyme
      upper = /[[:upper:]]/

      counts = Hash.new(0)
      tokenised_rhyme.each { |token| counts[token] += 1 unless token == ' ' }

      # Snapshot the uppercase keys first; the hash is modified below.
      uppers = counts.keys.select { |token| upper.match(token) }
      uppers.each { |token| counts[token[0].downcase] += 1 }

      counts.delete_if { |token, _| upper.match(token) }
    end

    ############################################################################

    # Return a permutation of 'array' where each element validates to the
    # same index in a 'conditions' array of procs that return Boolean.
    # Elements beyond the number of conditions are accepted as-is.
    # Returns [] when no permutation satisfies the conditions.
    #
    # Conditions are called as cond.call(current_array, elem), where
    # 'current_array' holds the elements already placed. A condition that
    # takes exactly one argument is called as cond.call(elem) instead, which
    # is how this method called every condition before 'current_array' was
    # added.
    #
    # This may take a whole lot of time, depending on how lenient the
    # conditions are. It is better for the stricter conditions to be
    # at the start of the array, due to the way the code is written.
    def conditional_permutation array, conditions,
                                current_iter = 0,
                                current_array = []

      # Get the current conditional. nil means "any value is allowed".
      cond = conditions[current_iter]

      array.each do |elem|
        valid =
          if cond.nil?
            true
          elsif cond.arity == 1
            cond.call(elem)
          else
            cond.call(current_array, elem)
          end
        next unless valid

        # Remove this element from the array, and recurse.
        remain = array.dup
        delete_first(remain, elem)

        # If the remaining array is empty, no need to recurse.
        return [elem] if remain.empty?

        rest = conditional_permutation(remain, conditions,
                                       current_iter + 1,
                                       current_array + [elem])

        # An empty result means this choice breaks a later condition,
        # so fall through and try the next candidate.
        return [elem] + rest unless rest.empty?
      end

      []
    end

    # Return values from 'array' where each element validates to the same
    # index in a 'conditions' array of procs that return Boolean.
    # On success the result has conditions.length elements.
    # Returns [] when the conditions cannot all be met, and nil when there
    # is no condition at 'current_iter' (e.g. 'conditions' is empty).
    # Conditions are called as cond.call(current_array, elem), where
    # 'current_array' holds the elements selected so far.
    def conditional_selection array, conditions,
                              current_iter = 0,
                              current_array = []

      # Get the current conditional.
      cond = conditions[current_iter]

      # Return nil if we have reached the end of the conditionals.
      # (nil, unlike [], tells the caller that nothing is left to satisfy.)
      return nil if cond.nil?

      array.each do |elem|
        next unless cond.call(current_array, elem)

        # Remove this element from the array, and recurse.
        remain = array.dup
        delete_first(remain, elem)
        rest = conditional_selection(remain, conditions,
                                     current_iter + 1,
                                     current_array + [elem])

        # nil: that was the last condition, so the selection is complete.
        return [elem] if rest.nil?

        # []: this choice breaks a later condition; try the next candidate.
        return [elem] + rest unless rest.empty?
      end

      []
    end

  end

end
207
+
208
+ ################################################################################
@@ -0,0 +1,252 @@
1
+ #!/usr/bin/env ruby
2
+ # Encoding: UTF-8
3
+
4
+ ################################################################################
5
+ # Class for connecting to a sqlite3 database.
6
+ ################################################################################
7
+
8
+ require 'sqlite3'
9
+ require 'tempfile'
10
+
11
+ require_relative 'string_manipulation.rb'
12
+ require_relative 'handle_error.rb'
13
+
14
+ ################################################################################
15
+
16
module Poefy

  # Wrapper around one SQLite3 database file holding a 'lines' table
  # (line, syllables, final_word, rhyme). The connection and the prepared
  # statements are created lazily on first use and cached on the instance.
  class Database

    include Poefy::StringManipulation
    include Poefy::HandleError

    attr_reader :console, :db_file

    # Finalizer must be a class variable.
    # NOTE(review): the inner proc is created in the class body, so the
    # @sproc_* variables it reads belong to the class object, and nothing
    # visible here assigns them. 'initialize' also passes @db before it has
    # been opened (nil). As written this finalizer therefore appears to do
    # nothing; left unchanged, confirm before relying on it.
    @@final = proc { |dbase| proc {
      @sproc_lines_all.close if @sproc_lines_all
      @sproc_rhymes_by_count.close if @sproc_rhymes_by_count
      @sproc_rhymes_by_count_syllables.close if @sproc_rhymes_by_count_syllables
      dbase.close if dbase
    } }

    # 'db_file' is the path of the SQLite file; 'console' is only stored
    # and exposed through its reader here.
    def initialize db_file, console = false
      @db_file = db_file
      @console = console
      ObjectSpace.define_finalizer(self, @@final.call(@db))
    end

    # Open global database session, if not already existing.
    # This is called in all methods where it is needed. So no need to
    # execute it before any calling code.
    # Returns the cached connection. If opening raises, the cache is
    # cleared and whatever 'handle_error' returns is returned instead.
    def db
      return @db if @db

      begin
        open
      rescue StandardError
        @db = nil
        return handle_error 'ERROR: Database does not yet exist'
      end
      @db
    end

    # Open the database file and cache the connection in @db.
    # Rows are returned as hashes.
    def open
      @db = SQLite3::Database.open(@db_file)
      @db.results_as_hash = true

      # Create a REGEX function in SQLite.
      # http://stackoverflow.com/questions/7302311
      # Two-argument SQL function 'regexp': case-insensitive Ruby regex
      # match of 'expression' against 'pattern', yielding 1 or 0.
      @db.create_function('regexp', 2) do |func, pattern, expression|
        regexp = Regexp.new(pattern.to_s, Regexp::IGNORECASE)
        func.result = expression.to_s.match(regexp) ? 1 : 0
      end
    end

    # Close the prepared statements and the database connection.
    # Only handles that have actually been created are closed, and the
    # cached references are cleared so that a later call re-opens the
    # database instead of reusing a closed handle.
    def close
      [
        @sproc_lines_all,
        @sproc_rhymes_by_count,
        @sproc_rhymes_by_count_syllables
      ].each { |statement| statement.close if statement }
      @sproc_lines_all = nil
      @sproc_rhymes_by_count = nil
      @sproc_rhymes_by_count_syllables = nil

      @db.close if @db
      @db = nil
    end

    # See if the database file exists or not.
    def exists?
      # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
      File.exist?(@db_file)
    end

    # Creates a database with the correct format, but only when the
    # database file does not exist yet. See #make_new! for the steps.
    def make_new lines
      make_new!(lines) unless exists?
    end

    # Force new database, overwriting existing.
    #   Convert input lines array to SQL import format file.
    #   Delete database if already exists.
    #   Create database using SQL import file.
    #   Delete both temporary files.
    # Requires the 'sqlite3' command line tool to be on the PATH.
    def make_new! lines

      # Convert the lines array into an import file.
      sql_import_file = save_sql_import_file lines
      sql_instruction_file = tmpfile

      begin
        # Delete any existing database. Best effort: failures are ignored.
        begin
          File.delete(@db_file)
        rescue StandardError
          nil
        end

        # Write SQL and SQLite instructions to temp file.
        # The SQL file is finicky. Each line requires no leading whitespace.
        sql = %Q[
          CREATE TABLE IF NOT EXISTS lines (
            line TEXT, syllables INT, final_word TEXT, rhyme TEXT
          );
          CREATE INDEX idx ON lines (rhyme, final_word, line);
          .separator "\t"
          .import #{sql_import_file} lines
        ].split("\n").map(&:strip).join("\n")
        File.open(sql_instruction_file, 'w') { |fo| fo.write sql }

        # Create the database using the SQL instructions. Arguments are
        # passed as a list (no shell), so a database path containing
        # spaces or shell metacharacters is handled literally; the
        # instruction file is fed to sqlite3 on stdin.
        system('sqlite3', @db_file,
               in: sql_instruction_file, out: File::NULL)
      ensure
        # Delete temporary files, even if something above failed.
        [sql_instruction_file, sql_import_file].each do |file|
          File.delete(file) if File.exist?(file)
        end
      end
    end

    # Execute an SQL request.
    # Returns the result of the query, or the result of 'handle_error'
    # if the query raises.
    def execute! sql
      db.execute sql
    rescue StandardError
      handle_error 'ERROR: Database has incorrect table structure', []
    end

    # Format a string for SQL: double up any double quotes.
    def format_sql_string string
      string.gsub('"','""')
    end

    # Public interfaces for private stored procedure methods.

    # Rhymes with at least 'rhyme_count' distinct final words. When
    # 'syllable_min_max' (a hash with :min and :max) is given, only lines
    # within that syllable range are considered.
    def sproc_rhymes_all! rhyme_count, syllable_min_max = nil
      if syllable_min_max
        sproc_rhymes_by_count_syllables rhyme_count, syllable_min_max
      else
        sproc_rhymes_by_count rhyme_count
      end
    end

    # All rows of the 'lines' table for a given rhyme.
    def sproc_lines_all! rhyme
      sproc_lines_all rhyme
    end

    private

      # Turn an array of string lines into an SQL import file.
      # One row is written per (line, rhyme) pair, with the columns in
      # table order: line, syllables, final_word, rhyme.
      # Use tabs as delimiters.
      # Lines rejected by Wordfilter are skipped.
      # Returns the name of the file written.
      def save_sql_import_file lines
        sql_lines = []
        lines.each do |line|
          next if Wordfilter.blacklisted? line
          line_ = format_sql_string line

          # Fall back to an empty final word if it cannot be worked out.
          final =
            begin
              line.to_phrase.last_word.downcase
            rescue StandardError
              ''
            end

          final_ = format_sql_string final
          syll = syllables line
          get_rhymes(line).each do |rhyme|
            rhyme_ = format_sql_string rhyme
            sql_lines << "\"#{line_}\"\t#{syll}\t\"#{final_}\"\t\"#{rhyme_}\""
          end
        end
        sql_file = tmpfile
        File.open(sql_file, 'w') { |fo| fo.puts sql_lines }
        sql_file
      end

      # Generate a random temporary file name ('tmp-....txt').
      # Only the name is generated; no file is created. The name is
      # relative, so the file ends up in the current working directory.
      def tmpfile
        # Dir::Tmpname.make_tmpname was removed in Ruby 2.5. 'create' with
        # an empty block yields a fresh path without creating anything;
        # keep just the basename, as make_tmpname used to return.
        File.basename(Dir::Tmpname.create(['tmp-', '.txt']) { })
      end

      # Reset a cached prepared statement, bind 'params' to its
      # placeholders in order (1-based) and return all rows as an array.
      def run_statement statement, *params
        statement.reset!
        params.each.with_index(1) do |value, position|
          statement.bind_param(position, value)
        end
        statement.execute.to_a
      end

      ##########################################################################

      # Find rhymes that have at least 'rhyme_count' distinct final words.
      # The statement is prepared on first use and then cached.
      def sproc_rhymes_by_count rhyme_count
        unless @sproc_rhymes_by_count
          sql = %Q[
            SELECT rhyme, COUNT(rhyme) AS rc
            FROM (
              SELECT rhyme, final_word, COUNT(final_word) AS wc
              FROM lines
              GROUP BY rhyme, final_word
            )
            GROUP BY rhyme
            HAVING rc >= ?
          ]
          begin
            @sproc_rhymes_by_count = db.prepare sql
          rescue StandardError
            return handle_error 'ERROR: Database table structure is invalid'
          end
        end
        run_statement @sproc_rhymes_by_count, rhyme_count
      end

      # Also adds syllable selection.
      # 'syllable_min_max' is read through the keys :min and :max.
      def sproc_rhymes_by_count_syllables rhyme_count, syllable_min_max
        unless @sproc_rhymes_by_count_syllables
          sql = %Q[
            SELECT rhyme, COUNT(rhyme) AS rc
            FROM (
              SELECT rhyme, final_word, COUNT(final_word) AS wc
              FROM lines
              WHERE syllables BETWEEN ? AND ?
              GROUP BY rhyme, final_word
            )
            GROUP BY rhyme
            HAVING rc >= ?
          ]
          begin
            @sproc_rhymes_by_count_syllables = db.prepare sql
          rescue StandardError
            return handle_error 'ERROR: Database table structure is invalid'
          end
        end
        run_statement @sproc_rhymes_by_count_syllables,
                      syllable_min_max[:min],
                      syllable_min_max[:max],
                      rhyme_count
      end

      ##########################################################################

      # Find all lines for a certain rhyme.
      def sproc_lines_all rhyme
        unless @sproc_lines_all
          sql = %Q[
            SELECT line, syllables, final_word, rhyme
            FROM lines WHERE rhyme = ?
          ]
          begin
            @sproc_lines_all = db.prepare sql
          rescue StandardError
            return handle_error 'ERROR: Database table structure is invalid'
          end
        end
        run_statement @sproc_lines_all, rhyme
      end

  end

end
251
+
252
+ ################################################################################