poefy 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +74 -0
- data/.rspec +2 -0
- data/Gemfile +2 -0
- data/LICENSE +13 -0
- data/README.md +522 -0
- data/Rakefile +6 -0
- data/bin/poefy +205 -0
- data/data/emily_dickinson.txt +9942 -0
- data/data/english_as_she_is_spoke.txt +647 -0
- data/data/shakespeare_sonnets.txt +2618 -0
- data/data/spec_test_tiny.txt +12 -0
- data/data/st_therese_of_lisieux.txt +3700 -0
- data/data/whitman_leaves.txt +17815 -0
- data/lib/poefy/conditional_satisfaction.rb +208 -0
- data/lib/poefy/database.rb +252 -0
- data/lib/poefy/generation.rb +268 -0
- data/lib/poefy/handle_error.rb +27 -0
- data/lib/poefy/poefy_gen_base.rb +124 -0
- data/lib/poefy/poetic_forms.rb +330 -0
- data/lib/poefy/self.rb +21 -0
- data/lib/poefy/string_manipulation.rb +81 -0
- data/lib/poefy/version.rb +29 -0
- data/lib/poefy.rb +49 -0
- data/poefy.gemspec +33 -0
- data/spec/poefy_spec.rb +464 -0
- data/spec/spec_helper.rb +9 -0
- metadata +175 -0
@@ -0,0 +1,208 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# Encoding: UTF-8
|
3
|
+
|
4
|
+
################################################################################
|
5
|
+
# Two methods for assessing permutations of an input array versus an
|
6
|
+
# array of conditions for each element.
|
7
|
+
# Both methods return an output array consisting of samples from an
|
8
|
+
# input array, for which output[0] satisfies condition[0], etc.
|
9
|
+
################################################################################
|
10
|
+
# '#conditional_permutation' returns a complete permutation of an array.
|
11
|
+
# i.e. output length == array length
|
12
|
+
# Any elements in the array that are extra to the number of conditions will
|
13
|
+
# be assumed valid.
|
14
|
+
# array = [1,2,3,4,5].shuffle
|
15
|
+
# conditions = [
|
16
|
+
# proc { |arr, elem| elem < 2},
|
17
|
+
# proc { |arr, elem| elem > 2},
|
18
|
+
# proc { |arr, elem| elem > 1}
|
19
|
+
# ]
|
20
|
+
# possible output = [1,3,4,5,2]
|
21
|
+
#
|
22
|
+
#
|
23
|
+
# ToDo: This is now not used! Need to add 'current_array' argument.
|
24
|
+
#
|
25
|
+
#
|
26
|
+
################################################################################
|
27
|
+
# '#conditional_selection' returns an array that satisfies only the conditions.
|
28
|
+
# i.e. output length == conditions length
|
29
|
+
# array = [1,2,3,4,5].shuffle
|
30
|
+
# conditions = [
|
31
|
+
# proc { |arr, elem| elem < 2},
|
32
|
+
# proc { |arr, elem| elem > 2},
|
33
|
+
# proc { |arr, elem| elem > 1}
|
34
|
+
# ]
|
35
|
+
# possible output = [1,5,3]
|
36
|
+
################################################################################
|
37
|
+
# Condition array:
|
38
|
+
# Must contain boolean procs using args |arr, elem|
|
39
|
+
# 'arr' is a reference to the current array that has been built up
|
40
|
+
# through the recursion chain.
|
41
|
+
# 'elem' is a reference to the current element.
|
42
|
+
################################################################################
|
43
|
+
|
44
|
+
module Poefy
|
45
|
+
|
46
|
+
module ConditionalSatisfaction
|
47
|
+
|
48
|
+
# Remove only the first occurrence of 'value' from 'array', in place.
# Returns the removed element, or nil when 'value' is not in the array
# (the array is then left untouched).
def delete_first(array, value)
  # An index equal to the array length is out of range, so 'delete_at'
  # does nothing and returns nil when there is no match.
  position = array.index(value) || array.length
  array.delete_at(position)
end
|
52
|
+
|
53
|
+
# Make sure each line ends with a different word.
# This is intended to be used in 'conditions' procs.
# Returns true when no hash in 'arr' has the same 'final_word'
# value as 'elem'.
def diff_end(arr, elem)
  arr.none? { |line| line['final_word'] == elem['final_word'] }
end
|
58
|
+
|
59
|
+
# See if a line matches a particular 'poetic_form'.
#   line        - hash read via the 'syllables' and 'line' string keys.
#   poetic_form - hash read via the :syllable and :regex symbol keys.
# A :syllable value that is missing or 0 means "no syllable constraint";
# otherwise it may be a single value or a list of acceptable values.
# Returns true only if every constraint that is present is satisfied.
def validate_line(line, poetic_form)
  wanted_syllables = poetic_form[:syllable]
  if wanted_syllables && wanted_syllables != 0
    return false unless [*wanted_syllables].include?(line['syllables'])
  end

  pattern = poetic_form[:regex]
  return !!line['line'].match(pattern) if pattern

  true
end
|
70
|
+
|
71
|
+
# Input a rhyme array and a poetic_form hash.
# Create a line by line array of conditions.
# This will be used to analyse the validity of corpus lines.
#
# Each output hash holds the 1-based line number, the rhyme token and
# its lowercased first character. For every 'poetic_form' entry whose
# value is itself a Hash, the value stored under that line number is
# copied in under the same key.
def conditions_by_line(tokenised_rhyme, poetic_form)
  # Only Hash-valued entries are looked up per line.
  per_line_keys = poetic_form.keys.select { |key| poetic_form[key].is_a?(Hash) }

  tokenised_rhyme.each_with_index.map do |rhyme, index|
    line_number = index + 1
    condition = {
      line: line_number,
      rhyme: rhyme,
      rhyme_letter: rhyme[0].downcase
    }
    per_line_keys.each { |key| condition[key] = poetic_form[key][line_number] }
    condition
  end
end
|
91
|
+
|
92
|
+
# Group by element, with count as value. Ignore spaces.
#   e.g. {"A1"=>4, "b"=>6, "A2"=>4, "a"=>5}
#     => {"b"=>6, "a"=>7}
# Every distinct token containing an uppercase character adds exactly
# one to the count of its lowercased first character, and is then
# dropped from the result.
def unique_rhymes(tokenised_rhyme)
  counts = Hash.new(0)
  tokenised_rhyme.each do |token|
    counts[token] += 1 unless token == ' '
  end

  # For each uppercase token, add one to the corresponding lowercase.
  uppercase_tokens = counts.keys.select { |token| /[[:upper:]]/.match(token) }
  uppercase_tokens.each { |token| counts[token[0].downcase] += 1 }

  # Delete the uppercase tokens; 'delete_if' returns the same hash.
  counts.delete_if { |token, _count| /[[:upper:]]/.match(token) }
end
|
110
|
+
|
111
|
+
############################################################################
|
112
|
+
|
113
|
+
# Return a permutation of 'array' where each element validates to the
# same index in a 'conditions' array of procs that return Boolean.
# Elements beyond the number of conditions are accepted as they come.
#
#   array         - the candidate elements.
#   conditions    - procs taking |arr, elem|, where 'arr' is the output
#                   built so far. Conditions that declare exactly one
#                   parameter are called with just the element.
#   current_iter  - index of the condition to test (used by recursion).
#   current_array - elements already chosen (used by recursion).
#
# Returns the full permutation, or [] if no valid ordering exists.
# This may take a whole lot of time, depending on how lenient the
# conditions are. It is better for the stricter conditions to be
# at the start of the array, due to the way the code is written.
def conditional_permutation(array, conditions, current_iter = 0, current_array = [])
  cond = conditions[current_iter]

  array.each do |elem|
    # If we've run out of conditions, then allow any value.
    satisfied =
      if cond.nil?
        true
      elsif cond.arity == 1
        cond.call(elem)
      else
        cond.call(current_array, elem)
      end
    next unless satisfied

    # Remove the first matching value from a copy, and recurse on the rest.
    remain = array.dup
    remain.delete_at(remain.index(elem) || remain.length)

    # Nothing left to place: this element completes the permutation.
    return [elem] if remain.empty?

    rest = conditional_permutation(remain, conditions,
                                   current_iter + 1, current_array + [elem])

    # An empty result means this choice breaks a later condition,
    # so fall through and try the next candidate.
    return [elem, *rest] unless rest.empty?
  end

  []
end
|
158
|
+
|
159
|
+
# Return values from 'array' where each element validates to the same
# index in a 'conditions' array of procs that return Boolean.
# Each proc is called with (elements chosen so far, candidate element).
#
# Returns an array of conditions.length on success, [] when no
# selection satisfies every condition, and nil when there is no
# condition at 'current_iter' (which is how the recursion signals
# that the end of the conditions has been reached).
def conditional_selection(array, conditions,
                          current_iter = 0,
                          current_array = [])
  check = conditions[current_iter]

  # Past the last condition: nothing more to select.
  return nil if check.nil?

  picked = []
  array.each do |candidate|
    next unless check.call(current_array, candidate)

    # Drop the first matching value from a copy, and recurse on the rest.
    leftover = array.dup
    leftover.delete_at(leftover.index(candidate) || leftover.length)
    tail = conditional_selection(leftover, conditions,
                                 current_iter + 1, current_array + [candidate])

    # An empty (non-nil) tail means a later condition could not be met
    # with this candidate, so try the next one.
    next if tail && tail.empty?

    picked << candidate << tail
    break
  end

  picked.flatten.compact
end
|
203
|
+
|
204
|
+
end
|
205
|
+
|
206
|
+
end
|
207
|
+
|
208
|
+
################################################################################
|
@@ -0,0 +1,252 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# Encoding: UTF-8
|
3
|
+
|
4
|
+
################################################################################
|
5
|
+
# Class for connecting to a sqlite3 database.
|
6
|
+
################################################################################
|
7
|
+
|
8
|
+
require 'sqlite3'
|
9
|
+
require 'tempfile'
|
10
|
+
|
11
|
+
require_relative 'string_manipulation.rb'
|
12
|
+
require_relative 'handle_error.rb'
|
13
|
+
|
14
|
+
################################################################################
|
15
|
+
|
16
|
+
module Poefy
|
17
|
+
|
18
|
+
class Database
|
19
|
+
|
20
|
+
include Poefy::StringManipulation
|
21
|
+
include Poefy::HandleError
|
22
|
+
|
23
|
+
attr_reader :console, :db_file
|
24
|
+
|
25
|
+
# Finalizer must be a class variable.
# Calling this with a database handle returns the proc to register as
# the object's finalizer; that proc closes the prepared statements and
# then the handle, skipping any that are not set.
# NOTE(review): this proc is created in the class body, so the
# '@sproc_*' instance variables below are looked up on the 'self' of
# that scope rather than on the instance being finalized -- confirm
# whether they can ever be non-nil here.
@@final = proc do |dbase|
  proc do
    [
      @sproc_lines_all,
      @sproc_rhymes_by_count,
      @sproc_rhymes_by_count_syllables,
      dbase
    ].each do |handle|
      handle.close if handle
    end
  end
end
|
32
|
+
|
33
|
+
# Store the database file path and the console flag, and register
# a finalizer for this object. The database itself is not opened here.
# NOTE(review): '@db' has not been assigned at this point in the
# method, so the finalizer is built with a nil handle -- confirm
# whether that is intended.
def initialize(db_file, console = false)
  @console = console
  @db_file = db_file
  finalizer = @@final.call(@db)
  ObjectSpace.define_finalizer(self, finalizer)
end
|
38
|
+
|
39
|
+
# Open global database session, if not already existing.
# This is called in all methods where it is needed. So no need to
# execute it before any calling code.
# Returns the open handle. If opening raises, '@db' is reset to nil
# and the result of 'handle_error' is returned instead.
def db
  return @db if @db

  begin
    open
  rescue
    @db = nil
    return handle_error 'ERROR: Database does not yet exist'
  end

  @db
end
|
53
|
+
|
54
|
+
# Open the database file at '@db_file', store the handle in '@db',
# and ask for result rows as hashes.
def open
  @db = SQLite3::Database.open(@db_file)
  @db.results_as_hash = true

  # Create a REGEX function in SQLite.
  # http://stackoverflow.com/questions/7302311
  # It takes (pattern, expression), matches case-insensitively,
  # and yields 1 on a match or 0 otherwise.
  @db.create_function('regexp', 2) do |func, pattern, expression|
    matcher = Regexp.new(pattern.to_s, Regexp::IGNORECASE)
    func.result = matcher.match(expression.to_s) ? 1 : 0
  end
end
|
66
|
+
|
67
|
+
# Close the database file.
# Closes any prepared statements first, then the connection, and
# clears every handle so that a later call to 'db' or to one of the
# statement methods builds fresh ones instead of reusing closed ones.
# Does nothing if the database was never opened; in particular it no
# longer opens a connection just to close it.
# Returns the result of closing the connection, or nil if none was open.
def close
  [@sproc_lines_all,
   @sproc_rhymes_by_count,
   @sproc_rhymes_by_count_syllables].each do |statement|
    statement.close if statement
  end
  @sproc_lines_all = nil
  @sproc_rhymes_by_count = nil
  @sproc_rhymes_by_count_syllables = nil

  return nil unless @db

  result = @db.close
  @db = nil
  result
end
|
74
|
+
|
75
|
+
# See if the database file exists or not.
# Uses 'File.exist?'; the 'File.exists?' alias is deprecated and is
# no longer available on current Ruby versions.
def exists?
  File.exist?(@db_file)
end
|
79
|
+
|
80
|
+
# Creates a database with the correct format, but only when the
# database file does not exist yet; an existing file is left alone
# and nil is returned. The work itself is done by 'make_new!'.
def make_new(lines)
  make_new!(lines) unless exists?
end
|
88
|
+
|
89
|
+
# Force new database, overwriting existing.
#   1. Convert the input lines array to an SQL import file.
#   2. Delete the database file if it already exists.
#   3. Write an instruction file and feed it to the 'sqlite3'
#      command-line program to create and populate the database.
#   4. Delete both temporary files, even if an earlier step raised.
# The program is started without a shell, so characters in the database
# path are never interpreted as shell syntax.
# The return value is the captured output of the 'sqlite3' program and
# is not meant to be relied on.
def make_new!(lines)
  sql_instruction_file = nil

  # Convert the lines array into an import file.
  sql_import_file = save_sql_import_file lines

  # Delete any existing database. Best effort: a missing file is fine.
  begin
    File.delete(@db_file)
  rescue SystemCallError
    nil
  end

  # Write SQL and SQLite instructions to temp file.
  # The SQL file is finicky. Each line requires no leading whitespace,
  # hence the strip of every line below.
  sql_instruction_file = tmpfile
  sql = %Q[
    CREATE TABLE IF NOT EXISTS lines (
      line TEXT, syllables INT, final_word TEXT, rhyme TEXT
    );
    CREATE INDEX idx ON lines (rhyme, final_word, line);
    .separator "\t"
    .import #{sql_import_file} lines
  ].split("\n").map(&:strip).join("\n")
  File.open(sql_instruction_file, 'w') { |fo| fo.write sql }

  # Create the database using the SQL instructions. The array form
  # runs the program directly, with the instruction file as its stdin.
  IO.popen(['sqlite3', @db_file.to_s], in: sql_instruction_file, &:read)
ensure
  # Delete temporary files.
  [sql_instruction_file, sql_import_file].compact.each do |path|
    File.delete(path) if File.exist?(path)
  end
end
|
119
|
+
|
120
|
+
# Execute an SQL request against the database and return its result.
# If anything raises, the error is passed to 'handle_error' together
# with an empty array, and whatever 'handle_error' returns is returned.
def execute!(sql)
  db.execute sql
rescue
  handle_error 'ERROR: Database has incorrect table structure', []
end
|
128
|
+
|
129
|
+
# Format a string for SQL.
# Every double-quote character is doubled; nothing else is changed.
# Returns a new string.
def format_sql_string(string)
  doubled_quote = '"' * 2
  string.gsub('"', doubled_quote)
end
|
133
|
+
|
134
|
+
# Public interfaces for private stored procedure methods.
# Returns the rhymes with at least 'rhyme_count' entries. When
# 'syllable_min_max' is given, the syllable-restricted query is
# used instead of the plain one.
def sproc_rhymes_all!(rhyme_count, syllable_min_max = nil)
  return sproc_rhymes_by_count(rhyme_count) unless syllable_min_max

  sproc_rhymes_by_count_syllables(rhyme_count, syllable_min_max)
end
|
142
|
+
# Public wrapper that forwards 'rhyme' to 'sproc_lines_all'
# and returns its result unchanged.
def sproc_lines_all!(rhyme)
  sproc_lines_all(rhyme)
end
|
145
|
+
|
146
|
+
private
|
147
|
+
|
148
|
+
# Turn an array of string lines into an SQL import file.
# Each row is written as: "line", syllables, "final_word", "rhyme"
# with tabs as delimiters and one row per rhyme of each line.
# Lines rejected by 'Wordfilter.blacklisted?' are skipped.
# Returns the path of the file that was written.
def save_sql_import_file(lines)
  rows = []

  lines.each do |line|
    next if Wordfilter.blacklisted? line

    escaped_line = format_sql_string line

    # Fall back to an empty final word if it cannot be extracted.
    last_word =
      begin
        line.to_phrase.last_word.downcase
      rescue
        ''
      end
    escaped_word = format_sql_string last_word
    syllable_count = syllables line

    get_rhymes(line).each do |rhyme|
      escaped_rhyme = format_sql_string rhyme
      rows << "\"#{escaped_line}\"\t#{syllable_count}\t\"#{escaped_word}\"\t\"#{escaped_rhyme}\""
    end
  end

  import_path = tmpfile
  File.open(import_path, 'w') { |fo| fo.puts rows }
  import_path
end
|
169
|
+
|
170
|
+
# Generate a random temporary file name.
# Returns a bare file name (no directory part) of the form
#   tmp-YYYYMMDD-<pid>-<random base-36 number>.txt
# Nothing is created on disk; the caller writes and deletes the file.
# The name is assembled here because 'Dir::Tmpname.make_tmpname',
# which used to produce this format, is no longer available on
# current Ruby versions.
def tmpfile
  date_stamp = Time.now.strftime('%Y%m%d')
  random_part = rand(0x100000000).to_s(36)
  "tmp-#{date_stamp}-#{Process.pid}-#{random_part}.txt"
end
|
174
|
+
|
175
|
+
##########################################################################
|
176
|
+
|
177
|
+
# Find rhymes and counts greater than a certain length.
# Returns rows of (rhyme, rc), where rc is the number of distinct
# final words for that rhyme, keeping rhymes with rc >= 'rhyme_count'.
# The statement is prepared on first use and kept in
# '@sproc_rhymes_by_count'; if preparing it raises, the result of
# 'handle_error' is returned instead.
def sproc_rhymes_by_count(rhyme_count)
  unless @sproc_rhymes_by_count
    sql = %Q[
      SELECT rhyme, COUNT(rhyme) AS rc
      FROM (
        SELECT rhyme, final_word, COUNT(final_word) AS wc
        FROM lines
        GROUP BY rhyme, final_word
      )
      GROUP BY rhyme
      HAVING rc >= ?
    ]
    begin
      @sproc_rhymes_by_count = db.prepare sql
    rescue
      return handle_error 'ERROR: Database table structure is invalid'
    end
  end

  # Reset before binding so the statement can be run again.
  statement = @sproc_rhymes_by_count
  statement.reset!
  statement.bind_param(1, rhyme_count)
  statement.execute.to_a
end
|
200
|
+
|
201
|
+
# Find rhymes and counts, as 'sproc_rhymes_by_count' does, but only
# over lines whose syllable count lies between
# 'syllable_min_max[:min]' and 'syllable_min_max[:max]'.
# The statement is prepared on first use and kept in
# '@sproc_rhymes_by_count_syllables'; if preparing it raises, the
# result of 'handle_error' is returned instead.
def sproc_rhymes_by_count_syllables(rhyme_count, syllable_min_max)
  unless @sproc_rhymes_by_count_syllables
    sql = %Q[
      SELECT rhyme, COUNT(rhyme) AS rc
      FROM (
        SELECT rhyme, final_word, COUNT(final_word) AS wc
        FROM lines
        WHERE syllables BETWEEN ? AND ?
        GROUP BY rhyme, final_word
      )
      GROUP BY rhyme
      HAVING rc >= ?
    ]
    begin
      @sproc_rhymes_by_count_syllables = db.prepare sql
    rescue
      return handle_error 'ERROR: Database table structure is invalid'
    end
  end

  # Placeholders in order: minimum syllables, maximum syllables, count.
  statement = @sproc_rhymes_by_count_syllables
  statement.reset!
  statement.bind_param(1, syllable_min_max[:min])
  statement.bind_param(2, syllable_min_max[:max])
  statement.bind_param(3, rhyme_count)
  statement.execute.to_a
end
|
227
|
+
|
228
|
+
##########################################################################
|
229
|
+
|
230
|
+
# Find all lines for a certain rhyme.
# Returns rows of (line, syllables, final_word, rhyme).
# The statement is prepared on first use and kept in
# '@sproc_lines_all'; if preparing it raises, the result of
# 'handle_error' is returned instead.
def sproc_lines_all(rhyme)
  unless @sproc_lines_all
    sql = %Q[
      SELECT line, syllables, final_word, rhyme
      FROM lines WHERE rhyme = ?
    ]
    begin
      @sproc_lines_all = db.prepare sql
    rescue
      return handle_error 'ERROR: Database table structure is invalid'
    end
  end

  # Reset before binding so the statement can be run again.
  statement = @sproc_lines_all
  statement.reset!
  statement.bind_param(1, rhyme)
  statement.execute.to_a
end
|
247
|
+
|
248
|
+
end
|
249
|
+
|
250
|
+
end
|
251
|
+
|
252
|
+
################################################################################
|