poefy 0.5.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +74 -0
- data/.rspec +2 -0
- data/Gemfile +2 -0
- data/LICENSE +13 -0
- data/README.md +522 -0
- data/Rakefile +6 -0
- data/bin/poefy +205 -0
- data/data/emily_dickinson.txt +9942 -0
- data/data/english_as_she_is_spoke.txt +647 -0
- data/data/shakespeare_sonnets.txt +2618 -0
- data/data/spec_test_tiny.txt +12 -0
- data/data/st_therese_of_lisieux.txt +3700 -0
- data/data/whitman_leaves.txt +17815 -0
- data/lib/poefy/conditional_satisfaction.rb +208 -0
- data/lib/poefy/database.rb +252 -0
- data/lib/poefy/generation.rb +268 -0
- data/lib/poefy/handle_error.rb +27 -0
- data/lib/poefy/poefy_gen_base.rb +124 -0
- data/lib/poefy/poetic_forms.rb +330 -0
- data/lib/poefy/self.rb +21 -0
- data/lib/poefy/string_manipulation.rb +81 -0
- data/lib/poefy/version.rb +29 -0
- data/lib/poefy.rb +49 -0
- data/poefy.gemspec +33 -0
- data/spec/poefy_spec.rb +464 -0
- data/spec/spec_helper.rb +9 -0
- metadata +175 -0
@@ -0,0 +1,208 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# Encoding: UTF-8
|
3
|
+
|
4
|
+
################################################################################
|
5
|
+
# Two methods for assessing permutations of an input array versus an
|
6
|
+
# array of conditions for each element.
|
7
|
+
# Both methods return an output array consisting of samples from an
|
8
|
+
# input array, for which output[0] satisfies condition[0], etc.
|
9
|
+
################################################################################
|
10
|
+
# '#conditional_permutation' returns a complete permutation of an array.
|
11
|
+
# i.e. output length == array length
|
12
|
+
# Any elements in the array that are extra to the number of conditions will
|
13
|
+
# be assumed valid.
|
14
|
+
# array = [1,2,3,4,5].shuffle
|
15
|
+
# conditions = [
|
16
|
+
# proc { |arr, elem| elem < 2},
|
17
|
+
# proc { |arr, elem| elem > 2},
|
18
|
+
# proc { |arr, elem| elem > 1}
|
19
|
+
# ]
|
20
|
+
# possible output = [1,3,4,5,2]
|
21
|
+
#
|
22
|
+
#
|
23
|
+
# ToDo: This is now not used! Need to add 'current_array' argument.
|
24
|
+
#
|
25
|
+
#
|
26
|
+
################################################################################
|
27
|
+
# '#conditional_selection' returns an array that satisfies only the conditions.
|
28
|
+
# i.e. output length == conditions length
|
29
|
+
# array = [1,2,3,4,5].shuffle
|
30
|
+
# conditions = [
|
31
|
+
# proc { |arr, elem| elem < 2},
|
32
|
+
# proc { |arr, elem| elem > 2},
|
33
|
+
# proc { |arr, elem| elem > 1}
|
34
|
+
# ]
|
35
|
+
# possible output = [1,5,3]
|
36
|
+
################################################################################
|
37
|
+
# Condition array:
|
38
|
+
# Must contain boolean procs using args |arr, elem|
|
39
|
+
# 'arr' is a reference to the current array that has been built up
|
40
|
+
# through the recursion chain.
|
41
|
+
# 'elem' is a reference to the current element.
|
42
|
+
################################################################################
|
43
|
+
|
44
|
+
module Poefy
|
45
|
+
|
46
|
+
module ConditionalSatisfaction
|
47
|
+
|
48
|
+
# Remove only the first occurrence of 'value' from 'array', in place.
# Returns the removed element, or nil (leaving 'array' untouched)
# when 'value' is not present.
def delete_first(array, value)
  position = array.index(value)
  return nil if position.nil?
  array.delete_at(position)
end
|
52
|
+
|
53
|
+
# Make sure each line ends with a different word.
# Intended to be used inside 'conditions' procs: 'arr' holds the lines
# chosen so far and 'elem' is the candidate line. Returns true when no
# chosen line has the same 'final_word' as the candidate.
def diff_end(arr, elem)
  candidate_word = elem['final_word']
  arr.none? { |chosen| chosen['final_word'] == candidate_word }
end
|
58
|
+
|
59
|
+
# See if a line matches a particular 'poetic_form'.
#   line        - hash with at least 'syllables' and 'line' keys.
#   poetic_form - hash that may carry :syllable (a single count or a
#                 collection of allowed counts; nil or 0 means "any")
#                 and :regex (pattern the line text must match).
# Returns true only when every constraint that is present is met.
def validate_line(line, poetic_form)
  allowed = poetic_form[:syllable]
  if allowed && allowed != 0
    # Splat so a single count and a list of counts are handled alike.
    return false unless [*allowed].include?(line['syllables'])
  end

  pattern = poetic_form[:regex]
  return true unless pattern

  !!line['line'].match(pattern)
end
|
70
|
+
|
71
|
+
# Build one conditions hash per line of the poem.
#   tokenised_rhyme - rhyme tokens, one per line (e.g. "a", "B1").
#   poetic_form     - hash of form settings. Only settings whose value
#                     is itself a Hash are copied, looked up by 1-based
#                     line number (nil when that line has no entry).
# Each output hash holds :line, :rhyme and :rhyme_letter (the first
# character of the token, lowercased) plus the per-line settings.
# The result is used to analyse the validity of corpus lines.
def conditions_by_line(tokenised_rhyme, poetic_form)
  tokenised_rhyme.each_with_index.map do |token, position|
    line_number = position + 1
    base = {
      line: line_number,
      rhyme: token,
      rhyme_letter: token[0].downcase
    }
    poetic_form.each_with_object(base) do |(key, setting), conditions|
      conditions[key] = setting[line_number] if setting.is_a?(Hash)
    end
  end
end
|
91
|
+
|
92
|
+
# Count how many lines are needed for each rhyme letter. Ignore spaces.
# Tokens containing an uppercase letter (refrains such as "A1") are
# folded into their lowercase letter, each distinct refrain token
# counting once however often it appears.
#   e.g. token counts {"A1"=>4, "b"=>6, "A2"=>4, "a"=>5}
#        => {"b"=>6, "a"=>7}
def unique_rhymes(tokenised_rhyme)
  # Tally every token except the space separator.
  counts = Hash.new(0)
  tokenised_rhyme.each do |token|
    counts[token] += 1 unless token == ' '
  end

  # Each distinct refrain token adds one to its lowercase letter.
  refrains = counts.keys.select { |token| /[[:upper:]]/.match(token) }
  refrains.each { |token| counts[token[0].downcase] += 1 }

  # The refrain tokens themselves are not part of the result.
  refrains.each { |token| counts.delete(token) }
  counts
end
|
110
|
+
|
111
|
+
############################################################################
|
112
|
+
|
113
|
+
# Return a permutation of 'array' where each element validates to the
# same index in a 'conditions' array of procs that return Boolean.
#   array         - the candidate elements.
#   conditions    - callables invoked as cond.call(current_array, elem).
#                   A callable that declares exactly one parameter is
#                   still called with just the element.
#   current_iter  - index of the condition being tested (used by the
#                   recursion; callers normally leave the default).
#   current_array - elements already placed by the recursion chain;
#                   this is the 'arr' the conditions receive.
# Elements beyond the number of conditions are accepted as-is.
# Returns the full permutation, or [] when no permutation satisfies
# every condition.
# This may take a whole lot of time, depending on how lenient the
# conditions are. It is better for the stricter conditions to be
# at the start of the array, because candidates are tried depth-first.
def conditional_permutation(array, conditions, current_iter = 0, current_array = [])
  cond = conditions[current_iter]

  array.each_with_index do |elem, index|
    # No condition left for this position means any element is allowed.
    if cond
      single_arg = cond.respond_to?(:arity) && cond.arity == 1
      passed = single_arg ? cond.call(elem) : cond.call(current_array, elem)
      next unless passed
    end

    # Remove exactly this candidate (by position, so an equal-but-distinct
    # element elsewhere in the array is never dropped instead).
    remain = array.dup
    remain.delete_at(index)
    return [elem] if remain.empty?

    rest = conditional_permutation(remain, conditions,
                                   current_iter + 1, current_array + [elem])

    # An empty tail means this choice breaks a later condition: backtrack.
    return [elem] + rest unless rest.empty?
  end

  []
end
|
158
|
+
|
159
|
+
# Return values from 'array' where each element validates to the same
# index in a 'conditions' array of procs that return Boolean.
#   array         - the candidate elements (nil elements are dropped
#                   from the result).
#   conditions    - procs called as cond.call(current_array, elem).
#   current_iter  - index of the condition being tested (recursion).
#   current_array - elements already selected by the recursion chain.
# Returns an array of conditions.length elements, [] when the conditions
# cannot all be satisfied, or nil when there is no condition at
# 'current_iter' (which is how the recursion signals "finished").
def conditional_selection(array, conditions, current_iter = 0, current_array = [])
  cond = conditions[current_iter]

  # Nothing left to satisfy.
  return nil if cond.nil?

  array.each do |elem|
    next unless cond.call(current_array, elem)

    # Take this candidate out of the pool and try to satisfy the rest.
    remain = array.dup
    remain.delete_at(remain.index(elem) || remain.length)
    tail = conditional_selection(remain, conditions,
                                 current_iter + 1, current_array + [elem])

    # An empty (non-nil) tail means a later condition failed: backtrack.
    next if tail && tail.empty?

    return [elem, tail].flatten.compact
  end

  # No candidate worked for this condition.
  []
end
|
203
|
+
|
204
|
+
end
|
205
|
+
|
206
|
+
end
|
207
|
+
|
208
|
+
################################################################################
|
@@ -0,0 +1,252 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# Encoding: UTF-8
|
3
|
+
|
4
|
+
################################################################################
|
5
|
+
# Class for connecting to a sqlite3 database.
|
6
|
+
################################################################################
|
7
|
+
|
8
|
+
require 'sqlite3'
|
9
|
+
require 'tempfile'
|
10
|
+
|
11
|
+
require_relative 'string_manipulation.rb'
|
12
|
+
require_relative 'handle_error.rb'
|
13
|
+
|
14
|
+
################################################################################
|
15
|
+
|
16
|
+
module Poefy
|
17
|
+
|
18
|
+
class Database
|
19
|
+
|
20
|
+
include Poefy::StringManipulation
|
21
|
+
include Poefy::HandleError
|
22
|
+
|
23
|
+
attr_reader :console, :db_file
|
24
|
+
|
25
|
+
# Finalizer must be a class variable.
# Calling this outer proc with a database handle returns the proc that
# is registered with ObjectSpace.define_finalizer in #initialize.
# NOTE(review): the inner proc is created in the class body, so the
# '@sproc_*' instance variables it reads belong to the class object,
# not to a Database instance; nothing visible here assigns them, so
# these three lines look like no-ops. Confirm before relying on the
# finalizer to close prepared statements.
@@final = proc { |dbase| proc {
  @sproc_lines_all.close if @sproc_lines_all
  @sproc_rhymes_by_count.close if @sproc_rhymes_by_count
  @sproc_rhymes_by_count_syllables.close if @sproc_rhymes_by_count_syllables
  # 'dbase' is whatever handle was passed in when the finalizer was built.
  dbase.close if dbase
} }
|
32
|
+
|
33
|
+
# Remember where the database lives; the file is not opened here.
#   db_file - path of the SQLite database file.
#   console - stored and exposed through the 'console' reader.
# NOTE(review): '@db' has not been assigned at this point, so the
# finalizer is built with a nil handle - confirm this is intended.
def initialize(db_file, console = false)
  @db_file, @console = db_file, console
  finalizer = @@final.call(@db)
  ObjectSpace.define_finalizer(self, finalizer)
end
|
38
|
+
|
39
|
+
# Return the database session, opening it first if necessary.
# Every method that needs the database goes through this accessor,
# so calling code never has to open it explicitly.
# If opening raises, the handle is cleared and the result of
# handle_error is returned instead.
def db
  return @db if @db

  begin
    open
  rescue
    @db = nil
    return handle_error 'ERROR: Database does not yet exist'
  end

  @db
end
|
53
|
+
|
54
|
+
# Open the database file and store the handle in '@db'.
# Rows are returned as hashes, and a two-argument 'regexp' SQL function
# is registered whose matching is case-insensitive.
def open
  @db = SQLite3::Database.open(@db_file)
  @db.results_as_hash = true

  # Create a REGEX function in SQLite.
  # http://stackoverflow.com/questions/7302311
  @db.create_function('regexp', 2) do |func, pattern, expression|
    case_insensitive = Regexp.new(pattern.to_s, Regexp::IGNORECASE)
    hit = expression.to_s =~ case_insensitive
    func.result = hit ? 1 : 0
  end
end
|
66
|
+
|
67
|
+
# Close the prepared statements and the database handle, if any.
# Unlike going through the lazy 'db' accessor, this never opens the
# database just to close it. All cached handles are cleared afterwards
# so that later calls prepare fresh statements on a fresh connection
# instead of reusing closed ones.
# Returns the result of closing the handle, or nil when no database
# was open.
def close
  [@sproc_lines_all, @sproc_rhymes_by_count,
   @sproc_rhymes_by_count_syllables].each do |statement|
    statement.close if statement
  end
  @sproc_lines_all = nil
  @sproc_rhymes_by_count = nil
  @sproc_rhymes_by_count_syllables = nil

  # Forget the handle before closing it, so a failing close cannot
  # leave a half-closed connection cached.
  handle = @db
  @db = nil
  handle.close if handle
end
|
74
|
+
|
75
|
+
# See if the database file exists or not.
# Uses File.exist?; the old File.exists? alias is no longer available
# on current Ruby versions.
def exists?
  File.exist?(@db_file)
end
|
79
|
+
|
80
|
+
# Create the database from an array of lines, but only when the
# database file does not exist yet. The actual work (import file,
# table creation, import, clean-up) is done by make_new!.
# Returns nil without doing anything when the file already exists.
def make_new(lines)
  return if exists?

  make_new!(lines)
end
|
88
|
+
|
89
|
+
# Force new database, overwriting existing.
# Steps:
#   1. Convert the input lines array to a tab-separated import file.
#   2. Delete the database file if it already exists.
#   3. Write the SQL / sqlite3 shell instructions to a temp file and
#      feed them to the 'sqlite3' command line tool.
#   4. Delete both temporary files, even if an earlier step raised.
# The sqlite3 tool is started without a shell, so a database path that
# contains spaces or shell metacharacters is passed through verbatim.
# Returns true when sqlite3 exited successfully, false otherwise.
# Raises Errno::ENOENT when the sqlite3 executable cannot be started.
def make_new!(lines)
  # Convert the lines array into an import file.
  sql_import_file = save_sql_import_file lines

  # Delete any existing database. Best effort: the file may simply not
  # exist yet, and other file-system failures surface when sqlite3 runs.
  begin
    File.delete(@db_file)
  rescue SystemCallError
    nil
  end

  # The SQL file is finicky. Each line requires no leading whitespace.
  sql_instruction_file = tmpfile
  sql = [
    '',
    'CREATE TABLE IF NOT EXISTS lines (',
    'line TEXT, syllables INT, final_word TEXT, rhyme TEXT',
    ');',
    'CREATE INDEX idx ON lines (rhyme, final_word, line);',
    ".separator \"\t\"",
    ".import #{sql_import_file} lines",
    ''
  ].join("\n")
  File.open(sql_instruction_file, 'w') { |fo| fo.write sql }

  # Create the database using the SQL instructions: the instruction
  # file is the tool's stdin, and its stdout is discarded.
  created = system('sqlite3', @db_file.to_s,
                   in: sql_instruction_file, out: File::NULL)
  raise Errno::ENOENT, 'sqlite3' if created.nil?

  created
ensure
  # Delete temporary files, whatever happened above.
  [sql_instruction_file, sql_import_file].each do |path|
    File.delete(path) if path && File.exist?(path)
  end
end
|
119
|
+
|
120
|
+
# Execute an SQL request against the database and return its result.
# If anything raises, the error is reported through handle_error with
# an empty array passed as its second argument.
def execute!(sql)
  db.execute sql
rescue
  handle_error 'ERROR: Database has incorrect table structure', []
end
|
128
|
+
|
129
|
+
# Format a string for SQL: every double quote is doubled so the text
# can sit inside a double-quoted field. Returns a new string.
def format_sql_string(string)
  string.gsub('"') { '""' }
end
|
133
|
+
|
134
|
+
# Public interfaces for private stored procedure methods.

# Find rhymes having at least 'rhyme_count' distinct final words.
# When 'syllable_min_max' is given, the syllable-restricted query is
# used and receives it unchanged.
def sproc_rhymes_all!(rhyme_count, syllable_min_max = nil)
  return sproc_rhymes_by_count(rhyme_count) unless syllable_min_max

  sproc_rhymes_by_count_syllables(rhyme_count, syllable_min_max)
end
|
142
|
+
# Public entry point for the private 'sproc_lines_all' query:
# returns whatever that query returns for the given rhyme.
def sproc_lines_all!(rhyme)
  sproc_lines_all(rhyme)
end
|
145
|
+
|
146
|
+
private
|
147
|
+
|
148
|
+
# Turn an array of string lines into an SQL import file.
# One row is written per (line, rhyme) pair, in the column order
# "line, syllables, final_word, rhyme", using tabs as delimiters.
# Text fields are double-quoted. Lines rejected by Wordfilter are
# skipped. Returns the name of the file that was written.
def save_sql_import_file(lines)
  rows = lines.each_with_object([]) do |line, collected|
    next if Wordfilter.blacklisted? line

    quoted_line = format_sql_string line

    # If the final word cannot be extracted, fall back to an empty string.
    last_word = begin
      line.to_phrase.last_word.downcase
    rescue
      ''
    end
    quoted_final = format_sql_string last_word
    syllable_count = syllables line

    get_rhymes(line).each do |rhyme|
      quoted_rhyme = format_sql_string rhyme
      collected << "\"#{quoted_line}\"\t#{syllable_count}\t\"#{quoted_final}\"\t\"#{quoted_rhyme}\""
    end
  end

  sql_file = tmpfile
  File.open(sql_file, 'w') { |fo| fo.puts rows }
  sql_file
end
|
169
|
+
|
170
|
+
# Generate a random temporary file name of the form "tmp-<random>.txt".
# Only the name is produced; no file is created. The name carries no
# directory part, and is chosen so that no file of that name exists in
# the current directory at the time of the call.
# Uses Dir::Tmpname.create (loaded via 'tempfile'), because the older
# Dir::Tmpname.make_tmpname is not available on current Ruby versions.
def tmpfile
  path = Dir::Tmpname.create(['tmp-', '.txt'], '.') do |candidate|
    # Raising EEXIST asks Tmpname to try another random name.
    raise Errno::EEXIST if File.exist?(candidate)
  end
  File.basename(path)
end
|
174
|
+
|
175
|
+
##########################################################################
|
176
|
+
|
177
|
+
# Find rhymes, with counts, that have at least 'rhyme_count' distinct
# final words. The statement is prepared on first use and cached in
# '@sproc_rhymes_by_count'; later calls reset and re-bind it.
# Returns the result rows as an array, or the result of handle_error
# when the statement cannot be prepared.
def sproc_rhymes_by_count(rhyme_count)
  unless @sproc_rhymes_by_count
    sql = %Q[
      SELECT rhyme, COUNT(rhyme) AS rc
      FROM (
        SELECT rhyme, final_word, COUNT(final_word) AS wc
        FROM lines
        GROUP BY rhyme, final_word
      )
      GROUP BY rhyme
      HAVING rc >= ?
    ]
    begin
      @sproc_rhymes_by_count = db.prepare sql
    rescue
      return handle_error 'ERROR: Database table structure is invalid'
    end
  end

  statement = @sproc_rhymes_by_count
  statement.reset!
  statement.bind_param(1, rhyme_count)
  statement.execute.to_a
end
|
200
|
+
|
201
|
+
# Same query as sproc_rhymes_by_count, restricted to lines whose
# syllable count lies between syllable_min_max[:min] and
# syllable_min_max[:max]. The statement is prepared on first use and
# cached in '@sproc_rhymes_by_count_syllables'.
# Returns the result rows as an array, or the result of handle_error
# when the statement cannot be prepared.
def sproc_rhymes_by_count_syllables(rhyme_count, syllable_min_max)
  unless @sproc_rhymes_by_count_syllables
    sql = %Q[
      SELECT rhyme, COUNT(rhyme) AS rc
      FROM (
        SELECT rhyme, final_word, COUNT(final_word) AS wc
        FROM lines
        WHERE syllables BETWEEN ? AND ?
        GROUP BY rhyme, final_word
      )
      GROUP BY rhyme
      HAVING rc >= ?
    ]
    begin
      @sproc_rhymes_by_count_syllables = db.prepare sql
    rescue
      return handle_error 'ERROR: Database table structure is invalid'
    end
  end

  statement = @sproc_rhymes_by_count_syllables
  statement.reset!
  # Placeholders, in order: minimum syllables, maximum syllables, count.
  statement.bind_param(1, syllable_min_max[:min])
  statement.bind_param(2, syllable_min_max[:max])
  statement.bind_param(3, rhyme_count)
  statement.execute.to_a
end
|
227
|
+
|
228
|
+
##########################################################################
|
229
|
+
|
230
|
+
# Find all lines for a certain rhyme.
# The statement is prepared on first use and cached in
# '@sproc_lines_all'; later calls reset and re-bind it.
# Returns the result rows as an array, or the result of handle_error
# when the statement cannot be prepared.
def sproc_lines_all(rhyme)
  unless @sproc_lines_all
    sql = %Q[
      SELECT line, syllables, final_word, rhyme
      FROM lines WHERE rhyme = ?
    ]
    begin
      @sproc_lines_all = db.prepare sql
    rescue
      return handle_error 'ERROR: Database table structure is invalid'
    end
  end

  statement = @sproc_lines_all
  statement.reset!
  statement.bind_param(1, rhyme)
  statement.execute.to_a
end
|
247
|
+
|
248
|
+
end
|
249
|
+
|
250
|
+
end
|
251
|
+
|
252
|
+
################################################################################
|