poefy 0.6.1 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/bin/poefy_make ADDED
@@ -0,0 +1,93 @@
1
+ #!/usr/bin/env ruby
2
+ # Encoding: UTF-8
3
+
4
+ ################################################################################
5
+ # Determine which database interface to use, based on the gems installed.
6
+ # Generate initial corpora.
7
+ ################################################################################
8
+
9
+ require_relative '../lib/poefy.rb'
10
+
11
+ Poefy.console = true
12
+
13
+ ################################################################################
14
+
15
+ # Determine which database interface to use, based on the gems installed.
16
+ # Attempt to load exactly one of the below files.
17
+ # Array is ordered by priority, so use PostgreSQL before SQLite.
18
# Work out which database interface to use, based on the gems installed.
#
# Each candidate library is tried in priority order (PostgreSQL before
# SQLite) and the search stops at the first one that loads.
# The interfaces are provided by separately installed gems, so they must
# be looked up on the load path with 'require'. ('require_relative' only
# searches relative to this script, where those files do not live.)
#
# Returns the short name of the interface that loaded: 'pg' or 'sqlite3'.
# If neither can be loaded, writes an error to STDERR and exits the
# process with status 1.
def determine_database_interface
  candidates = [
    'poefy/pg',
    'poefy/sqlite3'
  ]

  # First candidate that can be required without a LoadError, else nil.
  loaded_file = candidates.find do |file|
    begin
      require file
      true
    rescue LoadError
      false
    end
  end

  # Exit and send error to the console if no file loaded.
  if loaded_file.nil?
    msg = "ERROR: Please specify the type of database to use." +
          "\n The 'poefy' gem does not implement a database interface" +
          "\n by default; you must install one of the below gems:" +
          "\n gem install poefy-sqlite3" +
          "\n gem install poefy-pg"
    STDERR.puts msg
    exit 1
  end

  File.basename(loaded_file)
end
45
+
46
+ # If the user already has a database interface setup, then use that.
47
+ # If they don't, then determine based off installed gems.
48
# Keep whatever interface is already recorded in the settings file.
# Only when nothing is recorded, detect one from the installed gems
# and record it.
configured_type = Poefy.database_type(false)
Poefy.database_type = determine_database_interface if configured_type.nil?

# Should already be set up, but run this to confirm no error is thrown.
Poefy.require_db
54
+
55
+ ################################################################################
56
+
57
+ # Create corpora from the text files included with the repository.
58
+ # Exclude all lines which do not contain lowercase letters.
59
# Create one corpus from a text file included with the repository.
#
# database    - value passed to Poefy::Poem.new
# textfile    - file name inside the '<Poefy.root>/data/' directory
# description - passed through to Poem#make_database! with the lines
#
# Lines that contain no lowercase ASCII letter are excluded.
# The Poem object is closed even when the import raises, so a failed
# corpus does not leave its handle open.
def make_db database, textfile, description
  file = Poefy.root + '/data/' + textfile
  input = File.readlines(file).grep(/[a-z]/)
  poefy = Poefy::Poem.new database
  begin
    poefy.make_database! input, description
  ensure
    poefy.close
  end
end
66
+
67
# The corpora to build, one row per corpus:
# database name, source text file, description.
corpora = [
  ['shakespeare', 'shakespeare_sonnets.txt',     "Shakespeare's sonnets"],
  ['therese',     'st_therese_of_lisieux.txt',   "St. Thérèse of Lisieux"],
  ['whitman',     'whitman_leaves.txt',          "Walt Whitman, Leaves of Grass"],
  ['dickinson',   'emily_dickinson.txt',         "Emily Dickinson"],
  ['spoke',       'english_as_she_is_spoke.txt', "English As She Is Spoke"]
]

corpora.each do |database, textfile, description|
  make_db database, textfile, description
end
92
+
93
+ ################################################################################
@@ -0,0 +1,85 @@
1
+ #!/usr/bin/env ruby
2
+ # Encoding: UTF-8
3
+
4
+ ################################################################################
5
+ # Code for interfacing with the 'conditional_sample' gem.
6
+ ################################################################################
7
+
8
+ module Poefy
9
+
10
+ module ConditionalSample
11
+
12
+ # Delete the first matching value in an array.
13
+ def delete_first array, value
14
+ array.delete_at(array.index(value) || array.length)
15
+ end
16
+
17
+ # Make sure each line ends with a different word.
18
+ # This is intented to be used in 'conditions' procs.
19
+ def diff_end arr, elem
20
+ !arr.map{ |i| i['final_word'] }.include?(elem['final_word'])
21
+ end
22
+
23
+ # See if a line matches to a particular 'poetic_form'
24
+ def validate_line line, poetic_form
25
+ valid = true
26
+ if poetic_form[:syllable] and poetic_form[:syllable] != 0
27
+ valid = valid && [*poetic_form[:syllable]].include?(line['syllables'])
28
+ end
29
+ if poetic_form[:regex]
30
+ [*poetic_form[:regex]].each do |i|
31
+ valid = valid && !!(line['line'].match(i))
32
+ end
33
+ end
34
+ valid
35
+ end
36
+
37
+ # Input a rhyme array and a poetic_form hash.
38
+ # Create a line by line array of conditions.
39
+ # This will be used to analyse the validity of corpus lines.
40
+ def conditions_by_line tokenised_rhyme, poetic_form
41
+ output = []
42
+ tokenised_rhyme.each.with_index do |rhyme, index|
43
+ line_hash = {
44
+ line: index + 1,
45
+ rhyme: rhyme[:token],
46
+ rhyme_letter: rhyme[:rhyme_letter]
47
+ }
48
+ if rhyme[:refrain] and rhyme[:refrain] != ' '
49
+ line_hash[:refrain] = rhyme[:refrain]
50
+ end
51
+ line_hash[:exact] = rhyme[:exact] if rhyme[:exact]
52
+ poetic_form.keys.each do |k|
53
+ if poetic_form[k].is_a? Hash
54
+ line_hash[k] = poetic_form[k][index + 1]
55
+ end
56
+ end
57
+ output << line_hash
58
+ end
59
+ output
60
+ end
61
+
62
+ # Group by element, with count as value. Ignore spaces.
63
+ # e.g. {"A1"=>4, "b"=>6, "A2"=>4, "a"=>5}
64
+ # => {"b"=>6, "a"=>7}
65
+ def unique_rhymes tokenised_rhyme
66
+
67
+ # Group by element, with count as value. Ignore spaces.
68
+ # e.g. {"A1"=>4, "b"=>6, "A2"=>4, "a"=>5}
69
+ tokens = tokenised_rhyme.reject { |i| i == ' ' }
70
+ grouped = tokens.each_with_object(Hash.new(0)) { |k,h| h[k] += 1 }
71
+
72
+ # For each uppercase token, add one to the corresponding lowercase.
73
+ uppers = grouped.keys.select{ |i| /[[:upper:]]/.match(i) }
74
+ uppers.each { |i| grouped[i[0].downcase] += 1 }
75
+
76
+ # Delete from the grouped hash if uppercase.
77
+ grouped.delete_if { |k,v| /[[:upper:]]/.match(k) }
78
+ grouped
79
+ end
80
+
81
+ end
82
+
83
+ end
84
+
85
+ ################################################################################
@@ -5,60 +5,99 @@
5
5
  # Monkey patch the Array class.
6
6
  ################################################################################
7
7
 
8
- # [array] is the same array as [self], but ordered by closeness to the index.
9
- # Optionally pass an integer, for results for just that index element.
10
- # Returns a Struct, or an array of Structs, in the form:
11
- # .index => original index
12
- # .value => original element
13
- # .array => self array minus value, ordered by closeness to index
14
- # Example usage:
15
- # lines = (1..4).to_a * 2
16
- # puts lines.by_distance
17
- # puts lines.by_distance(3)
18
- # lines.by_distance(3).each { ... }
8
+ #--
9
+ # Declare module structure.
10
+ #++
19
11
  module Poefy
20
12
  module CoreExtensions
13
+ module Array
14
+ module SortByDistance
15
+ end
16
+ end
17
+ end
18
+ end
21
19
 
22
- # Output struct for #by_distance method.
23
- # Array is the most useful data, but index and value are also kept.
24
- IndexValueArray = Struct.new(:index, :value, :array) do
25
- alias_method :to_a, :array
26
- include Enumerable
27
- def each &block
28
- array.each do |i|
29
- block.call i
20
+ #--
21
+ # Define module methods.
22
+ #++
23
+ module Poefy::CoreExtensions::Array::SortByDistance
24
+
25
+ ##
26
+ # Take an array index and return a permutation of the
27
+ # items sorted by distance from that index.
28
+ # If 'index' is not specified, return an Enumerator
29
+ # of the results for all indices, in order.
30
+ #
31
+ # The ':reverse' keyword argument switches the equally close
32
+ # neighbours from lowest index first to highest first.
33
+ # It's an option added mostly for completeness, but it's
34
+ # there if you need it.
35
+ #
36
+ def sort_by_distance_from_index index = nil, reverse: false
37
+
38
+ # Return Enumerator of all possible output arrays.
39
+ if index.nil?
40
+ Enumerator.new(self.count) do |y|
41
+ self.each.with_index do |value, index|
42
+ y << self.sort_by_distance_from_index(index, reverse: reverse)
30
43
  end
31
44
  end
32
- end
33
45
 
34
- module Array
46
+ # Return Enumerator of results for a single index.
47
+ else
48
+ Enumerator.new(self.count) do |y|
49
+ y << self[index]
50
+ counter = 0
51
+ loop do
52
+ counter += 1
35
53
 
36
- def by_distance index = nil
37
- if index.nil?
38
- self.map.with_index do |value, index|
39
- self.by_distance index
40
- end
41
- else
42
- others, counter = [], 0
43
- loop do
44
- counter += 1
45
- below_index = index - counter
46
- below_index = nil if below_index < 0
47
- below = self[below_index] if below_index
48
- above = self[index + counter]
49
- others << below if below
50
- others << above if above
51
- break if !above and !below
54
+ # Consider negative indices OOB, not from array tail.
55
+ below_index = index - counter
56
+ below_index = nil if below_index < 0
57
+ below = self[below_index] if below_index
58
+
59
+ # This is fine, uses nil as default value if OOB.
60
+ above = self[index + counter]
61
+
62
+ # Both the elements with index one higher and one lower
63
+ # are equally close neighbours to the subject element.
64
+ # The default is to output the element with the lowest
65
+ # index first. With ':reverse' set to true, the highest
66
+ # index is appended first.
67
+ if reverse
68
+ y << above if above
69
+ y << below if below
70
+ else
71
+ y << below if below
72
+ y << above if above
52
73
  end
53
- IndexValueArray.new(index, self[index], others)
74
+
75
+ # Break if we're at the last element.
76
+ break if !above and !below
54
77
  end
55
78
  end
56
79
  end
57
80
  end
81
+
82
+ ##
83
+ # Find all elements that match 'value' and return the
84
+ # sort_by_distance results for all, as an Enumerator.
85
+ #
86
+ def sort_by_distance_from_value value = nil, reverse: false
87
+ matching = self.each_index.select { |i| self[i] == value }
88
+ Enumerator.new(matching.count) do |y|
89
+ matching.each do |index|
90
+ y << self.sort_by_distance_from_index(index, reverse: reverse)
91
+ end
92
+ end
93
+ end
58
94
  end
59
95
 
96
+ #--
97
+ # Extend Array class.
98
+ #++
60
99
  class Array
61
- include Poefy::CoreExtensions::Array
100
+ include Poefy::CoreExtensions::Array::SortByDistance
62
101
  end
63
102
 
64
103
  ################################################################################
@@ -2,12 +2,10 @@
2
2
  # Encoding: UTF-8
3
3
 
4
4
  ################################################################################
5
- # Class for connecting to a sqlite3 database.
5
+ # Base class for connecting to a database.
6
+ # Install gem 'poefy-sqlite3' or 'poefy-pg' for implementation.
6
7
  ################################################################################
7
8
 
8
- require 'sqlite3'
9
- require 'tempfile'
10
-
11
9
  require_relative 'string_manipulation.rb'
12
10
  require_relative 'handle_error.rb'
13
11
 
@@ -20,38 +18,38 @@ module Poefy
20
18
  include Poefy::StringManipulation
21
19
  include Poefy::HandleError
22
20
 
23
- attr_reader :console, :db_file
24
-
25
- # Finalizer must be a class variable.
26
- @@final = proc { |dbase, sproc| proc {
27
- sproc.each { |k, v| v.close }
28
- dbase.close if dbase
29
- } }
21
+ attr_reader :name, :local
30
22
 
31
- def initialize db_file, console = false
32
- @db_file = db_file
33
- @console = console
23
+ def initialize name, local = false
24
+ @local = local
25
+ @name = name.to_s
34
26
  @sproc = {}
27
+ type
35
28
  db
36
- ObjectSpace.define_finalizer(self, @@final.call(@db, @sproc))
37
29
  end
38
30
 
39
- # Open global database session, if not already existing.
40
- # This is called in all methods where it is needed. So no need to
41
- # execute it before any calling code.
31
+ ############################################################################
32
+
33
+ # Validate that a database type has been required.
34
+ # This will be overwritten by a database-specific method,
35
+ # so raise an error if no database has been specified yet.
36
+ # Due to the way 'bin/poefy' is set up, that code will fail before
37
+ # this point is reached, so this error is only from Ruby calls.
38
+ def type
39
+ msg = "No database interface specified. " +
40
+ "Please require 'poefy/sqlite3' or 'poefy/pg'"
41
+ raise LoadError, msg
42
+ end
43
+
44
+ # Open instance database session, if not already existing.
42
45
  def db
43
- if not @db
44
- if !exists?
45
- @db = nil
46
- else
47
- begin
48
- @db = SQLite3::Database.open(@db_file)
49
- @db.results_as_hash = true
50
- rescue
51
- @db = nil
52
- return handle_error 'ERROR: Database contains invalid structure'
53
- end
46
+ if not @db and exists?
47
+ begin
48
+ open_connection
54
49
  create_sprocs
50
+ rescue
51
+ @db = nil
52
+ return handle_error 'ERROR: Database contains invalid structure'
55
53
  end
56
54
  end
57
55
  @db
@@ -64,68 +62,37 @@ module Poefy
64
62
  @db = nil
65
63
  end
66
64
 
67
- # See if the database file exists or not.
68
- def exists?
69
- File.exists?(@db_file)
70
- end
71
-
72
65
  # Creates a database with the correct format.
73
66
  # Convert input lines array to SQL import format file.
74
67
  # Delete database if already exists.
75
68
  # Create database using SQL import file.
76
69
  # Delete both files.
77
- def make_new lines
78
- make_new!(lines) if !exists?
70
+ def make_new lines, description = nil
71
+ make_new!(lines, description) if !exists?
79
72
  end
80
73
 
81
74
  # Force new database, overwriting existing.
82
- def make_new! lines
75
+ def make_new! lines, description = nil
83
76
 
84
- # Convert the lines array into an import file.
85
- sql_import_file = save_sql_import_file lines
77
+ # Create a new database.
78
+ new_connection
86
79
 
87
- # Delete any existing database.
88
- File.delete(@db_file) if File.exists?(@db_file)
80
+ # Create the lines table and the index.
81
+ create_table table, description
89
82
 
90
- # Write SQL and SQLite instructions to temp file,
91
- # import to database, delete temp file.
92
- # The SQL file is finicky. Each line requires no leading whitespace.
93
- sql_instruction_file = tmpfile
94
- sql = %Q[
95
- CREATE TABLE IF NOT EXISTS lines (
96
- line TEXT, syllables INT, final_word TEXT, rhyme TEXT
97
- );
98
- CREATE INDEX idx ON lines (rhyme, final_word, line);
99
- .separator "\t"
100
- .import #{sql_import_file} lines
101
- ].split("\n").map(&:strip).join("\n")
102
- File.open(sql_instruction_file, 'w') { |fo| fo.write sql }
83
+ # Convert the lines array into an expanded array of rhyme metadata.
84
+ import_data = lines_rhyme_metadata lines
103
85
 
104
- # Create the database using the SQL instructions.
105
- `sqlite3 #{@db_file} < #{sql_instruction_file}`
86
+ # Import the data.
87
+ insert_lines table, import_data
106
88
 
107
- # Delete temporary files.
108
- File.delete sql_instruction_file
109
- File.delete sql_import_file
110
- end
111
-
112
- # Execute an SQL request.
113
- def execute! sql
114
- begin
115
- db.execute sql
116
- rescue
117
- return handle_error 'ERROR: Database has incorrect table structure', []
118
- end
119
- end
120
-
121
- # Format a string for SQL.
122
- def format_sql_string string
123
- string.gsub('"','""')
89
+ # Recreate the stored procedures.
90
+ create_sprocs
124
91
  end
125
92
 
126
93
  # Public interfaces for private stored procedure methods.
127
94
  # Use instance variables to keep a cache of the results.
128
- def sproc_rhymes_all! rhyme_count, syllable_min_max = nil
95
+ def rhymes_by_count rhyme_count, syllable_min_max = nil
129
96
  db
130
97
  @rbc = Hash.new { |h,k| h[k] = {} } if @rbc.nil?
131
98
  if @rbc[rhyme_count][syllable_min_max].nil?
@@ -137,14 +104,14 @@ module Poefy
137
104
  end
138
105
  @rbc[rhyme_count][syllable_min_max].dup
139
106
  end
140
- def sproc_lines_all! rhyme, syllable_min_max = nil
107
+ def lines_by_rhyme rhyme, syllable_min_max = nil
141
108
  db
142
109
  @la = Hash.new { |h,k| h[k] = {} } if @la.nil?
143
110
  if @la[rhyme][syllable_min_max].nil?
144
111
  @la[rhyme][syllable_min_max] = if syllable_min_max
145
- sproc_lines_all_syllables(rhyme, syllable_min_max)
112
+ sproc_lines_by_rhyme_syllables(rhyme, syllable_min_max)
146
113
  else
147
- sproc_lines_all(rhyme)
114
+ sproc_lines_by_rhyme(rhyme)
148
115
  end
149
116
  end
150
117
  @la[rhyme][syllable_min_max].dup
@@ -152,123 +119,33 @@ module Poefy
152
119
 
153
120
  private
154
121
 
155
- # Turn an array of string lines into an SQL import file.
156
- # Format is "line, final_word, rhyme, syllables"
157
- # Use tabs as delimiters.
158
- def save_sql_import_file lines
159
- sql_lines = []
122
+ ##########################################################################
123
+
124
+ # For each line, figure out the needed rhyme metadata.
125
+ # Output is an array of rows: [line, syllables, final_word, rhyme]
126
+ def lines_rhyme_metadata lines
127
+ output = []
160
128
  lines.map do |line|
161
129
 
162
130
  # Don't add the line if it contains a blacklisted? substring.
163
131
  next if Wordfilter.blacklisted? line
164
132
 
165
- # Format the line for SQL parsing.
166
- line_ = format_sql_string line
167
-
168
133
  # Get the phrase info for the line.
169
134
  phrase = phrase_info line
170
135
  syll = phrase[:syllables]
171
136
  rhymes = phrase[:rhymes]
172
- final_ = format_sql_string phrase[:last_word]
137
+ final = phrase[:last_word]
173
138
 
174
139
  # There may be more than one rhyme, so add a database
175
140
  # record for each rhyme.
176
141
  rhymes.each do |rhyme|
177
- rhyme_ = format_sql_string rhyme
178
- sql_lines << "\"#{line_}\"\t#{syll}\t\"#{final_}\"\t\"#{rhyme_}\""
142
+ output << [line, syll, final, rhyme]
179
143
  end
180
144
  end
181
145
 
182
- # Save the SQL spec to a temporary file, and return the filename.
183
- sql_file = tmpfile
184
- File.open(sql_file, 'w') { |fo| fo.puts sql_lines }
185
- sql_file
146
+ output
186
147
  end
187
148
 
188
- # Generate a random temporary file.
189
- def tmpfile
190
- Dir::Tmpname.make_tmpname ['tmp-','.txt'], nil
191
- end
192
-
193
- ##########################################################################
194
-
195
- # Define all stored procedures.
196
- def create_sprocs
197
- sql = {}
198
- sql[:rbc] = %Q[
199
- SELECT rhyme, COUNT(rhyme) AS rc
200
- FROM (
201
- SELECT rhyme, final_word, COUNT(final_word) AS wc
202
- FROM lines
203
- GROUP BY rhyme, final_word
204
- )
205
- GROUP BY rhyme
206
- HAVING rc >= ?
207
- ]
208
- sql[:rbcs] = %Q[
209
- SELECT rhyme, COUNT(rhyme) AS rc
210
- FROM (
211
- SELECT rhyme, final_word, COUNT(final_word) AS wc
212
- FROM lines
213
- WHERE syllables BETWEEN ? AND ?
214
- GROUP BY rhyme, final_word
215
- )
216
- GROUP BY rhyme
217
- HAVING rc >= ?
218
- ]
219
- sql[:la] = %Q[
220
- SELECT line, syllables, final_word, rhyme
221
- FROM lines WHERE rhyme = ?
222
- ]
223
- sql[:las] = %Q[
224
- SELECT line, syllables, final_word, rhyme
225
- FROM lines WHERE rhyme = ?
226
- AND syllables BETWEEN ? AND ?
227
- ]
228
- sql.each do |key, value|
229
- begin
230
- @sproc[key] = db.prepare value
231
- rescue
232
- raise 'ERROR: Database table structure is invalid'
233
- return handle_error 'ERROR: Database table structure is invalid'
234
- end
235
- end
236
- end
237
-
238
- # Find rhymes and counts greater than a certain length.
239
- def sproc_rhymes_by_count rhyme_count
240
- @sproc[:rbc].reset!
241
- @sproc[:rbc].bind_param(1, rhyme_count)
242
- @sproc[:rbc].execute.to_a
243
- end
244
-
245
- # Also adds syllable selection.
246
- def sproc_rhymes_by_count_syllables rhyme_count, syllable_min_max
247
- @sproc[:rbcs].reset!
248
- @sproc[:rbcs].bind_param(1, syllable_min_max[:min])
249
- @sproc[:rbcs].bind_param(2, syllable_min_max[:max])
250
- @sproc[:rbcs].bind_param(3, rhyme_count)
251
- @sproc[:rbcs].execute.to_a
252
- end
253
-
254
- # Find all lines for a certain rhyme.
255
- def sproc_lines_all rhyme
256
- @sproc[:la].reset!
257
- @sproc[:la].bind_param(1, rhyme)
258
- @sproc[:la].execute.to_a
259
- end
260
-
261
- # Also adds syllable selection.
262
- def sproc_lines_all_syllables rhyme, syllable_min_max
263
- @sproc[:las].reset!
264
- @sproc[:las].bind_param(1, rhyme)
265
- @sproc[:las].bind_param(2, syllable_min_max[:min])
266
- @sproc[:las].bind_param(3, syllable_min_max[:max])
267
- @sproc[:las].execute.to_a
268
- end
269
-
270
- ##########################################################################
271
-
272
149
  end
273
150
 
274
151
  end
@@ -0,0 +1,64 @@
1
+ #!/usr/bin/env ruby
2
+ # Encoding: UTF-8
3
+
4
+ ################################################################################
5
+ # Methods for selecting which database interface to use.
6
+ # And for including the correct gem, based on that choice.
7
+ ################################################################################
8
+
9
+ require 'yaml'
10
+
11
+ ################################################################################
12
+
13
module Poefy

  # Are we running this through the console? (Or as a Ruby library?)
  # Any value is coerced to true or false.
  def self.console= bool
    @@console = !!bool
  end

  # Returns the console flag. It is false until it has been set.
  def self.console
    @@console ||= false
  end

  # Write the database type to the 'settings.yml' file in Poefy.root,
  # replacing whatever the file held before.
  #
  # db_name - interface name, e.g. 'pg' or 'sqlite3'
  def self.database_type= db_name
    settings_file = Poefy.root + '/settings.yml'
    File.write(settings_file, {'database' => db_name}.to_yaml)
  end

  # Read the database type from the 'settings.yml' file in Poefy.root.
  #
  # create_file - when the settings file does not exist:
  #               true  (default) => create it with type 'pg'
  #               false           => return nil and create nothing
  #
  # Returns the stored 'database' value, or nil if the file holds no
  # usable settings hash (for example, if it is empty).
  def self.database_type create_file = true
    settings_file = Poefy.root + '/settings.yml'
    unless File.exist?(settings_file)
      return nil unless create_file
      Poefy.database_type = 'pg'
    end
    settings = YAML.load_file(settings_file)
    settings.is_a?(Hash) ? settings['database'] : nil
  end

  # Requires the chosen database interface gem.
  #
  # db_interface_gem - interface name to require; when nil, the name
  #                    from Poefy.database_type is used
  #
  # If the interface cannot be loaded: in console mode the error is
  # written to STDERR and the process exits with status 1; otherwise a
  # RuntimeError carrying the same message is raised.
  def self.require_db db_interface_gem = nil
    # 'to_s' so that a missing type reaches the LoadError handling below
    # rather than failing on string concatenation.
    require 'poefy/' + (db_interface_gem || Poefy.database_type).to_s

  # Exit and send error to the console if no file loaded.
  rescue LoadError
    msg = "ERROR: Please specify the type of database to use." +
          "\n The 'poefy' gem does not implement a database interface" +
          "\n by default; you must install one of the below gems:" +
          "\n gem install poefy-sqlite3" +
          "\n gem install poefy-pg"
    if Poefy.console
      STDERR.puts msg
      exit 1
    end
    raise msg
  end

end
63
+
64
+ ################################################################################