poetize 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +6 -0
- data/Gemfile +4 -0
- data/README.md +55 -0
- data/Rakefile +1 -0
- data/analyzer/analyzer.rb +44 -0
- data/analyzer/strophe_analyzer.rb +84 -0
- data/bin/poetize +37 -0
- data/lib/file_handler.rb +16 -0
- data/lib/hyphenator.rb +78 -0
- data/lib/poetize.rb +5 -0
- data/lib/poetize/version.rb +3 -0
- data/lib/text_handler.rb +69 -0
- data/model/poem.rb +25 -0
- data/model/strophe.rb +20 -0
- data/poetize.gemspec +24 -0
- data/resources/syllables_dict.txt +156176 -0
- data/test/file_handler_spec.rb +18 -0
- data/test/hyphenator_spec.rb +25 -0
- data/test/poem_spec.rb +22 -0
- data/test/samples/01.txt +34 -0
- data/test/samples/02_ballad.txt +26 -0
- data/test/samples/02_doubled.txt +62 -0
- data/test/samples/02_full.txt +32 -0
- data/test/samples/02_original.txt +30 -0
- data/test/spec_helper.rb +28 -0
- data/test/strophe_analyzer_spec.rb +33 -0
- data/test/strophe_spec.rb +17 -0
- data/test/text_handler_spec.rb +20 -0
- metadata +103 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/README.md
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
#ABOUT
|
2
|
+
|
3
|
+
Our goal is to provide a tool that parses a poem and analyzes it according to the following:
|
4
|
+
|
5
|
+
* Strophes
|
6
|
+
* Name (based on number of verses) [done]
|
7
|
+
* Type (based on verse metrics)
|
8
|
+
* Verses
|
9
|
+
* Syllable division
|
10
|
+
* Grammatical
|
11
|
+
* Poetical
|
12
|
+
* Metric and name
|
13
|
+
* Cadency (rhythm)
|
14
|
+
* Poem
|
15
|
+
* Number of strophes [done]
|
16
|
+
* Number of verses [done]
|
17
|
+
* Rhymes
|
18
|
+
* Name (based on strophe structure)
|
19
|
+
|
20
|
+
We are focusing on the Portuguese (Brazilian) language, since it is our native language and phonetics vary from language to language.
|
21
|
+
|
22
|
+
For more details and examples, see our wiki: https://github.com/lfilho/poetize/wiki
|
23
|
+
|
24
|
+
#USAGE
|
25
|
+
|
26
|
+
Make sure the poetize file is executable:
|
27
|
+
|
28
|
+
`$ chmod +x poetize`
|
29
|
+
|
30
|
+
Then run in your shell:
|
31
|
+
|
32
|
+
`$ poetize file_path [, ignore_line[, ignore_line[,...]]]`
|
33
|
+
|
34
|
+
The ignore_line argument is the line(s) you want the program to ignore (i.e. the title, author, date created, dedicated to, etc...)
|
35
|
+
|
36
|
+
##Usage examples:
|
37
|
+
|
38
|
+
`poetize ./samples/my_poem.txt`
|
39
|
+
|
40
|
+
`poetize /home/user/docs/love_poem.txt 1 2 13 14`
|
41
|
+
#REFERENCES
|
42
|
+
|
43
|
+
Good sites for learning about "poetic science":
|
44
|
+
|
45
|
+
* For the Portuguese language:
|
46
|
+
* http://mpbsapiens.com/ciencia-poetica/
|
47
|
+
* http://pt.wikipedia.org/wiki/Versifica%C3%A7%C3%A3o
|
48
|
+
* http://www.portaldalinguaportuguesa.org/?action=divisao&page=present
|
49
|
+
|
50
|
+
* For the English language:
|
51
|
+
* http://en.wikipedia.org/wiki/Poem
|
52
|
+
|
53
|
+
#ABOUT THE DEVELOPERS
|
54
|
+
|
55
|
+
We started this project to learn Ruby and TDD so expect it to be noobly coded :-) Feel free to refactor the code and contribute to our learning experience ;-)
|
data/Rakefile
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
@@ -0,0 +1,44 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
$app_path = File.dirname(__FILE__) + '/../'
|
3
|
+
require $app_path + 'lib/file_handler'
|
4
|
+
require $app_path + 'lib/hyphenator'
|
5
|
+
require $app_path + 'analyzer/strophe_analyzer'
|
6
|
+
|
7
|
+
# Builds the plain-text report for a poem: strophe and verse totals, the
# findings reported by StropheAnalyzer, and every line of the poem body with
# its words split into syllables by Hyphenator.
class Analyzer
  # poem - object responding to number_of_strophes, number_of_verses and body
  #        (an enumerable of lines); it is also handed to StropheAnalyzer.
  def initialize(poem)
    @poem = poem
    @ident = 3 # number of spaces used as left margin in the report
  end

  # Returns the complete report as one String.
  def analyze
    bullet = "#{' ' * @ident}- "
    report = "This poem:\n#{bullet}Contains #{@poem.number_of_strophes} strophes;\n" \
             "#{bullet}Contains #{@poem.number_of_verses} verses;\n" \
             "As for the strophes:\n"
    analyze_strophes.each { |finding| report += bullet + finding + "\n" }
    report += "Grammatical syllable divison:\n"
    report + divide_grammatically
  end

  private

  # Returns the list of findings produced by StropheAnalyzer for the poem.
  def analyze_strophes
    StropheAnalyzer.new(@poem).analyze
  end

  # Returns the poem body with every word hyphenated, one indented line per
  # body line. Each word is followed by a single space, so lines end with one.
  def divide_grammatically
    margin = ' ' * @ident
    @poem.body.each_with_object('') do |verse, text|
      # A fresh Hyphenator per word, exactly as the report was always built.
      syllables = verse.split(' ').map { |word| Hyphenator.new.hyphenate(word) + ' ' }
      text << margin + syllables.join + "\n"
    end
  end
end
|
@@ -0,0 +1,84 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
$app_path = File.dirname(__FILE__) + '/../'
|
3
|
+
require $app_path + 'lib/file_handler'
|
4
|
+
|
5
|
+
# Produces human-readable findings about the strophes of a poem: the name of
# each strophe (looked up through Strophe.name from its verse count) and
# whether the sequence of strophes matches a known fixed form.
class StropheAnalyzer
  attr_accessor :number_of_verses, :number_of_strophes

  # poem - object responding to strophes (each responding to
  #        number_of_verses), number_of_strophes and number_of_verses.
  def initialize(poem)
    @strophes = poem.strophes
    @analyzis_output = []
    # Computed once up front so the fixed-form predicates give the same answer
    # whether or not #analyze has run, and however many times it runs.
    @strophes_verse_count = @strophes.map(&:number_of_verses)
    @number_of_strophes = poem.number_of_strophes
    @number_of_verses = poem.number_of_verses
  end

  # Runs every analysis and returns the findings as an Array of Strings.
  # Each call starts from an empty list, so repeated calls do not duplicate
  # findings.
  def analyze
    @analyzis_output = []
    analyze_names
    analyze_fixed_form
    @analyzis_output
  end

  # True when the poem alternates strophes of one size with strophes of half
  # that size (integer division). At least two strophes are required: with
  # fewer there is no "in between" strophe to name. On a match a finding is
  # appended to the output list.
  def fixed_form_ballad?
    odd_verses = []
    even_verses = []
    @strophes_verse_count.each_slice(2) do |odd, even|
      odd_verses << odd
      even_verses << even unless even.nil?
    end
    # Without this guard `all?` is vacuously true on the empty list and the
    # message below would ask for the name of a nil verse count.
    return false if even_verses.empty?

    main_size = odd_verses.first
    is_odds_same = odd_verses.all? { |count| count == main_size }
    is_evens_same = even_verses.all? { |count| count == main_size / 2 }
    return false unless is_odds_same && is_evens_same

    @analyzis_output << "Which makes this poem is a Ballad! (" +
                        Strophe.name(main_size) + " verses with " +
                        Strophe.name(even_verses.first) + " verses between them)."
    true
  end

  # Not implemented yet; always false.
  def fixed_form_italian_sonet?
    # TODO
    false
  end

  # Not implemented yet; always false.
  def fixed_form_english_sonet?
    # TODO
    false
  end

  private

  # Groups the 1-based strophe positions by verse count and appends one
  # finding per group, e.g. "Strophes 1, 2 and 4 are <name>s".
  def analyze_names
    positions_by_count = {}
    @strophes_verse_count.each_with_index do |count, index|
      (positions_by_count[count] ||= []) << index + 1
    end

    positions_by_count.each do |count, positions|
      name = Strophe.name(count)
      if positions.size == 1
        @analyzis_output << "Strophe #{positions.first} is #{name}"
      else
        listed = "#{positions[0..-2].join(', ')} and #{positions.last}"
        @analyzis_output << "Strophes #{listed} are #{name}s"
      end
    end
  end

  # Tries each fixed form in turn and stops at the first that matches.
  def analyze_fixed_form
    return if fixed_form_ballad?
    return if fixed_form_italian_sonet?
    return if fixed_form_english_sonet?
  end
end
|
data/bin/poetize
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
$app_path = File.dirname(__FILE__) + '/../'
|
3
|
+
require $app_path + 'lib/text_handler'
|
4
|
+
require $app_path + 'lib/file_handler'
|
5
|
+
require $app_path + 'model/poem'
|
6
|
+
require $app_path + 'analyzer/analyzer'
|
7
|
+
|
8
|
+
# Command-line entry point: reads a poem file, separates the lines the user
# asked to ignore (kept as metadata) from the body, and prints the analysis.
if ARGV.empty?
  puts <<-eol
== Usage:
poetize file_path [, ignore_line[, ignore_line[,...]]]
== Examples:
poetize /home/user/docs/my_poem.txt
poetize /home/user/docs/my_poem.txt 1 2 13 14
  eol
  exit
end

file_path = ARGV[0]

# Line numbers are given 1-based on the command line; convert to 0-based
# indexes. Anything that is not a positive integer is rejected: `to_i` would
# silently turn it into 0, i.e. index -1, and drop the LAST line of the poem.
ignore_lines = ARGV.drop(1).map do |arg|
  number = arg.chomp(',') # tolerate "1, 2, 3" as suggested by the usage text
  abort "Invalid line number: #{arg}" unless number =~ /\A[1-9]\d*\z/
  number.to_i - 1
end.uniq.sort

# Fail early with a clear message instead of carrying an empty String into
# code that expects an Array of lines.
abort "File not found: #{file_path}" unless File.exist?(file_path)
text = FileHandler.get_file_contents(file_path)

# Collect the ignored lines in file order (out-of-range numbers yield nothing),
# then delete from the bottom up so earlier deletions do not shift later ones.
metadata = ignore_lines.map { |n| text[n] }.compact
ignore_lines.reverse_each { |n| text.delete_at(n) }

metadata = TextHandler.normalize_text(metadata)
body = TextHandler.normalize_text(text)

poem = Poem.new(metadata, body)
analyzer = Analyzer.new(poem)
puts analyzer.analyze
|
data/lib/file_handler.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
# Thin wrapper around file reading used by the command-line tool.
class FileHandler
  # Reads the file at file_path and returns its lines as an Array of Strings,
  # each keeping its line terminator.
  #
  # Raises RuntimeError carrying the message of the underlying error when the
  # file cannot be read (the original error stays available through #cause).
  # Only StandardError is translated: interrupts, exit requests and other
  # non-standard exceptions pass through untouched.
  def self.get_file_contents(file_path)
    File.readlines(file_path)
  rescue StandardError => e
    raise e.message
  end
end
|
data/lib/hyphenator.rb
ADDED
@@ -0,0 +1,78 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'rubygems'
|
3
|
+
require 'text-hyphen'
|
4
|
+
|
5
|
+
# Splits a word into syllables with Text::Hyphen configured for 'pt'.
# Accented letters are replaced by their plain counterparts before the word
# is given to the library and put back in the hyphenated result afterwards.
class Hyphenator
  ACCENTED_CHARS = /[çãâäõáéíóúêôàü]/

  # Character-for-character translation table: ACCENTED_LETTERS[i] becomes
  # PLAIN_LETTERS[i]. Covers exactly the letters matched by ACCENTED_CHARS.
  ACCENTED_LETTERS = 'çãâäàáéêíõôóúü'.freeze
  PLAIN_LETTERS    = 'caaaaaeeiooouu'.freeze

  def initialize
    @hyphenator = Text::Hyphen.new do |h|
      h.left = 0
      h.right = 0
      h.language = 'pt'
    end
  end

  # Returns word with '-' inserted at the break points reported by the
  # library, accents preserved. The argument is not modified.
  #
  # All bookkeeping is local to the call, so an error raised by the library
  # cannot leave accent positions behind for the next word.
  def hyphenate(word)
    accents = accent_positions(word)
    plain = word.tr(ACCENTED_LETTERS, PLAIN_LETTERS)
    # Work on a copy: the library may hand back a string it also keeps in an
    # internal cache, and writing accents into that object would change what
    # a later, unaccented look-up of the same letters returns.
    hyphenated = @hyphenator.visualize(plain).dup
    return hyphenated if accents.empty?

    restore_accents(hyphenated, accents, @hyphenator.hyphenate(plain))
  end

  private

  # Maps the index of every accented letter in word to that letter.
  def accent_positions(word)
    positions = {}
    word.each_char.with_index do |char, index|
      positions[index] = char if char =~ ACCENTED_CHARS
    end
    positions
  end

  # Writes each accented letter back into hyphenated. A letter at index k of
  # the plain word has moved right by one for every break position <= k; that
  # count is taken from the last such entry in break_positions.
  def restore_accents(hyphenated, accents, break_positions)
    accents.each do |index, char|
      last_break = break_positions.rindex { |pos| pos <= index }
      shift = last_break ? last_break + 1 : 0
      hyphenated[index + shift] = char
    end
    hyphenated
  end
end
|
data/lib/poetize.rb
ADDED
data/lib/text_handler.rb
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
class Object
  # True for nil, for strings that are empty or contain only whitespace, and
  # for any other object whose empty? is true. Objects that do not respond to
  # empty? are never blank.
  def blank?
    return true if nil?
    # \A and \z anchor the WHOLE string; ^ and $ would match a single empty
    # line inside a multi-line string and report e.g. "\nabc" as blank.
    return !match(/\A\s*\z/).nil? if is_a?(String)

    respond_to?(:empty?) ? empty? : false
  end
end
|
7
|
+
|
8
|
+
# Text clean-up helpers used before a poem is analyzed.
class TextHandler
  # Returns a copy of string without leading/trailing whitespace and with runs
  # of spaces collapsed into one. The argument is left untouched.
  def self.normalize_line(string)
    string.strip.squeeze(' ')
  end

  # Turns raw poem text into an Array of normalized lines in which strophes
  # are separated by exactly one empty string.
  #
  # text - a String (any of \r\n, \r or \n as line break) or an Array of lines.
  #        The argument is not modified.
  #
  # Blank lines at the beginning and end are dropped. If the text contains at
  # least one run of two or more blank lines, those runs are the separators
  # and isolated blank lines are discarded (text typed with a blank line after
  # every verse); otherwise every blank line is a separator.
  #
  # Raises RuntimeError when text is neither a String nor an Array.
  def self.normalize_text(text)
    unless text.is_a?(String) || text.is_a?(Array)
      raise "Argument must be a String or a Array"
    end

    # Normalize line endings from different OSes, then split into lines.
    text = text.gsub(/\015\012?/, "\n").split(/\n/) if text.is_a?(String)

    lines = remove_padding_lines(text)

    # Blank lines become symbols rather than marker strings, so verse text can
    # never be mistaken for (or damaged as) a marker.
    marked = lines.each_with_index.map do |line, i|
      if blank_line?(line)
        blank_line?(lines[i + 1]) ? :doubled : :single
      else
        normalize_line(line)
      end
    end
    marked.delete(:single) if marked.include?(:doubled)

    # Collapse each run of separators into one empty string. Only separators
    # are collapsed; identical consecutive verses are all kept.
    previous_was_gap = false
    marked.each_with_object([]) do |entry, result|
      gap = entry.is_a?(Symbol)
      result << (gap ? '' : entry) unless gap && previous_was_gap
      previous_was_gap = gap
    end
  end

  # Returns a new Array without nil entries and without the blank lines at
  # the beginning and at the end of array.
  def self.remove_padding_lines(array)
    remove_first_lines(remove_first_lines(array).reverse).reverse
  end

  # Returns a new Array without nil entries and without the leading blank
  # lines of array.
  def self.remove_first_lines(array)
    array.compact.drop_while { |line| line.strip.empty? }
  end

  # True when line is nil or holds nothing but whitespace.
  def self.blank_line?(line)
    line.nil? || line.strip.empty?
  end
  private_class_method :blank_line?
end
|
data/model/poem.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
$app_path = File.dirname(__FILE__) + '/../'
|
2
|
+
require $app_path + 'model/strophe'
|
3
|
+
|
4
|
+
# A poem: its metadata lines, its body lines and the strophes derived from
# the body.
class Poem
  attr_accessor :metadata, :body, :strophes, :number_of_strophes, :number_of_verses

  # metadata - Array of lines that are not part of the poem body.
  # body     - Array of lines; empty strings separate the strophes.
  def initialize(metadata, body)
    @metadata = metadata
    @body = body
    @strophes = split_in_strophes(body)
    @number_of_strophes = @strophes.size
    @number_of_verses = @strophes.map(&:number_of_verses).inject(0, :+)
  end

  # Groups consecutive non-empty lines of body and wraps each group in a
  # Strophe. Empty lines only act as separators.
  def split_in_strophes(body)
    groups = []
    inside_strophe = false
    body.each do |line|
      if line.empty?
        inside_strophe = false
      else
        groups << [] unless inside_strophe
        groups.last << line
        inside_strophe = true
      end
    end
    groups.map { |verses| Strophe.new(verses) }
  end

  # Metadata lines, an empty line, then the body lines.
  def to_s
    [@metadata.join("\n"), @body.join("\n")].join("\n\n")
  end

  # Same text as to_s with every line break replaced by a <br /> tag.
  # The text itself is inserted as-is (no HTML escaping is applied).
  def to_html
    to_s.gsub(/\n/) { '<br />' }
  end
end
|
data/model/strophe.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
# A strophe: a group of verses, named after how many verses it holds.
class Strophe
  # Name for each verse count; index 0 is the empty name and the last entry
  # is used for every count above 10.
  NAMES = ["", 'Monóstico', "Dístico", "Terceto", "Quadra", "Quintilhas", "Sextilha",
           "Sétima", "Oitava", "Nona", "Décima", "Livre ou Polimérica"].freeze

  # Marks "no argument given" for Strophe.name, so that case can be told
  # apart from any real value a caller might pass.
  NOT_GIVEN = Object.new.freeze
  private_constant :NOT_GIVEN

  attr_accessor :number_of_verses, :verses

  # arr - Array with the verses of the strophe.
  def initialize(arr)
    @verses = arr
    @number_of_verses = arr.size
  end

  # Returns the name used for a strophe of i verses: the empty name for
  # counts below 1 and the last table entry for counts above 10.
  #
  # Called without an argument it behaves like the regular Class#name and
  # returns the class name; there is no verse count to default to at class
  # level, and code that expects a class to report its own name keeps working.
  def self.name(i = NOT_GIVEN)
    return super() if NOT_GIVEN.equal?(i)
    return NAMES.last if i > 10
    # Guard against negative indexes wrapping around to the end of the table.
    return NAMES.first if i < 1

    NAMES[i]
  end

  # Name of this strophe, or of a strophe with i verses when i is given.
  def name(i = number_of_verses)
    Strophe.name(i)
  end
end
|