poetize 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +6 -0
- data/Gemfile +4 -0
- data/README.md +55 -0
- data/Rakefile +1 -0
- data/analyzer/analyzer.rb +44 -0
- data/analyzer/strophe_analyzer.rb +84 -0
- data/bin/poetize +37 -0
- data/lib/file_handler.rb +16 -0
- data/lib/hyphenator.rb +78 -0
- data/lib/poetize.rb +5 -0
- data/lib/poetize/version.rb +3 -0
- data/lib/text_handler.rb +69 -0
- data/model/poem.rb +25 -0
- data/model/strophe.rb +20 -0
- data/poetize.gemspec +24 -0
- data/resources/syllables_dict.txt +156176 -0
- data/test/file_handler_spec.rb +18 -0
- data/test/hyphenator_spec.rb +25 -0
- data/test/poem_spec.rb +22 -0
- data/test/samples/01.txt +34 -0
- data/test/samples/02_ballad.txt +26 -0
- data/test/samples/02_doubled.txt +62 -0
- data/test/samples/02_full.txt +32 -0
- data/test/samples/02_original.txt +30 -0
- data/test/spec_helper.rb +28 -0
- data/test/strophe_analyzer_spec.rb +33 -0
- data/test/strophe_spec.rb +17 -0
- data/test/text_handler_spec.rb +20 -0
- metadata +103 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/README.md
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
#ABOUT
|
2
|
+
|
3
|
+
Our goal is to provide a tool that parses a poem and analyzes it according to the following:
|
4
|
+
|
5
|
+
* Strophes
|
6
|
+
* Name (based on number of verses) [done]
|
7
|
+
* Type (based on verse metrics)
|
8
|
+
* Verses
|
9
|
+
* Syllable division
|
10
|
+
* Grammatical
|
11
|
+
* Poetical
|
12
|
+
* Metric and name
|
13
|
+
* Cadency (rhythm)
|
14
|
+
* Poem
|
15
|
+
* Number of strophes [done]
|
16
|
+
* Number of verses [done]
|
17
|
+
* Rhymes
|
18
|
+
* Name (based on strophe structure)
|
19
|
+
|
20
|
+
We are focusing on the Portuguese (Brazilian) language, since it's our native language and phonetics vary from language to language.
|
21
|
+
|
22
|
+
For more details and examples, see our wiki: https://github.com/lfilho/poetize/wiki
|
23
|
+
|
24
|
+
#USAGE
|
25
|
+
|
26
|
+
Make sure the poetize file is executable:
|
27
|
+
|
28
|
+
`$ chmod +x poetize`
|
29
|
+
|
30
|
+
Then run in your shell:
|
31
|
+
|
32
|
+
`$ poetize file_path [, ignore_line[, ignore_line[,...]]]`
|
33
|
+
|
34
|
+
The ignore_line argument is the line(s) you want the program to ignore (i.e. the title, author, date created, dedicated to, etc...)
|
35
|
+
|
36
|
+
##Usage examples:
|
37
|
+
|
38
|
+
`poetize ./samples/my_poem.txt`
|
39
|
+
|
40
|
+
`poetize /home/user/docs/love_poem.txt 1 2 13 14`
|
41
|
+
#REFERENCES
|
42
|
+
|
43
|
+
Good sites for learning about "poetic science":
|
44
|
+
|
45
|
+
* For the Portuguese language:
|
46
|
+
* http://mpbsapiens.com/ciencia-poetica/
|
47
|
+
* http://pt.wikipedia.org/wiki/Versifica%C3%A7%C3%A3o
|
48
|
+
* http://www.portaldalinguaportuguesa.org/?action=divisao&page=present
|
49
|
+
|
50
|
+
* For the English language:
|
51
|
+
* http://en.wikipedia.org/wiki/Poem
|
52
|
+
|
53
|
+
#ABOUT THE DEVELOPERS
|
54
|
+
|
55
|
+
We started this project to learn Ruby and TDD so expect it to be noobly coded :-) Feel free to refactor the code and contribute to our learning experience ;-)
|
data/Rakefile
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
@@ -0,0 +1,44 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
$app_path = File.dirname(__FILE__) + '/../'
|
3
|
+
require $app_path + 'lib/file_handler'
|
4
|
+
require $app_path + 'lib/hyphenator'
|
5
|
+
require $app_path + 'analyzer/strophe_analyzer'
|
6
|
+
|
7
|
+
# Builds the plain-text report for a poem: the overall counts, the findings
# returned by StropheAnalyzer and the syllable division of every body line.
class Analyzer
  # poem - object that responds to number_of_strophes, number_of_verses and
  #        body (an Array of line Strings); it is also handed to
  #        StropheAnalyzer.new.
  def initialize(poem)
    @poem = poem
    @ident = 3 # number of spaces used to indent report lines
  end

  # Returns the complete report as one String. Every report line, including
  # the last one, is terminated by "\n".
  def analyze
    prefix = "#{' ' * @ident}- "

    report = [
      'This poem:',
      "#{prefix}Contains #{@poem.number_of_strophes} strophes;",
      "#{prefix}Contains #{@poem.number_of_verses} verses;",
      'As for the strophes:'
    ]
    report.concat(analyze_strophes.map { |finding| prefix + finding })
    report << 'Grammatical syllable division:'

    report.join("\n") + "\n" + divide_grammatically
  end

  private

  # Returns the Array of finding Strings produced by StropheAnalyzer#analyze.
  def analyze_strophes
    StropheAnalyzer.new(@poem).analyze
  end

  # Returns the poem body with every word replaced by its hyphenated form.
  # Each output line is indented by @ident spaces, each word is followed by a
  # single space, and each line ends with "\n".
  def divide_grammatically
    # One Hyphenator serves the whole poem; creating one per word repeated
    # the hyphenator setup for every single word.
    hyphenator = Hyphenator.new
    indent = ' ' * @ident

    @poem.body.map do |line|
      words = line.split(' ').map { |word| "#{hyphenator.hyphenate(word)} " }
      "#{indent}#{words.join}\n"
    end.join
  end
end
|
@@ -0,0 +1,84 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
$app_path = File.dirname(__FILE__) + '/../'
|
3
|
+
require $app_path + 'lib/file_handler'
|
4
|
+
|
5
|
+
# Produces the strophe-level findings of a poem: the name of each strophe
# (looked up through Strophe.name by verse count) and, when the verse counts
# fit, the fixed form of the whole poem.
class StropheAnalyzer
  attr_accessor :number_of_verses, :number_of_strophes

  # poem - object that responds to strophes (each responding to
  #        number_of_verses), number_of_strophes and number_of_verses.
  def initialize(poem)
    @strophes = poem.strophes
    @analyzis_output = []
    @number_of_strophes = poem.number_of_strophes
    @number_of_verses = poem.number_of_verses
  end

  # Runs every analysis and returns the findings as an Array of Strings.
  # The output is rebuilt on each call, so calling analyze twice does not
  # duplicate findings.
  def analyze
    @analyzis_output = []
    analyze_names
    analyze_fixed_form
    @analyzis_output
  end

  # Returns true, and appends the ballad finding to the output, when every
  # odd-positioned strophe has the same verse count and every even-positioned
  # strophe has half of it (integer division). Returns false otherwise.
  #
  # A poem needs at least two strophes to qualify: with no even-positioned
  # strophe there is nothing "between" the long strophes, and naming the
  # missing strophe would call Strophe.name(nil).
  def fixed_form_ballad?
    odd_verses = []
    even_verses = []
    verse_counts.each_slice(2) do |odd, even|
      odd_verses << odd
      even_verses << even unless even.nil?
    end
    return false if even_verses.empty?

    long = odd_verses.first
    odds_match = odd_verses.all? { |count| count == long }
    evens_match = even_verses.all? { |count| count == long / 2 }
    return false unless odds_match && evens_match

    @analyzis_output << "Which makes this poem is a Ballad! (" +
                        Strophe.name(long) + " verses with " +
                        Strophe.name(even_verses.first) + " verses between them)."
    true
  end

  # TODO: not implemented yet; always false.
  def fixed_form_italian_sonet?
    false
  end

  # TODO: not implemented yet; always false.
  def fixed_form_english_sonet?
    false
  end

  private

  # Verse count of every strophe, in poem order. Computed on demand so the
  # public fixed_form_* predicates also work before analyze has been called.
  def verse_counts
    @strophes.map(&:number_of_verses)
  end

  # Appends one finding per distinct verse count, listing the 1-based
  # positions of the strophes that share it, e.g.
  # "Strophe 2 is X" or "Strophes 1, 3 and 5 are Xs".
  def analyze_names
    positions_by_count = {}
    @strophes.each_with_index do |strophe, index|
      (positions_by_count[strophe.number_of_verses] ||= []) << index + 1
    end

    positions_by_count.each do |count, positions|
      name = Strophe.name(count)
      if positions.size == 1
        @analyzis_output << "Strophe #{positions.first} is #{name}"
      else
        listed = "#{positions[0..-2].join(', ')} and #{positions.last}"
        @analyzis_output << "Strophes #{listed} are #{name}s"
      end
    end
  end

  # Tries the known fixed forms in order and stops at the first match.
  def analyze_fixed_form
    fixed_form_ballad? || fixed_form_italian_sonet? || fixed_form_english_sonet?
  end
end
|
data/bin/poetize
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
$app_path = File.dirname(__FILE__) + '/../'
|
3
|
+
require $app_path + 'lib/text_handler'
|
4
|
+
require $app_path + 'lib/file_handler'
|
5
|
+
require $app_path + 'model/poem'
|
6
|
+
require $app_path + 'analyzer/analyzer'
|
7
|
+
|
8
|
+
# Command-line entry point: reads the poem file named by the first argument,
# sets aside the lines whose (1-based) numbers are given by the remaining
# arguments as metadata, and prints the analysis of what is left.
if ARGV.empty?
  puts <<-eol
== Usage:
poetize file_path [, ignore_line[, ignore_line[,...]]]
== Examples:
poetize /home/user/docs/my_poem.txt
poetize /home/user/docs/my_poem.txt 1 2 13 14
  eol
  exit
end

file_path = ARGV[0]
# File.exists? was removed in Ruby 3.2; File.exist? works on every version.
# Stop here when the file is missing instead of analyzing an empty String.
abort "poetize: no such file: #{file_path}" unless File.exist?(file_path)

# Convert the 1-based line numbers to 0-based indexes. Anything that is not a
# positive integer is rejected: String#to_i would turn it into 0, i.e. index
# -1, which silently removes the LAST line of the poem.
ignore_lines = ARGV.drop(1).map do |arg|
  abort "poetize: invalid line number: #{arg}" unless arg =~ /\A[1-9]\d*\z/
  arg.to_i - 1
end.uniq.sort

text = FileHandler.get_file_contents file_path

# Collect the metadata lines first (numbers past the end of the file yield
# nothing), then delete from the highest index down so that earlier
# deletions do not shift the lines still to be removed.
metadata = ignore_lines.map { |index| text[index] }.compact
ignore_lines.reverse_each { |index| text.delete_at index }

metadata = TextHandler.normalize_text metadata
body = TextHandler.normalize_text text

poem = Poem.new metadata, body
analyzer = Analyzer.new poem
puts analyzer.analyze
|
data/lib/file_handler.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
# File-reading helper used by the command-line script.
class FileHandler
  # Returns the content of the file at +file_path+ as an Array of Strings,
  # one per line, each still carrying its line terminator.
  #
  # Raises RuntimeError with the underlying error's message when the file
  # cannot be read. Only StandardError is translated, so interrupts and
  # exit requests are no longer swallowed and re-labelled.
  def self.get_file_contents(file_path)
    File.readlines(file_path)
  rescue StandardError => e
    raise e.message
  end
end
|
data/lib/hyphenator.rb
ADDED
@@ -0,0 +1,78 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'rubygems'
|
3
|
+
require 'text-hyphen'
|
4
|
+
|
5
|
+
# Hyphenates single words with Text::Hyphen configured for 'pt'. Accented
# characters are swapped for their plain counterparts before the word is
# handed to Text::Hyphen and restored in the hyphenated result.
class Hyphenator
  ACCENTED_CHARS = /[çãâäõáéíóúêôàü]/

  # Plain replacement for every character matched by ACCENTED_CHARS.
  # NOTE(review): upper-case accented letters (e.g. "É") are not covered and
  # reach Text::Hyphen unchanged — confirm whether that is intended.
  UNACCENTED = {
    'ç' => 'c',
    'ã' => 'a', 'â' => 'a', 'ä' => 'a', 'à' => 'a', 'á' => 'a',
    'é' => 'e', 'ê' => 'e',
    'í' => 'i',
    'õ' => 'o', 'ô' => 'o', 'ó' => 'o',
    'ú' => 'u', 'ü' => 'u'
  }.freeze

  def initialize
    @hyphenator = Text::Hyphen.new do |h|
      h.left = 0
      h.right = 0
      h.language = 'pt'
    end
  end

  # Returns +word+ as rendered by Text::Hyphen#visualize, with the original
  # accented characters put back. +word+ itself is not modified.
  #
  # All bookkeeping is local to the call, so an error raised while
  # hyphenating one word cannot leak accent positions into the next one.
  def hyphenate(word)
    accents = {} # character index in +word+ => accented character found there
    plain = word.each_char.with_index.map do |char, index|
      next char unless UNACCENTED.key?(char)

      accents[index] = char
      UNACCENTED[char]
    end.join

    hyphenated = @hyphenator.visualize(plain)
    return hyphenated if accents.empty?

    # Every break position at or before a character pushes that character one
    # place to the right in the visualized word.
    breaks = @hyphenator.hyphenate(plain)
    accents.each do |index, char|
      shift = breaks.count { |position| position <= index }
      hyphenated[index + shift] = char
    end
    hyphenated
  end
end
|
data/lib/poetize.rb
ADDED
data/lib/text_handler.rb
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
class Object
  # Returns true for nil, for a String (or Symbol) that is empty or made of
  # whitespace only, and for any other object whose empty? is true.
  # Returns false for everything else, including objects without empty?.
  #
  # The whole text is anchored with \A and \z: with ^ and $ a multi-line
  # String containing one empty line (e.g. "\nfoo") counted as blank.
  def blank?
    return true if nil?
    return !(to_s =~ /\A\s*\z/).nil? if is_a?(String) || is_a?(Symbol)

    respond_to?(:empty?) ? empty? : false
  end
end
|
7
|
+
|
8
|
+
# Cleans up raw poem text before it is turned into a Poem.
class TextHandler
  # Returns a copy of +string+ without leading/trailing whitespace and with
  # runs of spaces squeezed into one. The argument is left untouched.
  def self.normalize_line(string)
    string.strip.squeeze(' ')
  end

  # Normalizes a whole text.
  #
  # text - a String (split on CRLF, CR or LF line endings) or an Array of
  #        line Strings; nil entries of an Array are ignored.
  #
  # Returns an Array of Strings in which:
  # * leading and trailing blank lines are removed;
  # * every remaining line is normalized with normalize_line;
  # * each strophe separator is exactly one "" entry.
  #
  # What counts as a strophe separator depends on the text: if it contains
  # at least one run of two or more blank lines, only such runs separate
  # strophes and single blank lines are dropped (double-spaced text);
  # otherwise every single blank line is a separator.
  #
  # Verses are never merged or altered beyond normalize_line, so a verse
  # repeated on consecutive lines is kept as many times as it appears.
  #
  # Raises RuntimeError when +text+ is neither a String nor an Array.
  def self.normalize_text(text)
    unless text.is_a?(String) || text.is_a?(Array)
      raise "Argument must be a String or a Array"
    end

    # Trying to normalize for different OSes
    lines = text.is_a?(String) ? text.gsub(/\015\012?/, "\n").split(/\n/) : text
    lines = remove_padding_lines(lines)

    double_spaced = lines.each_cons(2).any? { |a, b| blank_line?(a) && blank_line?(b) }
    separator_size = double_spaced ? 2 : 1

    normalized = []
    blank_run = 0
    lines.each do |line|
      if blank_line?(line)
        blank_run += 1
        next
      end

      # A long enough run of blank lines has just ended: emit one separator.
      normalized << '' if blank_run >= separator_size
      blank_run = 0
      normalized << normalize_line(line)
    end
    normalized
  end

  # Returns a copy of +array+ without nil entries and without the blank
  # lines at its start and at its end.
  def self.remove_padding_lines(array)
    remove_first_lines(remove_first_lines(array).reverse).reverse
  end

  # Returns a copy of +array+ without nil entries and without the blank
  # lines at its start. The argument is left untouched.
  def self.remove_first_lines(array)
    array.compact.drop_while { |line| blank_line?(line) }
  end

  # True when +line+ holds nothing but whitespace.
  def self.blank_line?(line)
    line.strip.empty?
  end
  private_class_method :blank_line?
end
|
data/model/poem.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
$app_path = File.dirname(__FILE__) + '/../'
|
2
|
+
require $app_path + 'model/strophe'
|
3
|
+
|
4
|
+
# A poem: its metadata lines, its body lines and the strophes found in the
# body.
class Poem
  # Characters that must not reach HTML output verbatim. Kept as a small
  # local table so this file needs no additional require.
  HTML_ESCAPES = {
    '&' => '&amp;', '<' => '&lt;', '>' => '&gt;', '"' => '&quot;', "'" => '&#39;'
  }.freeze

  attr_accessor :metadata, :body, :strophes, :number_of_strophes, :number_of_verses

  # metadata - Array of Strings (title, author, ...).
  # body     - Array of line Strings in which "" separates strophes.
  def initialize(metadata, body)
    @metadata, @body = metadata, body
    @strophes = split_in_strophes body
    @number_of_verses = @strophes.inject(0) { |sum, strophe| sum + strophe.number_of_verses }
    @number_of_strophes = @strophes.size
  end

  # Returns one Strophe per run of consecutive non-empty lines of +body+.
  def split_in_strophes(body)
    body.chunk(&:empty?)
        .reject { |is_separator, _lines| is_separator }
        .map { |_is_separator, verses| Strophe.new(verses) }
  end

  # Returns the metadata lines, an empty line and the body lines joined by
  # "\n".
  def to_s
    "#{@metadata.join("\n")}\n\n#{@body.join("\n")}"
  end

  # Returns to_s as HTML: the text is escaped first, so characters such as
  # "<" or "&" in the poem cannot be read as markup, and then every "\n"
  # becomes "<br />".
  def to_html
    to_s.gsub(/[&<>"']/, HTML_ESCAPES).gsub("\n", "<br />")
  end
end
|
data/model/strophe.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
# A strophe: a group of verses, named after the number of verses it holds.
class Strophe
  # Strophe names indexed by number of verses; the last entry is used for
  # anything longer than ten verses.
  # NOTE(review): "Quintilhas" is the only plural entry — confirm whether
  # "Quintilha" was intended.
  NAMES = ["", 'Monóstico', "Dístico", "Terceto", "Quadra", "Quintilhas", "Sextilha", "Sétima", "Oitava", "Nona", "Décima", "Livre ou Polimérica"].freeze

  attr_accessor :number_of_verses, :verses

  # arr - Array with the verses of the strophe.
  def initialize(arr)
    @verses = arr
    @number_of_verses = arr.size
  end

  # Returns the name of a strophe with +i+ verses.
  #
  # Called without an argument it behaves like the regular Module#name and
  # returns the class name. The previous default referred to the instance
  # method number_of_verses, which does not exist on the class, so the call
  # raised NameError.
  def self.name(i = nil)
    return super() if i.nil?
    return NAMES.last if i > 10

    NAMES[i]
  end

  # Returns the name for +i+ verses, by default this strophe's own count.
  def name(i = number_of_verses)
    Strophe.name(i)
  end
end
|