rbpar 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/rbpar.rb +5 -10
- data/lib/rbpar_engine.rb +47 -19
- data/lib/rbpar_main.rb +50 -6
- data/test/rbpar_engine_test.rb +0 -6
- data/test/rbpar_main_test.rb +42 -5
- metadata +3 -3
data/bin/rbpar.rb
CHANGED
@@ -62,25 +62,20 @@ def main
|
|
62
62
|
end
|
63
63
|
|
64
64
|
rbpar = RbParIterator.new(width)
|
65
|
-
first = true
|
66
65
|
|
67
66
|
# add lines to the parser object ...
|
68
67
|
rbpar << $stdin.readlines
|
69
68
|
|
70
69
|
# ... and read back ready paragraphs
|
71
70
|
rbpar.each do |paragraph|
|
72
|
-
# add a line break before each paragraph (except the first)
|
73
|
-
unless first
|
74
|
-
#$stdout.write("\n") unless first
|
75
|
-
else
|
76
|
-
first = false
|
77
|
-
end
|
78
71
|
# write the lines to stdout
|
79
|
-
paragraph.
|
72
|
+
paragraph.each_with_index do |line, i|
|
80
73
|
$stdout.write(line.rstrip)
|
81
|
-
$stdout.write(" ") if vim
|
82
|
-
$stdout.write(
|
74
|
+
$stdout.write(" ") if vim && i != (paragraph.length-1)
|
75
|
+
$stdout.write(rbpar.eol)
|
83
76
|
end
|
77
|
+
# end the paragraph
|
78
|
+
# $stdout.write("\n") if paragraph.length == 0
|
84
79
|
end
|
85
80
|
|
86
81
|
end
|
data/lib/rbpar_engine.rb
CHANGED
@@ -34,31 +34,59 @@ class DynamicBreaker
|
|
34
34
|
# to accomplish this in reasonable time.
|
35
35
|
|
36
36
|
def initialize
|
37
|
-
@
|
37
|
+
@dirty_cache = true
|
38
38
|
@cost = Hash.new(0)
|
39
39
|
end
|
40
40
|
|
41
|
-
def
|
41
|
+
def initCache(words)
|
42
|
+
|
43
|
+
# init the word cache
|
44
|
+
|
45
|
+
# create the matrix
|
46
|
+
@cache = Array.new(words.length) { Array.new(words.length, 0) }
|
42
47
|
|
43
|
-
#
|
48
|
+
# this implementation wastes one line of storage, if implemented
|
49
|
+
# with a matrix...
|
44
50
|
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
51
|
+
endIndex = 1 # start from the beginning, cell [0, 1]
|
52
|
+
tmp = endIndex
|
53
|
+
|
54
|
+
# Initialize the first diagonal with word lengths. Do not care
|
55
|
+
# about [i, i] since that will always be 0
|
56
|
+
endIndex.upto(words.length) do |i|
|
57
|
+
@cache.at(i-1)[i] = words.at(i-1).length
|
58
|
+
# printf "cache [%d][%d] = %d\n", i-1, i, @cache[i-1][i]
|
59
|
+
end
|
60
|
+
|
61
|
+
endIndex = 2 # start the real calculation on the next row
|
62
|
+
|
63
|
+
# The idea is as follows: the length of words (2,8) is the
|
64
|
+
# length of words (2,7) + (7,8), and the first value is
|
65
|
+
# previously calculated
|
66
|
+
|
67
|
+
endIndex.upto(words.length) do |i|
|
68
|
+
(i-2).downto(0) do |j|
|
69
|
+
@cache.at(j)[i] = @cache.at(i-1).at(i)+ @cache.at(j).at(i-1)
|
70
|
+
# printf "cache [%d][%d] = %d\n", j, i, @cache[j][i]
|
71
|
+
end
|
50
72
|
end
|
51
|
-
|
73
|
+
|
52
74
|
end
|
75
|
+
|
76
|
+
def charLength(words, beginIndex, endIndex)
|
53
77
|
|
54
|
-
|
78
|
+
# Return the length in characters from beginIndex to endIndex.
|
79
|
+
# Note that the spaces need to be added between the words.
|
80
|
+
|
81
|
+
# see if the cache needs updating
|
55
82
|
|
56
|
-
|
57
|
-
|
83
|
+
if @dirty_cache == true
|
84
|
+
initCache(words)
|
85
|
+
@dirty_cache = false
|
86
|
+
end
|
58
87
|
|
59
|
-
|
60
|
-
|
61
|
-
end - 1 # remove the last space
|
88
|
+
spaces = endIndex - beginIndex - 1
|
89
|
+
@cache[beginIndex][endIndex] + spaces
|
62
90
|
end
|
63
91
|
|
64
92
|
def calculatePenalty(words, availableLength, beginIndex, endIndex)
|
@@ -129,10 +157,10 @@ class DynamicBreaker
|
|
129
157
|
def isLegal(words, availableLength, beginIndex, endIndex)
|
130
158
|
charLength(words, beginIndex, endIndex) <= availableLength
|
131
159
|
end
|
132
|
-
|
160
|
+
|
133
161
|
def parse(words, availableLength)
|
134
162
|
|
135
|
-
# check the parameters, since this is the public
|
163
|
+
# check the parameters, since this is the public entry point to
|
136
164
|
# the algorithm
|
137
165
|
|
138
166
|
if words.nil? or availableLength <= 0
|
@@ -149,11 +177,11 @@ class DynamicBreaker
|
|
149
177
|
# only one word
|
150
178
|
return words
|
151
179
|
end
|
152
|
-
|
180
|
+
|
153
181
|
n = words.length
|
154
182
|
|
183
|
+
@dirty_cache = true
|
155
184
|
# one initialization is enough
|
156
|
-
@charLengthCache.clear
|
157
185
|
@cost.clear
|
158
186
|
|
159
187
|
# initialize with the words. Note: not as the default value
|
data/lib/rbpar_main.rb
CHANGED
@@ -15,6 +15,8 @@ class RbParIterator
|
|
15
15
|
# breaking system.
|
16
16
|
|
17
17
|
include Enumerable
|
18
|
+
|
19
|
+
attr_reader :eol
|
18
20
|
|
19
21
|
def initialize(width)
|
20
22
|
unless width > 0
|
@@ -23,9 +25,23 @@ class RbParIterator
|
|
23
25
|
@breaker = DynamicBreaker.new()
|
24
26
|
@width = width
|
25
27
|
@readlines = Array.new()
|
28
|
+
@eol = ''
|
26
29
|
end
|
27
30
|
|
28
31
|
def <<(lines)
|
32
|
+
|
33
|
+
# add lines to the paragraph
|
34
|
+
|
35
|
+
# check the line format
|
36
|
+
if lines[0] =~ /\r\n$/
|
37
|
+
@eol = "\r\n"
|
38
|
+
elsif lines[0] =~ /\n$/
|
39
|
+
@eol = "\n"
|
40
|
+
end
|
41
|
+
|
42
|
+
# TODO: possibly check that the lines are really lines with only
|
43
|
+
# one newline?
|
44
|
+
|
29
45
|
@readlines = @readlines + lines
|
30
46
|
end
|
31
47
|
|
@@ -41,13 +57,21 @@ class RbParIterator
|
|
41
57
|
|
42
58
|
# results has the processed sub-paragraphs
|
43
59
|
results = paragraphs.collect do |sub_paragraph|
|
60
|
+
|
44
61
|
# process removes the line breaks: it doesn't make sense to
|
45
62
|
# send pure line breaks there
|
46
|
-
|
47
|
-
|
48
|
-
|
63
|
+
|
64
|
+
# chomp also removes DOS-style line endings, which is good
|
65
|
+
|
66
|
+
if sub_paragraph[0].chomp.empty?
|
67
|
+
# since this is a paragraph, having a first line with
|
68
|
+
# only a newline character means that it is also the
|
69
|
+
# last line
|
49
70
|
[""]
|
71
|
+
else
|
72
|
+
sub_paragraph.process!(@breaker, @width)
|
50
73
|
end
|
74
|
+
|
51
75
|
end
|
52
76
|
|
53
77
|
# combine the result paragraphs into one
|
@@ -58,10 +82,30 @@ class RbParIterator
|
|
58
82
|
end
|
59
83
|
|
60
84
|
def each
|
61
|
-
|
62
|
-
|
63
|
-
|
85
|
+
paragraph_break = Regexp.new('^' << @eol << '$')
|
86
|
+
|
87
|
+
# find all paragraph breaks (the indices of empty lines)
|
88
|
+
|
89
|
+
splitpoints = Array.new
|
90
|
+
|
91
|
+
@readlines.each_with_index do |line, i|
|
92
|
+
splitpoints << i if line.chomp.empty?
|
64
93
|
end
|
94
|
+
|
95
|
+
previous_splitpoint = 0
|
96
|
+
|
97
|
+
if splitpoints.length > 0 && @readlines.length > 1
|
98
|
+
splitpoints.each do |splitpoint|
|
99
|
+
result = process_paragraph(@readlines[previous_splitpoint .. splitpoint])
|
100
|
+
|
101
|
+
previous_splitpoint = splitpoint + 1
|
102
|
+
yield result
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
106
|
+
# the last paragraph
|
107
|
+
yield process_paragraph(@readlines[previous_splitpoint ... @readlines.length])
|
108
|
+
|
65
109
|
end
|
66
110
|
|
67
111
|
end
|
data/test/rbpar_engine_test.rb
CHANGED
@@ -17,12 +17,6 @@ class TestRbParEngine < Test::Unit::TestCase
|
|
17
17
|
assert_equal(words.length, @breaker.charLength(words.split(" "), 0, 4))
|
18
18
|
end
|
19
19
|
|
20
|
-
def test_real_char_length
|
21
|
-
words = "This is a test"
|
22
|
-
# FIXME: index should go from 0 to 3
|
23
|
-
assert_equal(words.length, @breaker.realCharLength(words.split(" "), 0, 4))
|
24
|
-
end
|
25
|
-
|
26
20
|
def test_parse
|
27
21
|
words = "Sed eget ligula. Nunc fringilla. In ullamcorper turpis quis tortor. Maecenas fringilla dui aliquet leo. Nulla nec mi ut mauris ultrices sollicitudin. Mauris feugiat ornare massa. Ut vitae dolor sed urna blandit imperdiet. Cras tempus, orci sollicitudin pulvinar ultricies, sapien urna fringilla risus, eu rhoncus metus nisi a risus. Aliquam erat volutpat. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia Curae; Duis iaculis lorem sit amet neque. Cras quis tellus. Ut molestie eros sit amet nibh blandit luctus. Quisque ac sem. Nulla condimentum eros rhoncus ipsum. Duis enim. Phasellus mattis posuere augue."
|
28
22
|
|
data/test/rbpar_main_test.rb
CHANGED
@@ -15,17 +15,30 @@ class TestRbParMain < Test::Unit::TestCase
|
|
15
15
|
lines1 = rbpar1.collect
|
16
16
|
assert_equal("", lines1[0][0])
|
17
17
|
assert_nil(lines1[0][1])
|
18
|
-
|
19
|
-
|
18
|
+
|
19
|
+
# readlines should handle these cases...
|
20
|
+
=begin
|
20
21
|
rbpar2 = RbParIterator.new(63)
|
21
22
|
|
22
23
|
lines2 = ["\n\n"]
|
23
24
|
rbpar2 << lines2
|
24
25
|
lines2 = rbpar2.collect
|
26
|
+
pp lines2
|
25
27
|
assert_equal("", lines2[0][0])
|
26
28
|
assert_equal("", lines2[0][1])
|
27
29
|
assert_nil(lines2[1])
|
28
30
|
|
31
|
+
rbpar3 = RbParIterator.new(63)
|
32
|
+
|
33
|
+
lines3 = ["\n\n\n"]
|
34
|
+
rbpar3 << lines3
|
35
|
+
lines3 = rbpar3.collect
|
36
|
+
pp lines3
|
37
|
+
assert_equal("", lines3[0][0])
|
38
|
+
assert_equal("", lines3[0][1])
|
39
|
+
assert_equal("", lines3[0][2])
|
40
|
+
assert_nil(lines3[1])
|
41
|
+
=end
|
29
42
|
end
|
30
43
|
|
31
44
|
def test_paragraphs
|
@@ -168,6 +181,7 @@ class TestRbParMain < Test::Unit::TestCase
|
|
168
181
|
rbpar7 << lines7
|
169
182
|
|
170
183
|
lines7 = rbpar7.collect
|
184
|
+
|
171
185
|
|
172
186
|
assert_equal("> text text", lines7[0][0])
|
173
187
|
assert_equal("", lines7[0][1])
|
@@ -175,6 +189,32 @@ class TestRbParMain < Test::Unit::TestCase
|
|
175
189
|
|
176
190
|
end
|
177
191
|
|
192
|
+
def test_dos_line_breaks
|
193
|
+
|
194
|
+
# dos and unix line breaks should be equivalent
|
195
|
+
|
196
|
+
rbpar1 = RbParIterator.new(15)
|
197
|
+
lines1 = [ "> text text\n", "\n", "> text\n" ]
|
198
|
+
rbpar1 << lines1
|
199
|
+
|
200
|
+
lines1 = rbpar1.collect
|
201
|
+
|
202
|
+
assert_equal("> text text", lines1[0][0])
|
203
|
+
assert_equal("", lines1[0][1])
|
204
|
+
assert_equal("> text", lines1[1][0])
|
205
|
+
|
206
|
+
rbpar2 = RbParIterator.new(15)
|
207
|
+
lines2 = [ "> text text\r\n", "\r\n", "> text\r\n" ]
|
208
|
+
rbpar2 << lines2
|
209
|
+
|
210
|
+
lines2 = rbpar2.collect
|
211
|
+
|
212
|
+
assert_equal("> text text", lines2[0][0])
|
213
|
+
assert_equal("", lines2[0][1])
|
214
|
+
assert_equal("> text", lines2[1][0])
|
215
|
+
|
216
|
+
end
|
217
|
+
|
178
218
|
def test_signature
|
179
219
|
|
180
220
|
# test that signature paragraphs behave correctly
|
@@ -188,7 +228,6 @@ class TestRbParMain < Test::Unit::TestCase
|
|
188
228
|
lines_sig_1 = rbpar_sig_1.collect
|
189
229
|
assert_equal("-- ", lines_sig_1[0][0])
|
190
230
|
assert_equal(" - NanoNano", lines_sig_1[0][1])
|
191
|
-
# pp lines_sig_1
|
192
231
|
|
193
232
|
lines_sig_2 = [">-- \n", "> - NanoNano\n"]
|
194
233
|
|
@@ -197,7 +236,6 @@ class TestRbParMain < Test::Unit::TestCase
|
|
197
236
|
lines_sig_2 = rbpar_sig_2.collect
|
198
237
|
assert_equal("> -- ", lines_sig_2[0][0])
|
199
238
|
assert_equal("> - NanoNano", lines_sig_2[0][1])
|
200
|
-
# pp lines_sig_2
|
201
239
|
|
202
240
|
lines_sig_3 = ["> -- \n", "> - NanoNano\n"]
|
203
241
|
|
@@ -206,7 +244,6 @@ class TestRbParMain < Test::Unit::TestCase
|
|
206
244
|
lines_sig_3 = rbpar_sig_3.collect
|
207
245
|
assert_equal("> -- ", lines_sig_3[0][0])
|
208
246
|
assert_equal("> - NanoNano", lines_sig_3[0][1])
|
209
|
-
# pp lines_sig_3
|
210
247
|
end
|
211
248
|
|
212
249
|
end
|
metadata
CHANGED
@@ -3,14 +3,14 @@ rubygems_version: 0.9.4
|
|
3
3
|
specification_version: 1
|
4
4
|
name: rbpar
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.
|
7
|
-
date:
|
6
|
+
version: 0.2.0
|
7
|
+
date: 2008-01-26 00:00:00 +02:00
|
8
8
|
summary: A program for managing pargraph formatting
|
9
9
|
require_paths:
|
10
10
|
- lib
|
11
11
|
email: ismo@iki.fi
|
12
12
|
homepage: ""
|
13
|
-
rubyforge_project:
|
13
|
+
rubyforge_project: rbpar
|
14
14
|
description:
|
15
15
|
autorequire: rbpar
|
16
16
|
default_executable: rbpar.rb
|