rbpar 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
data/bin/rbpar.rb CHANGED
@@ -62,25 +62,20 @@ def main
62
62
  end
63
63
 
64
64
  rbpar = RbParIterator.new(width)
65
- first = true
66
65
 
67
66
  # add lines to the parser object ...
68
67
  rbpar << $stdin.readlines
69
68
 
70
69
  # ... and read back ready paragraphs
71
70
  rbpar.each do |paragraph|
72
- # add a line break before each paragraph (except the first)
73
- unless first
74
- #$stdout.write("\n") unless first
75
- else
76
- first = false
77
- end
78
71
  # write the lines to stdout
79
- paragraph.each do |line|
72
+ paragraph.each_with_index do |line, i|
80
73
  $stdout.write(line.rstrip)
81
- $stdout.write(" ") if vim
82
- $stdout.write("\n")
74
+ $stdout.write(" ") if vim && i != (paragraph.length-1)
75
+ $stdout.write(rbpar.eol)
83
76
  end
77
+ # end the paragraph
78
+ # $stdout.write("\n") if paragraph.length == 0
84
79
  end
85
80
 
86
81
  end
data/lib/rbpar_engine.rb CHANGED
@@ -34,31 +34,59 @@ class DynamicBreaker
34
34
  # to accomplish this in reasonable time.
35
35
 
36
36
  def initialize
37
- @charLengthCache = Hash.new
37
+ @dirty_cache = true
38
38
  @cost = Hash.new(0)
39
39
  end
40
40
 
41
- def charLength(words, beginIndex, endIndex)
41
+ def initCache(words)
42
+
43
+ # init the word cache
44
+
45
+ # create the matrix
46
+ @cache = Array.new(words.length) { Array.new(words.length, 0) }
42
47
 
43
- # see if the result is already cached
48
+ # this implementation wastes one line of storage, if implemented
49
+ # with a matrix...
44
50
 
45
- token = beginIndex.to_s+ "_" + endIndex.to_s
46
- cachedResult = @charLengthCache[token]
47
- if cachedResult.nil?
48
- length = realCharLength(words, beginIndex, endIndex)
49
- return @charLengthCache[token] = length
51
+ endIndex = 1 # start from the beginning, cell [0, 1]
52
+ tmp = endIndex
53
+
54
+ # Initialize the first diagonal with word lengths. Do not care
55
+ # about [i, i] since that will always be 0
56
+ endIndex.upto(words.length) do |i|
57
+ @cache.at(i-1)[i] = words.at(i-1).length
58
+ # printf "cache [%d][%d] = %d\n", i-1, i, @cache[i-1][i]
59
+ end
60
+
61
+ endIndex = 2 # start the real calculation on the next row
62
+
63
+ # The idea is as follows: the length of words (2,8) is the
64
+ # length of words (2,7) + (7,8), and the first value is
65
+ # previously calculated
66
+
67
+ endIndex.upto(words.length) do |i|
68
+ (i-2).downto(0) do |j|
69
+ @cache.at(j)[i] = @cache.at(i-1).at(i)+ @cache.at(j).at(i-1)
70
+ # printf "cache [%d][%d] = %d\n", j, i, @cache[j][i]
71
+ end
50
72
  end
51
- return cachedResult
73
+
52
74
  end
75
+
76
+ def charLength(words, beginIndex, endIndex)
53
77
 
54
- def realCharLength(words, beginIndex, endIndex)
78
+ # Return the length in characters from beginIndex to endIndex.
79
+ # Note that the spaces need to be added between the words.
80
+
81
+ # see if the cache needs updating
55
82
 
56
- # sum the lenghts of the words and add 1 for the space after the
57
- # word
83
+ if @dirty_cache == true
84
+ initCache(words)
85
+ @dirty_cache = false
86
+ end
58
87
 
59
- return (beginIndex...endIndex).inject(0) do |sum, i|
60
- sum + words[i].length + 1
61
- end - 1 # remove the last space
88
+ spaces = endIndex - beginIndex - 1
89
+ @cache[beginIndex][endIndex] + spaces
62
90
  end
63
91
 
64
92
  def calculatePenalty(words, availableLength, beginIndex, endIndex)
@@ -129,10 +157,10 @@ class DynamicBreaker
129
157
  def isLegal(words, availableLength, beginIndex, endIndex)
130
158
  charLength(words, beginIndex, endIndex) <= availableLength
131
159
  end
132
-
160
+
133
161
  def parse(words, availableLength)
134
162
 
135
- # check the parameters, since this is the public entrypoint to
163
+ # check the parameters, since this is the public entry point to
136
164
  # the algorithm
137
165
 
138
166
  if words.nil? or availableLength <= 0
@@ -149,11 +177,11 @@ class DynamicBreaker
149
177
  # only one word
150
178
  return words
151
179
  end
152
-
180
+
153
181
  n = words.length
154
182
 
183
+ @dirty_cache = true
155
184
  # one initialization is enough
156
- @charLengthCache.clear
157
185
  @cost.clear
158
186
 
159
187
  # initialize with the words. Note: not as the default value
data/lib/rbpar_main.rb CHANGED
@@ -15,6 +15,8 @@ class RbParIterator
15
15
  # breaking system.
16
16
 
17
17
  include Enumerable
18
+
19
+ attr_reader :eol
18
20
 
19
21
  def initialize(width)
20
22
  unless width > 0
@@ -23,9 +25,23 @@ class RbParIterator
23
25
  @breaker = DynamicBreaker.new()
24
26
  @width = width
25
27
  @readlines = Array.new()
28
+ @eol = ''
26
29
  end
27
30
 
28
31
  def <<(lines)
32
+
33
+ # add lines to the paragraph
34
+
35
+ # check the line format
36
+ if lines[0] =~ /\r\n$/
37
+ @eol = "\r\n"
38
+ elsif lines[0] =~ /\n$/
39
+ @eol = "\n"
40
+ end
41
+
42
+ # TODO: possibly check that the lines are really lines with only
43
+ # one newline?
44
+
29
45
  @readlines = @readlines + lines
30
46
  end
31
47
 
@@ -41,13 +57,21 @@ class RbParIterator
41
57
 
42
58
  # results has the processed sub-paragraphs
43
59
  results = paragraphs.collect do |sub_paragraph|
60
+
44
61
  # process removes the line breaks: it doesn't make sense to
45
62
  # send pure line breaks there
46
- if sub_paragraph[0] != "\n"
47
- sub_paragraph.process!(@breaker, @width)
48
- else
63
+
64
+ # chomp also removes DOS-style line endings, which is good
65
+
66
+ if sub_paragraph[0].chomp.empty?
67
+ # since this is a paragraph, having a first line with
68
+ # only a newline character means that it is also the
69
+ # last line
49
70
  [""]
71
+ else
72
+ sub_paragraph.process!(@breaker, @width)
50
73
  end
74
+
51
75
  end
52
76
 
53
77
  # combine the result paragraphs into one
@@ -58,10 +82,30 @@ class RbParIterator
58
82
  end
59
83
 
60
84
  def each
61
- @readlines.join.each('') do |paragraph|
62
- lines = paragraph.collect
63
- yield process_paragraph(lines)
85
+ paragraph_break = Regexp.new('^' << @eol << '$')
86
+
87
+ # find all paragraph breaks (the indices of blank lines)
88
+
89
+ splitpoints = Array.new
90
+
91
+ @readlines.each_with_index do |line, i|
92
+ splitpoints << i if line.chomp.empty?
64
93
  end
94
+
95
+ previous_splitpoint = 0
96
+
97
+ if splitpoints.length > 0 && @readlines.length > 1
98
+ splitpoints.each do |splitpoint|
99
+ result = process_paragraph(@readlines[previous_splitpoint .. splitpoint])
100
+
101
+ previous_splitpoint = splitpoint + 1
102
+ yield result
103
+ end
104
+ end
105
+
106
+ # the last paragraph
107
+ yield process_paragraph(@readlines[previous_splitpoint ... @readlines.length])
108
+
65
109
  end
66
110
 
67
111
  end
@@ -17,12 +17,6 @@ class TestRbParEngine < Test::Unit::TestCase
17
17
  assert_equal(words.length, @breaker.charLength(words.split(" "), 0, 4))
18
18
  end
19
19
 
20
- def test_real_char_length
21
- words = "This is a test"
22
- # FIXME: index should go from 0 to 3
23
- assert_equal(words.length, @breaker.realCharLength(words.split(" "), 0, 4))
24
- end
25
-
26
20
  def test_parse
27
21
  words = "Sed eget ligula. Nunc fringilla. In ullamcorper turpis quis tortor. Maecenas fringilla dui aliquet leo. Nulla nec mi ut mauris ultrices sollicitudin. Mauris feugiat ornare massa. Ut vitae dolor sed urna blandit imperdiet. Cras tempus, orci sollicitudin pulvinar ultricies, sapien urna fringilla risus, eu rhoncus metus nisi a risus. Aliquam erat volutpat. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia Curae; Duis iaculis lorem sit amet neque. Cras quis tellus. Ut molestie eros sit amet nibh blandit luctus. Quisque ac sem. Nulla condimentum eros rhoncus ipsum. Duis enim. Phasellus mattis posuere augue."
28
22
 
@@ -15,17 +15,30 @@ class TestRbParMain < Test::Unit::TestCase
15
15
  lines1 = rbpar1.collect
16
16
  assert_equal("", lines1[0][0])
17
17
  assert_nil(lines1[0][1])
18
- assert_nil(lines1[1])
19
-
18
+
19
+ # readlines should handle these cases...
20
+ =begin
20
21
  rbpar2 = RbParIterator.new(63)
21
22
 
22
23
  lines2 = ["\n\n"]
23
24
  rbpar2 << lines2
24
25
  lines2 = rbpar2.collect
26
+ pp lines2
25
27
  assert_equal("", lines2[0][0])
26
28
  assert_equal("", lines2[0][1])
27
29
  assert_nil(lines2[1])
28
30
 
31
+ rbpar3 = RbParIterator.new(63)
32
+
33
+ lines3 = ["\n\n\n"]
34
+ rbpar3 << lines2
35
+ lines3 = rbpar3.collect
36
+ pp lines3
37
+ assert_equal("", lines2[0][0])
38
+ assert_equal("", lines2[0][1])
39
+ assert_equal("", lines2[0][2])
40
+ assert_nil(lines2[1])
41
+ =end
29
42
  end
30
43
 
31
44
  def test_paragraphs
@@ -168,6 +181,7 @@ class TestRbParMain < Test::Unit::TestCase
168
181
  rbpar7 << lines7
169
182
 
170
183
  lines7 = rbpar7.collect
184
+
171
185
 
172
186
  assert_equal("> text text", lines7[0][0])
173
187
  assert_equal("", lines7[0][1])
@@ -175,6 +189,32 @@ class TestRbParMain < Test::Unit::TestCase
175
189
 
176
190
  end
177
191
 
192
+ def test_dos_line_breaks
193
+
194
+ # dos and unix line breaks should be equivalent
195
+
196
+ rbpar1 = RbParIterator.new(15)
197
+ lines1 = [ "> text text\n", "\n", "> text\n" ]
198
+ rbpar1 << lines1
199
+
200
+ lines1 = rbpar1.collect
201
+
202
+ assert_equal("> text text", lines1[0][0])
203
+ assert_equal("", lines1[0][1])
204
+ assert_equal("> text", lines1[1][0])
205
+
206
+ rbpar2 = RbParIterator.new(15)
207
+ lines2 = [ "> text text\r\n", "\r\n", "> text\r\n" ]
208
+ rbpar2 << lines2
209
+
210
+ lines2 = rbpar2.collect
211
+
212
+ assert_equal("> text text", lines2[0][0])
213
+ assert_equal("", lines2[0][1])
214
+ assert_equal("> text", lines2[1][0])
215
+
216
+ end
217
+
178
218
  def test_signature
179
219
 
180
220
  # test that signature paragraphs behave correctly
@@ -188,7 +228,6 @@ class TestRbParMain < Test::Unit::TestCase
188
228
  lines_sig_1 = rbpar_sig_1.collect
189
229
  assert_equal("-- ", lines_sig_1[0][0])
190
230
  assert_equal(" - NanoNano", lines_sig_1[0][1])
191
- # pp lines_sig_1
192
231
 
193
232
  lines_sig_2 = [">-- \n", "> - NanoNano\n"]
194
233
 
@@ -197,7 +236,6 @@ class TestRbParMain < Test::Unit::TestCase
197
236
  lines_sig_2 = rbpar_sig_2.collect
198
237
  assert_equal("> -- ", lines_sig_2[0][0])
199
238
  assert_equal("> - NanoNano", lines_sig_2[0][1])
200
- # pp lines_sig_2
201
239
 
202
240
  lines_sig_3 = ["> -- \n", "> - NanoNano\n"]
203
241
 
@@ -206,7 +244,6 @@ class TestRbParMain < Test::Unit::TestCase
206
244
  lines_sig_3 = rbpar_sig_3.collect
207
245
  assert_equal("> -- ", lines_sig_3[0][0])
208
246
  assert_equal("> - NanoNano", lines_sig_3[0][1])
209
- # pp lines_sig_3
210
247
  end
211
248
 
212
249
  end
metadata CHANGED
@@ -3,14 +3,14 @@ rubygems_version: 0.9.4
3
3
  specification_version: 1
4
4
  name: rbpar
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.1.0
7
- date: 2007-12-04 00:00:00 +02:00
6
+ version: 0.2.0
7
+ date: 2008-01-26 00:00:00 +02:00
8
8
  summary: A program for managing pargraph formatting
9
9
  require_paths:
10
10
  - lib
11
11
  email: ismo@iki.fi
12
12
  homepage: ""
13
- rubyforge_project:
13
+ rubyforge_project: rbpar
14
14
  description:
15
15
  autorequire: rbpar
16
16
  default_executable: rbpar.rb