RubyGems - rbpar - Versions diffs - 0.1.0 → 0.2.0 - Mend

rbpar 0.1.0 → 0.2.0

Files changed (6) hide show

data/bin/rbpar.rb CHANGED Viewed

@@ -62,25 +62,20 @@ def main
     end
     rbpar = RbParIterator.new(width)
-    first = true
     # add lines to the parser object ...
     rbpar << $stdin.readlines
     # ... and read back ready paragraphs
     rbpar.each do |paragraph|
-        # add a line break before each paragraph (except the first)
-        unless first
-            #$stdout.write("\n") unless first
-        else
-            first = false
-        end
         # write the lines to stdout
-        paragraph.each do |line|
+        paragraph.each_with_index do |line, i|
             $stdout.write(line.rstrip)
-            $stdout.write(" ") if vim
-            $stdout.write("\n")
+            $stdout.write(" ") if vim && i != (paragraph.length-1)
+            $stdout.write(rbpar.eol)
         end
+        # end the paragraph
+        # $stdout.write("\n") if paragraph.length == 0
     end
 end

data/lib/rbpar_engine.rb CHANGED Viewed

@@ -34,31 +34,59 @@ class DynamicBreaker
     # to accomplish this in reasonable time.
     def initialize
-        @charLengthCache = Hash.new
+        @dirty_cache = true
         @cost = Hash.new(0)
     end
-    def charLength(words, beginIndex, endIndex)
+    def initCache(words)
+        # init the word cache
+        # create the matrix
+        @cache = Array.new(words.length) { Array.new(words.length, 0) }
-        # see if the result is already cached
+        # this implementation wastes one line of storage, if implemented
+        # with a matrix...
-        token = beginIndex.to_s+ "_" + endIndex.to_s
-        cachedResult = @charLengthCache[token]
-        if cachedResult.nil?
-            length = realCharLength(words, beginIndex, endIndex)
-            return @charLengthCache[token] = length
+        endIndex = 1 # start from the beginning, cell [0, 1]
+        tmp = endIndex
+        # Initialize the first diagonal with word lengths. Do not care
+        # about [i, i] since that will always be 0
+        endIndex.upto(words.length) do |i|
+            @cache.at(i-1)[i] = words.at(i-1).length
+            # printf "cache [%d][%d] = %d\n", i-1, i, @cache[i-1][i]
+        end
+        endIndex = 2 # start the real calculation on the next row
+        # The idea is as follows: the length of words (2,8) is the
+        # length of words (2,7) + (7,8), and the first value is
+        # previously calculated
+        endIndex.upto(words.length) do |i|
+            (i-2).downto(0) do |j|
+                @cache.at(j)[i] = @cache.at(i-1).at(i)+ @cache.at(j).at(i-1)
+                # printf "cache [%d][%d] = %d\n", j, i, @cache[j][i]
+            end
         end
-        return cachedResult
     end
+    def charLength(words, beginIndex, endIndex)
-    def realCharLength(words, beginIndex, endIndex)
+        # Return the length in characters from beginIndex to endIndex.
+        # Note that the spaces need to be added between the words.
+        # see if the cache needs updating
-        # sum the lenghts of the words and add 1 for the space after the
-        # word
+        if @dirty_cache == true
+            initCache(words)
+            @dirty_cache = false
+        end
-        return (beginIndex...endIndex).inject(0) do |sum, i|
-            sum + words[i].length + 1
-        end - 1 # remove the last space
+        spaces = endIndex - beginIndex - 1
+        @cache[beginIndex][endIndex] + spaces
     end
     def calculatePenalty(words, availableLength, beginIndex, endIndex)
@@ -129,10 +157,10 @@ class DynamicBreaker
     def isLegal(words, availableLength, beginIndex, endIndex)
         charLength(words, beginIndex, endIndex) <= availableLength
     end
     def parse(words, availableLength)
-        # check the parameters, since this is the public entrypoint to
+        # check the parameters, since this is the public entry point to
         # the algorithm
         if words.nil? or availableLength <= 0
@@ -149,11 +177,11 @@ class DynamicBreaker
             # only one word
             return words
         end
         n = words.length
+        @dirty_cache = true
         # one initialization is enough
-        @charLengthCache.clear
         @cost.clear
         # initialize with the words. Note: not as the default value

data/lib/rbpar_main.rb CHANGED Viewed

@@ -15,6 +15,8 @@ class RbParIterator
     # breaking system.
     include Enumerable
+    attr_reader :eol
     def initialize(width)
         unless width > 0
@@ -23,9 +25,23 @@ class RbParIterator
         @breaker = DynamicBreaker.new()
         @width = width
         @readlines = Array.new()
+        @eol = ''
     end
     def <<(lines)
+        # add lines to the paragraph
+        # check the line format
+        if lines[0] =~ /\r\n$/
+            @eol = "\r\n"
+        elsif lines[0] =~ /\n$/
+            @eol = "\n"
+        end
+        # TODO: possibly check that the lines are really lines with only
+        # one newline?
         @readlines = @readlines + lines
     end
@@ -41,13 +57,21 @@ class RbParIterator
         # results has the processed sub-paragraphs
         results = paragraphs.collect do |sub_paragraph|
             # process removes the line breaks: it doesn't make sense to
             # send pure line breaks there
-            if sub_paragraph[0] != "\n"
-                sub_paragraph.process!(@breaker, @width)
-            else
+            # chomp also removes DOS-style line endings, which is good
+            if sub_paragraph[0].chomp.empty?
+                # since this is a paragraph, having a first line with
+                # only a newline character means that it is also the
+                # last line
                 [""]
+            else
+                sub_paragraph.process!(@breaker, @width)
             end
         end
         # combine the result paragraphs into one
@@ -58,10 +82,30 @@ class RbParIterator
     end
     def each
-        @readlines.join.each('') do |paragraph|
-            lines = paragraph.collect
-            yield process_paragraph(lines)
+        paragraph_break = Regexp.new('^' << @eol << '$')
+        # find all paragraph breaks (lines that are empty once chomped)
+        splitpoints = Array.new
+        @readlines.each_with_index do |line, i|
+            splitpoints << i if line.chomp.empty?
         end
+        previous_splitpoint = 0
+        if splitpoints.length > 0 && @readlines.length > 1
+            splitpoints.each do |splitpoint|
+                result = process_paragraph(@readlines[previous_splitpoint .. splitpoint])
+                previous_splitpoint = splitpoint + 1
+                yield result
+            end
+        end
+        # the last paragraph
+        yield process_paragraph(@readlines[previous_splitpoint ... @readlines.length])
     end
 end

data/test/rbpar_engine_test.rb CHANGED Viewed

@@ -17,12 +17,6 @@ class TestRbParEngine < Test::Unit::TestCase
         assert_equal(words.length, @breaker.charLength(words.split(" "), 0, 4))
     end
-    def test_real_char_length
-        words = "This is a test"
-        # FIXME: index should go from 0 to 3
-        assert_equal(words.length, @breaker.realCharLength(words.split(" "), 0, 4))
-    end
     def test_parse
         words = "Sed eget ligula. Nunc fringilla. In ullamcorper turpis quis tortor. Maecenas fringilla dui aliquet leo. Nulla nec mi ut mauris ultrices sollicitudin. Mauris feugiat ornare massa. Ut vitae dolor sed urna blandit imperdiet. Cras tempus, orci sollicitudin pulvinar ultricies, sapien urna fringilla risus, eu rhoncus metus nisi a risus. Aliquam erat volutpat. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia Curae; Duis iaculis lorem sit amet neque. Cras quis tellus. Ut molestie eros sit amet nibh blandit luctus. Quisque ac sem. Nulla condimentum eros rhoncus ipsum. Duis enim. Phasellus mattis posuere augue."

data/test/rbpar_main_test.rb CHANGED Viewed

@@ -15,17 +15,30 @@ class TestRbParMain < Test::Unit::TestCase
         lines1 = rbpar1.collect
         assert_equal("", lines1[0][0])
         assert_nil(lines1[0][1])
-        assert_nil(lines1[1])
+        # readlines should handle these cases...
+=begin
         rbpar2 = RbParIterator.new(63)
         lines2 = ["\n\n"]
         rbpar2 << lines2
         lines2 = rbpar2.collect
+        pp lines2
         assert_equal("", lines2[0][0])
         assert_equal("", lines2[0][1])
         assert_nil(lines2[1])
+        rbpar3 = RbParIterator.new(63)
+        lines3 = ["\n\n\n"]
+        rbpar3 << lines2
+        lines3 = rbpar3.collect
+        pp lines3
+        assert_equal("", lines2[0][0])
+        assert_equal("", lines2[0][1])
+        assert_equal("", lines2[0][2])
+        assert_nil(lines2[1])
+=end
     end
     def test_paragraphs
@@ -168,6 +181,7 @@ class TestRbParMain < Test::Unit::TestCase
         rbpar7 << lines7
         lines7 = rbpar7.collect
         assert_equal("> text text", lines7[0][0])
         assert_equal("", lines7[0][1])
@@ -175,6 +189,32 @@ class TestRbParMain < Test::Unit::TestCase
     end
+    def test_dos_line_breaks
+        # dos and unix line breaks should be equivalent
+        rbpar1 = RbParIterator.new(15)
+        lines1 = [ "> text text\n", "\n", "> text\n" ]
+        rbpar1 << lines1
+        lines1 = rbpar1.collect
+        assert_equal("> text text", lines1[0][0])
+        assert_equal("", lines1[0][1])
+        assert_equal("> text", lines1[1][0])
+        rbpar2 = RbParIterator.new(15)
+        lines2 = [ "> text text\r\n", "\r\n", "> text\r\n" ]
+        rbpar2 << lines2
+        lines2 = rbpar2.collect
+        assert_equal("> text text", lines2[0][0])
+        assert_equal("", lines2[0][1])
+        assert_equal("> text", lines2[1][0])
+    end
     def test_signature
         # test that signature paragraphs behave correctly
@@ -188,7 +228,6 @@ class TestRbParMain < Test::Unit::TestCase
         lines_sig_1 = rbpar_sig_1.collect
         assert_equal("-- ", lines_sig_1[0][0])
         assert_equal("    - NanoNano", lines_sig_1[0][1])
-        # pp lines_sig_1
         lines_sig_2 = [">-- \n", ">    - NanoNano\n"]
@@ -197,7 +236,6 @@ class TestRbParMain < Test::Unit::TestCase
         lines_sig_2 = rbpar_sig_2.collect
         assert_equal("> -- ", lines_sig_2[0][0])
         assert_equal(">     - NanoNano", lines_sig_2[0][1])
-        # pp lines_sig_2
         lines_sig_3 = ["> -- \n", ">     - NanoNano\n"]
@@ -206,7 +244,6 @@ class TestRbParMain < Test::Unit::TestCase
         lines_sig_3 = rbpar_sig_3.collect
         assert_equal("> -- ", lines_sig_3[0][0])
         assert_equal(">     - NanoNano", lines_sig_3[0][1])
-        # pp lines_sig_3
     end
 end

metadata CHANGED Viewed

@@ -3,14 +3,14 @@ rubygems_version: 0.9.4
 specification_version: 1
 name: rbpar
 version: !ruby/object:Gem::Version
-  version: 0.1.0
-date: 2007-12-04 00:00:00 +02:00
+  version: 0.2.0
+date: 2008-01-26 00:00:00 +02:00
 summary: A program for managing pargraph formatting
 require_paths:
 - lib
 email: ismo@iki.fi
 homepage: ""
-rubyforge_project:
+rubyforge_project: rbpar
 description:
 autorequire: rbpar
 default_executable: rbpar.rb