RubyGems - doc_storage - Versions diffs - 0.9 → 1.0 - Mend

doc_storage 0.9 → 1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

data/README.rdoc +23 -31
data/Rakefile +3 -3
data/VERSION +1 -1
data/examples/multipart.rb +4 -8
data/examples/simple.rb +3 -7
data/lib/doc_storage/{multi_part_document.rb → multipart_document.rb} +58 -31
data/lib/doc_storage/simple_document.rb +116 -30
data/lib/doc_storage.rb +1 -1
data/spec/multipart_document_spec.rb +215 -0
data/spec/simple_document_spec.rb +169 -30
metadata +5 -5
data/spec/multi_part_document_spec.rb +0 -139

data/README.rdoc CHANGED Viewed

@@ -1,21 +1,19 @@
 = DocStorage
-http://github.com/dmajda/doc_storage
+{http://bitbucket.org/dmajda/doc_storage/}[http://bitbucket.org/dmajda/doc_storage/]
-DocStorage is a simple Ruby library for manipulating documents containing a text
+DocStorage is a simple Ruby library for manipulating documents containing text
 and metadata. These documents can be used to implement a blog, wiki, or similar
-application without a relational database.
+application without a database.
-The library distinguishes between <em>simple documents</em> and <em>multipart
-documents</em>. A simple document looks like a RFC 822 message and it is
-suitable for storing a text associated with some metadata (e.g. a blog article
-with a title and a publication date). A multipart document is loosely based on
-the MIME multipart message format and allows storing multiple simple documents
-(e.g. blog comments, each with an author and a publication date) in one file.
+== Document Formats
-== Document Format
+The library distinguishes between <em>simple documents</em> and <em>multipart
+documents</em>.
-A simple document looks like this:
+A simple document is similar to an RFC 822 message and it is suitable for storing
+text associated with some metadata (e.g. a blog article with a title and a
+publication date). It looks like this:
   Title: My blog article
   Datetime: 2009-11-01 18:03:27
@@ -25,8 +23,9 @@ A simple document looks like this:
   Suspendisse metus sapien, consectetur vitae imperdiet vel, ornare a metus.
   In imperdiet euismod mi, nec volutpat lorem porta id.
-A multipart document looks like this:
+A multipart document is loosely based on the MIME multipart message format and
+allows storing multiple simple documents (e.g. blog comments, each with an
+author and a publication date) in one file. It looks like this:
   Boundary: =====
@@ -42,12 +41,12 @@ A multipart document looks like this:
   Your article sucks!
 See the documentation of <tt>DocStorage::SimpleDocument</tt> and
-<tt>DocStorage::MultiPartDocument</tt> classes for more formal format
+<tt>DocStorage::MultipartDocument</tt> classes for more formal format
 description.
 == Installation
-  sudo gem install doc_storage --source http://gemcutter.org
+  sudo gem install doc_storage --source http://gemcutter.org/
 == Example Usage
@@ -64,25 +63,21 @@ description.
     "We should finish the documentation ASAP."
   )
-  # Parse a file
-  document = File.open("examples/simple.txt", "r") do |f|
-    DocStorage::SimpleDocument.parse(f)
-  end
+  # Load from a file
+  document = DocStorage::SimpleDocument.load_file("examples/simple.txt")
   # Document manipulation
   document.headers["Tags"] = "example"
   document.body += "Nulla mi dui, pellentesque et accumsan vitae, mattis et velit."
   # Save the modified document
-  File.open("examples/simple_modified.txt", "w") do |f|
-    f.write(document)
-  end
+  document.save_file("examples/simple_modified.txt")
 === Multipart Documents
   require "lib/doc_storage"
   # Create a new document with two parts
-  document = DocStorage::MultiPartDocument.new([
+  document = DocStorage::MultipartDocument.new([
     DocStorage::SimpleDocument.new(
       {
         "Title"    => "Finishing the documentation",
@@ -99,10 +94,8 @@ description.
     ),
   ])
-  # Parse a file
-  document = File.open("examples/multipart.txt", "r") do |f|
-    DocStorage::MultiPartDocument.parse(f)
-  end
+  # Load from a file
+  document = DocStorage::MultipartDocument.load_file("examples/multipart.txt")
   # Document manipulation
   document.parts << DocStorage::SimpleDocument.new(
@@ -114,10 +107,9 @@ description.
   )
   # Save the modified document
-  File.open("examples/multipart_modified.txt", "w") do |f|
-    f.write(document)
-  end
+  document.save_file("examples/multipart_modified.txt")
 == Author
-DocStorage was brought to you by David Majda (david@majda.cz[mailto:david@majda.cz], www.majda.cz).
+DocStorage was brought to you by David Majda
+(david@majda.cz[mailto:david@majda.cz], majda.cz[http://majda.cz/]).

data/Rakefile CHANGED Viewed

@@ -3,7 +3,7 @@ require "rake/rdoctask"
 require "spec/rake/spectask"
 Spec::Rake::SpecTask.new do |t|
-  t.spec_opts = ["--color", "--format", "nested"]
+  t.spec_opts = ["--color"]
 end
 Rake::RDocTask.new do |t|
@@ -14,7 +14,7 @@ end
 specification = Gem::Specification.new do |s|
   s.name = "doc_storage"
-  s.version = "0.9"
+  s.version = "1.0"
   s.summary = "Simple Ruby library for manipulating documents containing a " +
               "text and metadata."
   s.description = "DocStorage is a simple Ruby library for manipulating " +
@@ -25,7 +25,7 @@ specification = Gem::Specification.new do |s|
   s.author = "David Majda"
   s.email = "david@majda.cz"
-  s.homepage = "http://github.com/dmajda/doc_storage"
+  s.homepage = "http://bitbucket.org/dmajda/doc_storage/"
   s.files = FileList[
               "Rakefile",

data/VERSION CHANGED Viewed

	@@ -1 +1 @@
1	- 0.9
1	+ 1.0

data/examples/multipart.rb CHANGED Viewed

@@ -3,7 +3,7 @@ dir = File.dirname(__FILE__)
 require "#{dir}/../lib/doc_storage"
 # Create a new document with two parts
-document = DocStorage::MultiPartDocument.new([
+document = DocStorage::MultipartDocument.new([
   DocStorage::SimpleDocument.new(
     {
       "Title"    => "Finishing the documentation",
@@ -20,10 +20,8 @@ document = DocStorage::MultiPartDocument.new([
   ),
 ])
-# Parse a file
-document = File.open("#{dir}/multipart.txt", "r") do |f|
-  DocStorage::MultiPartDocument.parse(f)
-end
+# Load from a file
+document = DocStorage::MultipartDocument.load_file("examples/multipart.txt")
 # Document manipulation
 document.parts << DocStorage::SimpleDocument.new(
@@ -35,6 +33,4 @@ document.parts << DocStorage::SimpleDocument.new(
 )
 # Save the modified document
-File.open("#{dir}/multipart_modified.txt", "w") do |f|
-  f.write(document)
-end
+document.save_file("#{dir}/multipart_modified.txt")

data/examples/simple.rb CHANGED Viewed

@@ -11,16 +11,12 @@ document = DocStorage::SimpleDocument.new(
   "We should finish the documentation ASAP."
 )
-# Parse a file
-document = File.open("#{dir}/simple.txt", "r") do |f|
-  DocStorage::SimpleDocument.parse(f)
-end
+# Load from a file
+document = DocStorage::SimpleDocument.load_file("examples/simple.txt")
 # Document manipulation
 document.headers["Tags"] = "example"
 document.body += "Nulla mi dui, pellentesque et accumsan vitae, mattis et velit."
 # Save the modified document
-File.open("#{dir}/simple_modified.txt", "w") do |f|
-  f.write(document)
-end
+document.save_file("#{dir}/simple_modified.txt")

data/lib/doc_storage/{multi_part_document.rb → multipart_document.rb} RENAMED Viewed

@@ -1,12 +1,11 @@
 module DocStorage
-  # The +MultiPartDocument+ class represents a document consisting of several
+  # The +MultipartDocument+ class represents a document consisting of several
   # simple documents (see the +SimpleDocument+ class documentation for a
   # description), loosely based on the MIME multipart message format. It is
-  # suitable for storing multiple documents containing a text associated with
-  # some metadata (e.g. blog comments, each with an author and a publication
-  # date). The +MultiPartDocument+ class allows to create the document
-  # programatically, parse it from a file, manipulate its structure and save it
-  # to a file.
+  # suitable for storing multiple documents containing text associated with some
+  # metadata (e.g. blog comments, each with an author and a publication date).
+  # The +MultipartDocument+ class allows to create the document programmatically,
+  # load it from a file, manipulate its structure and save it to a file.
   #
   # == Document Format
   #
@@ -30,16 +29,16 @@ module DocStorage
   # <em>boundary string</em>. The first document is a _prologue_ and it defines
   # the boundary string (without the "--" prefix) in its "Boundary" header. All
   # other headers of the prologue are ignored and so is its body. Remaining
-  # documents are the _parts_ of the multipart document. Documents without any
-  # parts are perfectly legal, however the prologue with the boundary definition
-  # must be always present.
+  # documents are _parts_ of the multipart document. Documents without any parts
+  # are perfectly legal, however the prologue with the boundary definition must
+  # be always present.
   #
   # == Example Usage
   #
   #   require "lib/doc_storage"
   #
   #   # Create a new document with two parts
-  #   document = DocStorage::MultiPartDocument.new([
+  #   document = DocStorage::MultipartDocument.new([
   #     DocStorage::SimpleDocument.new(
   #       {
   #         "Title"    => "Finishing the documentation",
@@ -56,10 +55,8 @@ module DocStorage
   #     ),
   #   ])
   #
-  #   # Parse a file
-  #   document = File.open("examples/multipart.txt", "r") do |f|
-  #     DocStorage::MultiPartDocument.parse(f)
-  #   end
+  #   # Load from a file
+  #   document = DocStorage::MultipartDocument.load_file("examples/multipart.txt")
   #
   #   # Document manipulation
   #   document.parts << DocStorage::SimpleDocument.new(
@@ -71,30 +68,28 @@ module DocStorage
   #   )
   #
   #   # Save the modified document
-  #   File.open("examples/multipart_modified.txt", "w") do |f|
-  #     f.write(document)
-  #   end
-  class MultiPartDocument
+  #   document.save_file("examples/multipart_modified.txt")
+  class MultipartDocument
     # document parts (+Array+ of <tt>DocStorage::SimpleDocument</tt>)
     attr_accessor :parts
     class << self
       private
-        def parse_from_io(io)
-          prologue = SimpleDocument.parse(io, :detect)
+        def load_from_io(io)
+          prologue = SimpleDocument.load(io, :detect)
           boundary = prologue.headers["Boundary"]
           parts = []
           until io.eof?
-            parts << SimpleDocument.parse(io, boundary)
+            parts << SimpleDocument.load(io, boundary)
           end
-          MultiPartDocument.new(parts)
+          MultipartDocument.new(parts)
         end
       public
-        # Parses a multipart document from its serialized form and returns a new
-        # +MultiPartDocument+ instance.
+        # Loads a multipart document from its serialized form and returns a new
+        # +MultipartDocument+ instance.
         #
         # The +source+ can be either an +IO+-like object or a +String+. In the
         # latter case, it is assumed that the string contains a serialized
@@ -106,14 +101,25 @@ module DocStorage
         # headers and body is parsed before the end of file) or if no "Boundary"
         # header is found in the prologue.
         #
-        # See the +MultiPartDocument+ class documentation for a detailed
+        # See the +MultipartDocument+ class documentation for a detailed
         # document format description.
-        def parse(source)
-          parse_from_io(source.is_a?(String) ? StringIO.new(source) : source)
+        def load(source)
+          load_from_io(source.is_a?(String) ? StringIO.new(source) : source)
+        end
+        # Loads a multipart document from a file and returns a new
+        # +MultipartDocument+ instance. This method is just a thin wrapper
+        # around MultipartDocument.load -- see its documentation for description
+        # of the behavior and parameters of this method.
+        #
+        # See the +MultipartDocument+ class documentation for a detailed
+        # document format description.
+        def load_file(file)
+          File.open(file, "r") { |f| load(f) }
         end
     end
-    # Creates a new +MultiPartDocument+ with given parts.
+    # Creates a new +MultipartDocument+ with given parts.
     def initialize(parts)
       @parts = parts
     end
@@ -124,12 +130,15 @@ module DocStorage
       other.instance_of?(self.class) && @parts == other.parts
     end
-    # Returns string representation of this document. The result is in format
-    # described in the +MultiPartDocument+ class documentation.
+    # Returns string representation of this document. The result is in the
+    # format described in the +MultipartDocument+ class documentation.
+    #
+    # Raises +SyntaxError+ if any document header in any contained document has
+    # invalid name.
     def to_s
       # The boundary is just a random string. We do not check if the boundary
       # appears anywhere in the subdocuments, which may lead to malformed
-      # document.  This is of course principially wrong, but the probability of
+      # document. This is of course wrong in principle, but the probability of
       # collision is so small that it does not bother me much.
       chars = ("a".."z").to_a + ("A".."Z").to_a + ("0".."9").to_a
       boundary = Array.new(64) { chars[rand(chars.length)] }.join("")
@@ -137,5 +146,23 @@ module DocStorage
       SimpleDocument.new({"Boundary" => boundary}, "").to_s +
         @parts.map { |part| "--#{boundary}\n#{part.to_s}" }.join("\n")
     end
+    # Saves this document to an +IO+-like object. The result is in the format
+    # described in the +MultipartDocument+ class documentation.
+    #
+    # Raises +SyntaxError+ if any document header in any contained document has
+    # invalid name.
+    def save(io)
+      io.write(to_s)
+    end
+    # Saves this document to a file. The result is in the format described in
+    # the +MultipartDocument+ class documentation.
+    #
+    # Raises +SyntaxError+ if any document header in any contained document has
+    # invalid name.
+    def save_file(file)
+      File.open(file, "w") { |f| save(f) }
+    end
   end
 end

data/lib/doc_storage/simple_document.rb CHANGED Viewed

@@ -1,6 +1,6 @@
 module DocStorage
   # The +SimpleDocument+ class represents a simple RFC 822-like document,
-  # suitable for storing a text associated with some metadata (e.g. a blog
+  # suitable for storing text associated with some metadata (e.g. a blog
   # article with a title and a publication date). The +SimpleDocument+ class
   # allows to create the document programmatically, parse it from a file,
   # manipulate its structure and save it to a file.
@@ -8,8 +8,7 @@ module DocStorage
   # Each document consist of _headers_ and a _body_. Headers are a dictionary,
   # mapping string names to string values. Body is a free-form text. The header
   # names can contain only alphanumeric characters and a hyphen ("-") and they
-  # are case sensitive. The header values can contain any text that does not
-  # begin with whitespace and does not contain a CR or LF character.
+  # are case sensitive. The header values can contain any text.
   #
   # == Document Format
   #
@@ -23,11 +22,14 @@ module DocStorage
   #   Suspendisse metus sapien, consectetur vitae imperdiet vel, ornare a metus.
   #   In imperdiet euismod mi, nec volutpat lorem porta id.
   #
-  # The headers are first, each on its own line. The header names are separated
-  # from values by a colon (":") and any amount of whitespace. Duplicate headers
-  # are allowed with later value overwriting the earlier one. Otherwise, the
-  # order of the headers does not matter. The body is separated from the headers
-  # by an empty line.
+  # Headers are first, each on its own line. Header names are separated from
+  # values by a colon (":") and any amount of whitespace, trailing whitespace
+  # after values is ignored. Values containing special characters (especially
+  # newlines or leading/trailing whitespace) must be enclosed in single or
+  # double quotes. Quoted values can contain usual C-like escape sequences (e.g.
+  # "\n", "\xFF", etc.). Duplicate headers are allowed with later value
+  # overwriting the earlier one. Other than that, the order of headers does not
+  # matter. The body is separated from headers by empty line.
   #
   # Documents without any headers are perfectly legal and so are documents with
   # an empty body. However, the separating line must be always present. This
@@ -47,19 +49,15 @@ module DocStorage
   #     "We should finish the documentation ASAP."
   #   )
   #
-  #   # Parse a file
-  #   document = File.open("examples/simple.txt", "r") do |f|
-  #     DocStorage::SimpleDocument.parse(f)
-  #   end
+  #   # Load from a file
+  #   document = DocStorage::SimpleDocument.load_file("examples/simple.txt")
   #
   #   # Document manipulation
   #   document.headers["Tags"] = "example"
   #   document.body += "Nulla mi dui, pellentesque et accumsan vitae, mattis et velit."
   #
   #   # Save the modified document
-  #   File.open("examples/simple_modified.txt", "w") do |f|
-  #     f.write(document)
-  #   end
+  #   document.save_file("examples/simple_modified.txt")
   class SimpleDocument
     # document headers (+Hash+)
     attr_accessor :headers
@@ -68,6 +66,41 @@ module DocStorage
     class << self
       private
+        def parse_header_value(value)
+          case value[0..0]
+            when '"', "'"
+              quote = value[0..0]
+              if value[-1..-1] != quote
+                raise SyntaxError, "Unterminated header value: #{value.inspect}."
+              end
+              inner_text = value[1..-2]
+              if inner_text.gsub("\\" + quote, "").include?(quote)
+                raise SyntaxError, "Badly quoted header value: #{value.inspect}."
+              end
+              inner_text = inner_text.
+                gsub(/\\x([0-9a-fA-F]{2})/) { $1.to_i(16).chr }.
+                gsub(/\\([0-7]{3})/) { $1.to_i(8).chr }.
+                gsub("\\0", "\0").
+                gsub("\\a", "\a").
+                gsub("\\b", "\b").
+                gsub("\\t", "\t").
+                gsub("\\n", "\n").
+                gsub("\\v", "\v").
+                gsub("\\f", "\f").
+                gsub("\\r", "\r").
+                gsub("\\\"", "\"").
+                gsub("\\'", "'")
+              if inner_text !~ /^(\\\\|[^\\])*$/
+                raise SyntaxError, "Invalid escape sequence in header value: #{value.inspect}."
+              end
+              inner_text.gsub("\\\\", "\\")
+            else
+              value
+          end
+        end
         def parse_headers(io, detect_boundary)
           result = {}
           headers_terminated = false
@@ -75,13 +108,13 @@ module DocStorage
           until io.eof?
             line = io.readline
             case line
-              when /^([a-zA-Z0-9-]+):\s(.*)\n$/
-                result[$1] = $2
+              when /^([a-zA-Z0-9-]+):(.*)\n$/
+                result[$1] = parse_header_value($2.strip)
               when "\n"
                 headers_terminated = true
                 break
               else
-                raise SyntaxError, "Invalid header: \"#{line.strip}\"."
+                raise SyntaxError, "Invalid header: #{line.sub(/\n$/, "").inspect}."
             end
           end
@@ -93,6 +126,10 @@ module DocStorage
           result
         end
+        def trim_last_char(s)
+          s[0..-2]
+        end
         def parse_body(io, boundary)
           if boundary
             result = ""
@@ -102,18 +139,26 @@ module DocStorage
                 # Trim last newline from the body as it belongs to the boundary
                 # logically. This behavior is implemented to allow bodies with
                 # no trailing newline.
-                return result[0..-2]
+                return trim_last_char(result)
               end
               result += line
             end
-            result
+            # IO#readline always returns a newline at the end of a line, even
+            # when it physically wasn't there (which can happen at the end of a
+            # file). Note that only IO and its descendants behave this way (not
+            # StringIO, for example).
+            io.is_a?(IO) ? trim_last_char(result) : result
           else
-            io.read
+            # IO#read always returns a newline at the end of the input, even
+            # when it physically wasn't there. Note that only IO and its
+            # descendants behave this way (not StringIO, for example).
+            io.is_a?(IO) ? trim_last_char(io.read) : io.read
           end
         end
-        def parse_from_io(io, boundary)
+        def load_from_io(io, boundary)
           headers = parse_headers(io, boundary == :detect)
           boundary = headers["Boundary"] if boundary == :detect
           body = parse_body(io, boundary)
@@ -122,7 +167,7 @@ module DocStorage
         end
       public
-        # Parses a simple document from its serialized form and returns a new
+        # Loads a simple document from its serialized form and returns a new
         # +SimpleDocument+ instance.
         #
         # The +source+ can be either an +IO+-like object or a +String+. In the
@@ -145,23 +190,34 @@ module DocStorage
         #   read.
         #
         # The +boundary+ parameter is provided mainly for parsing parts of
-        # multipart documents (see the +MultiPartDocument+ class documentation)
+        # multipart documents (see the +MultipartDocument+ class documentation)
         # and usually should not be used.
         #
         # If any syntax error occurs, a +SyntaxError+ exception is raised. This
-        # can happen when an invalid header is encountered, the headers are not
+        # can happen when an invalid header is encountered, headers are not
         # terminated (no empty line separating headers and body is parsed before
         # the end of file) or if no "Boundary" header is found when detecting a
         # boundary.
         #
         # See the +SimpleDocument+ class documentation for a detailed document
         # format description.
-        def parse(source, boundary = nil)
-          parse_from_io(
+        def load(source, boundary = nil)
+          load_from_io(
             source.is_a?(String) ? StringIO.new(source) : source,
             boundary
           )
         end
+        # Loads a simple document from a file and returns a new +SimpleDocument+
+        # instance. This method is just a thin wrapper around
+        # SimpleDocument.load -- see its documentation for description of the
+        # behavior and parameters of this method.
+        #
+        # See the +SimpleDocument+ class documentation for a detailed document
+        # format description.
+        def load_file(file, boundary = nil)
+          File.open(file, "r") { |f| load(f, boundary) }
+        end
     end
     # Creates a new +SimpleDocument+ with given headers and body.
@@ -177,13 +233,43 @@ module DocStorage
         @body == other.body
     end
-    # Returns string representation of this document. The result is in format
-    # described in the +SimpleDocument+ class documentation.
+    # Returns string representation of this document. The result is in the
+    # format described in the +SimpleDocument+ class documentation.
+    #
+    # Raises +SyntaxError+ if any document header has invalid name.
     def to_s
+      @headers.keys.each do |name|
+        if name !~ /\A[a-zA-Z0-9-]+\Z/
+          raise SyntaxError, "Invalid header name: #{name.inspect}."
+        end
+      end
       serialized_headers = @headers.keys.sort.inject("") do |acc, key|
-        acc + "#{key}: #{@headers[key]}\n"
+        value_is_simple = @headers[key] !~ /\A\s+/ &&
+                          @headers[key] !~ /\s+\Z/ &&
+                          @headers[key] !~ /[\n\r]/
+        value = value_is_simple ? @headers[key] : @headers[key].inspect
+        acc + "#{key}: #{value}\n"
       end
       serialized_headers + "\n" + @body
     end
+    # Saves this document to an +IO+-like object. The result is in the format
+    # described in the +SimpleDocument+ class documentation.
+    #
+    # Raises +SyntaxError+ if any document header has invalid name.
+    def save(io)
+      io.write(to_s)
+    end
+    # Saves this document to a file. The result is in the format described in
+    # the +SimpleDocument+ class documentation.
+    #
+    # Raises +SyntaxError+ if any document header has invalid name.
+    def save_file(file)
+      File.open(file, "w") { |f| save(f) }
+    end
   end
 end

data/lib/doc_storage.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 require File.dirname(__FILE__) + "/doc_storage/syntax_error"
 require File.dirname(__FILE__) + "/doc_storage/simple_document"
-require File.dirname(__FILE__) + "/doc_storage/multi_part_document"
+require File.dirname(__FILE__) + "/doc_storage/multipart_document"

data/spec/multipart_document_spec.rb ADDED Viewed

@@ -0,0 +1,215 @@
+require "tempfile"
+require File.dirname(__FILE__) + "/../lib/doc_storage"
+module DocStorage
+  describe MultipartDocument do
+    MULTIPART_FIXTURE_FILE = File.dirname(__FILE__) + "/fixtures/multipart.txt"
+    Spec::Matchers.define :load_as_multipart_document do |document|
+      match do |string|
+        MultipartDocument::load(string) == document
+      end
+    end
+    before :each do
+      @document = MultipartDocument.new([:part1, :part2])
+      @document_with_no_parts = MultipartDocument.new([])
+      @document_with_multiple_parts = MultipartDocument.new([
+        SimpleDocument.new({ "a" => "42", "b" => "43" }, "line1\nline2"),
+        SimpleDocument.new({ "c" => "44", "d" => "45" }, "line3\nline4"),
+      ])
+    end
+    describe "initialize" do
+      it "sets attributes correctly" do
+        @document.parts.should == [:part1, :part2]
+      end
+    end
+    describe "==" do
+      it "returns true when passed the same object" do
+        @document.should == @document
+      end
+      it "returns true when passed a MultipartDocument initialized with the same parameter" do
+        @document.should == MultipartDocument.new([:part1, :part2])
+      end
+      it "returns false when passed some random object" do
+        @document.should_not == Object.new
+      end
+      it "returns false when passed a subclass of MultipartDocument initialized with the same parameter" do
+        class SubclassedMultipartDocument < MultipartDocument
+        end
+        @document.should_not ==
+          SubclassedMultipartDocument.new([:part1, :part2])
+      end
+      it "returns false when passed a MultipartDocument initialized with different parameter" do
+        @document.should_not == MultipartDocument.new([:part3, :part4])
+      end
+    end
+    describe "load" do
+      it "loads document with no parts" do
+        "Boundary: =====\n\n".should load_as_multipart_document(
+          @document_with_no_parts
+        )
+      end
+      it "loads document with multiple parts" do
+        [
+          "Boundary: =====",
+          "",
+          "--=====",
+          "a: 42",
+          "b: 43",
+          "",
+          "line1",
+          "line2",
+          "--=====",
+          "c: 44",
+          "d: 45",
+          "",
+          "line3",
+          "line4",
+        ].join("\n").should load_as_multipart_document(
+          @document_with_multiple_parts
+        )
+      end
+      it "does not load document with no Boundary: header" do
+        lambda {
+          MultipartDocument.load("\n\n")
+        }.should raise_error(SyntaxError, "No boundary defined.")
+      end
+      it "loads document from IO-like object" do
+        StringIO.open(
+          [
+            "Boundary: =====",
+            "",
+            "--=====",
+            "a: 42",
+            "b: 43",
+            "",
+            "line1",
+            "line2",
+            "--=====",
+            "c: 44",
+            "d: 45",
+            "",
+            "line3",
+            "line4",
+          ].join("\n")
+        ) do |io|
+          MultipartDocument.load(io).should == @document_with_multiple_parts
+        end
+      end
+    end
+    describe "load_file" do
+      it "loads document" do
+        MultipartDocument.load_file(MULTIPART_FIXTURE_FILE).should ==
+          @document_with_multiple_parts
+      end
+    end
+    describe "to_s" do
+      it "serializes document with no parts" do
+        srand 0
+        @document_with_no_parts.to_s.should ==
+          "Boundary: SV1ad7dNjtvYKxgyym6bMNxUyrLznijuZqZfpVasJyXZDttoNGbj5GFk0xJlY3CI\n\n"
+      end
+      it "serializes document with multiple parts" do
+        srand 0
+        @document_with_multiple_parts.to_s.should == [
+          "Boundary: SV1ad7dNjtvYKxgyym6bMNxUyrLznijuZqZfpVasJyXZDttoNGbj5GFk0xJlY3CI",
+          "",
+          "--SV1ad7dNjtvYKxgyym6bMNxUyrLznijuZqZfpVasJyXZDttoNGbj5GFk0xJlY3CI",
+          "a: 42",
+          "b: 43",
+          "",
+          "line1",
+          "line2",
+          "--SV1ad7dNjtvYKxgyym6bMNxUyrLznijuZqZfpVasJyXZDttoNGbj5GFk0xJlY3CI",
+          "c: 44",
+          "d: 45",
+          "",
+          "line3",
+          "line4",
+        ].join("\n")
+      end
+    end
+    describe "save" do
+      it "saves document" do
+        StringIO.open("", "w") do |io|
+          srand 0
+          @document_with_multiple_parts.save(io)
+          io.string.should == [
+            "Boundary: SV1ad7dNjtvYKxgyym6bMNxUyrLznijuZqZfpVasJyXZDttoNGbj5GFk0xJlY3CI",
+            "",
+            "--SV1ad7dNjtvYKxgyym6bMNxUyrLznijuZqZfpVasJyXZDttoNGbj5GFk0xJlY3CI",
+            "a: 42",
+            "b: 43",
+            "",
+            "line1",
+            "line2",
+            "--SV1ad7dNjtvYKxgyym6bMNxUyrLznijuZqZfpVasJyXZDttoNGbj5GFk0xJlY3CI",
+            "c: 44",
+            "d: 45",
+            "",
+            "line3",
+            "line4",
+          ].join("\n")
+        end
+      end
+    end
+    describe "save_file" do
+      it "saves document" do
+        # The "ensure" blocks aren't really necessary -- the tempfile will be
+        # closed and unlinked upon its object destruction automatically. However
+        # I think that being explicit and deterministic doesn't hurt.
+        begin
+          tempfile = Tempfile.new("doc_storage")
+          tempfile.close
+          srand 0
+          @document_with_multiple_parts.save_file(tempfile.path)
+          tempfile.open
+          begin
+            tempfile.read.should == [
+              "Boundary: SV1ad7dNjtvYKxgyym6bMNxUyrLznijuZqZfpVasJyXZDttoNGbj5GFk0xJlY3CI",
+              "",
+              "--SV1ad7dNjtvYKxgyym6bMNxUyrLznijuZqZfpVasJyXZDttoNGbj5GFk0xJlY3CI",
+              "a: 42",
+              "b: 43",
+              "",
+              "line1",
+              "line2",
+              "--SV1ad7dNjtvYKxgyym6bMNxUyrLznijuZqZfpVasJyXZDttoNGbj5GFk0xJlY3CI",
+              "c: 44",
+              "d: 45",
+              "",
+              "line3",
+              "line4",
+            ].join("\n")
+          ensure
+            tempfile.close
+          end
+        ensure
+          tempfile.unlink
+        end
+      end
+    end
+  end
+end

data/spec/simple_document_spec.rb CHANGED Viewed

@@ -1,26 +1,39 @@
+require "tempfile"
 require File.dirname(__FILE__) + "/../lib/doc_storage"
 module DocStorage
   describe SimpleDocument do
-    Spec::Matchers.define :parse_as_document do |document|
+    SIMPLE_FIXTURE_FILE = File.dirname(__FILE__) + "/fixtures/simple.txt"
+    Spec::Matchers.define :load_as_document do |document|
       match do |string|
-        SimpleDocument.parse(string) == document
+        SimpleDocument.load(string) == document
       end
     end
     before :each do
-      @document = SimpleDocument.new({"a" => 42, "b" => 43}, "body")
+      @document = SimpleDocument.new({ "a" => 42, "b" => 43 }, "body")
       @document_without_headers_without_body = SimpleDocument.new({}, "")
       @document_without_headers_with_body = SimpleDocument.new({}, "line1\nline2")
       @document_with_headers_without_body = SimpleDocument.new(
-        {"a" => "42", "b" => "43"},
+        { "a" => "42", "b" => "43" },
         ""
       )
       @document_with_headers_with_body = SimpleDocument.new(
-        {"a" => "42", "b" => "43"},
+        { "a" => "42", "b" => "43" },
         "line1\nline2"
       )
+      @document_with_ugly_header = SimpleDocument.new(
+        { "a" => "\xFF\377\0\a\b\t\n\v\f\r\"'\\\xFF\377\0\a\b\t\n\v\f\r\"'\\" },
+        ""
+      )
+      @document_with_invalid_header = SimpleDocument.new(
+        { "in\nvalid" => "42" },
+        ""
+      )
     end
     describe "initialize" do
@@ -52,77 +65,159 @@ module DocStorage
       end
       it "returns false when passed a SimpleDocument initialized with different parameters" do
-        @document.should_not == SimpleDocument.new({"a" => 44, "b" => 45}, "body")
-        @document.should_not == SimpleDocument.new({"a" => 42, "b" => 43}, "nobody")
+        @document.should_not == SimpleDocument.new({ "a" => 44, "b" => 45 }, "body")
+        @document.should_not == SimpleDocument.new({ "a" => 42, "b" => 43 }, "nobody")
       end
     end
-    describe "parse" do
-      it "parses document with no headers and no body" do
-        "\n".should parse_as_document(@document_without_headers_without_body)
+    describe "load" do
+      it "loads document with no headers and no body" do
+        "\n".should load_as_document(@document_without_headers_without_body)
       end
-      it "parses document with no headers and body" do
-        "\nline1\nline2".should parse_as_document(
+      it "loads document with no headers and body" do
+        "\nline1\nline2".should load_as_document(
           @document_without_headers_with_body
         )
       end
-      it "parses document with headers and no body" do
-        "a: 42\nb: 43\n\n".should parse_as_document(
+      it "loads document with headers and no body" do
+        "a: 42\nb: 43\n\n".should load_as_document(
           @document_with_headers_without_body
         )
       end
-      it "parses document with headers and body" do
-        "a: 42\nb: 43\n\nline1\nline2".should parse_as_document(
+      it "loads document with headers and body" do
+        "a: 42\nb: 43\n\nline1\nline2".should load_as_document(
           @document_with_headers_with_body
         )
       end
-      it "does not parse document with invalid headers" do
+      it "loads document with no whitespace after the colon in headers" do
+        "a:42\nb:43\n\n".should load_as_document(
+          @document_with_headers_without_body
+        )
+      end
+      it "loads document with multiple whitespace after the colon in headers" do
+        "a: \t 42\nb: \t 43\n\n".should load_as_document(
+          @document_with_headers_without_body
+        )
+      end
+      it "loads document with multiple whitespace after the value in headers" do
+        "a:42 \t \nb:43 \t \n\n".should load_as_document(
+          @document_with_headers_without_body
+        )
+      end
+      it "loads document with quoted header value" do
+        "a: \"42\"\nb: \"43\"\n\n".should load_as_document(
+          @document_with_headers_without_body
+        )
+        "a: '42'\nb: '43'\n\n".should load_as_document(
+          @document_with_headers_without_body
+        )
+        "a: \"\\xFF\\377\\0\\a\\b\\t\\n\\v\\f\\r\\\"\\'\\\\\\xFF\\377\\0\\a\\b\\t\\n\\v\\f\\r\\\"\\'\\\\\"\n\n".should load_as_document(
+          @document_with_ugly_header
+        )
+        "a: '\\xFF\\377\\0\\a\\b\\t\\n\\v\\f\\r\\\"\\'\\\\\\xFF\\377\\0\\a\\b\\t\\n\\v\\f\\r\\\"\\'\\\\'\n\n".should load_as_document(
+          @document_with_ugly_header
+        )
+      end
+      it "does not load document with unterminated header value" do
+        lambda {
+          SimpleDocument.load("a: \"42\n\n")
+        }.should raise_error(SyntaxError, "Unterminated header value: \"\\\"42\".")
+        lambda {
+          SimpleDocument.load("a: '42\n\n")
+        }.should raise_error(SyntaxError, "Unterminated header value: \"'42\".")
+      end
+      it "does not load document with badly quoted header value" do
+        lambda {
+          SimpleDocument.load("a: \"4\"2\"\n\n")
+        }.should raise_error(SyntaxError, "Badly quoted header value: \"\\\"4\\\"2\\\"\".")
         lambda {
-          SimpleDocument.parse("bullshit")
-        }.should raise_error(SyntaxError, "Invalid header: \"bullshit\".")
+          SimpleDocument.load("a: '4'2'\n\n")
+        }.should raise_error(SyntaxError, "Badly quoted header value: \"'4'2'\".")
       end
-      it "does not parse document with unterminated headers" do
+      it "does not load document with quoted header value containing invalid escape sequence" do
         lambda {
-          SimpleDocument.parse("a: 42\nb: 42\n")
+          SimpleDocument.load("a: \"4\\z2\"\n\n")
+        }.should raise_error(SyntaxError, "Invalid escape sequence in header value: \"\\\"4\\\\z2\\\"\".")
+        lambda {
+          SimpleDocument.load("a: '4\\z2'\n\n")
+        }.should raise_error(SyntaxError, "Invalid escape sequence in header value: \"'4\\\\z2'\".")
+      end
+      it "does not load document with invalid headers" do
+        lambda {
+          SimpleDocument.load("bull\tshit\n")
+        }.should raise_error(SyntaxError, "Invalid header: \"bull\\tshit\".")
+      end
+      it "does not load document with unterminated headers" do
+        lambda {
+          SimpleDocument.load("a: 42\nb: 42\n")
         }.should raise_error(SyntaxError, "Unterminated headers.")
       end
-      it "parses document from IO-like object" do
+      it "loads document from IO-like object" do
         StringIO.open("a: 42\nb: 43\n\nline1\nline2") do |io|
-          SimpleDocument.parse(io).should == @document_with_headers_with_body
+          SimpleDocument.load(io).should == @document_with_headers_with_body
         end
       end
-      it "parses document when detecting a boundary" do
-        SimpleDocument.parse(
+      it "loads document when detecting a boundary" do
+        SimpleDocument.load(
           "a: 42\nb: 43\nBoundary: =====\n\nline1\nline2\n--=====\nbullshit",
           :detect
         ).should == SimpleDocument.new(
-          {"a" => "42", "b" => "43", "Boundary" => "====="},
+          { "a" => "42", "b" => "43", "Boundary" => "=====" },
           "line1\nline2"
         )
       end
-      it "does not parse document when detecting a boundary and no boundary defined" do
+      it "does not load document when detecting a boundary and no boundary defined" do
         lambda {
-          SimpleDocument.parse(
+          SimpleDocument.load(
             "a: 42\nb: 43\n\nline1\nline2\n--=====\nbullshit",
             :detect
           )
         }.should raise_error(SyntaxError, "No boundary defined.")
       end
-      it "parses document when passed a boundary" do
-        SimpleDocument.parse(
+      it "loads document when passed a boundary" do
+        SimpleDocument.load(
           "a: 42\nb: 43\n\nline1\nline2\n--=====\nbullshit",
           "====="
         ).should == @document_with_headers_with_body
       end
+      it "works around the IO#readline bug" do
+        File.open(SIMPLE_FIXTURE_FILE, "r") do |f|
+          SimpleDocument.load(f).should == @document_with_headers_with_body
+        end
+      end
+      it "works around the IO#read bug when passed a boundary" do
+        File.open(SIMPLE_FIXTURE_FILE, "r") do |f|
+          SimpleDocument.load(f, "=====").should ==
+            @document_with_headers_with_body
+        end
+      end
+    end
+    describe "load_file" do
+      it "loads document" do
+        SimpleDocument.load_file(SIMPLE_FIXTURE_FILE).should ==
+          @document_with_headers_with_body
+      end
     end
     describe "to_s" do
@@ -142,6 +237,50 @@ module DocStorage
         @document_with_headers_with_body.to_s.should ==
           "a: 42\nb: 43\n\nline1\nline2"
       end
+      it "serializes document with ugly header" do
+        @document_with_ugly_header.to_s.should ==
+          "a: \"\\377\\377\\000\\a\\b\\t\\n\\v\\f\\r\\\"'\\\\\\377\\377\\000\\a\\b\\t\\n\\v\\f\\r\\\"'\\\\\"\n\n"
+      end
+      it "does not serialize document with invalid header name" do
+        lambda {
+          @document_with_invalid_header.to_s
+        }.should raise_error(SyntaxError, "Invalid header name: \"in\\nvalid\".")
+      end
+    end
+    describe "save" do
+      it "saves document" do
+        StringIO.open("", "w") do |io|
+          @document_with_headers_with_body.save(io)
+          io.string.should == "a: 42\nb: 43\n\nline1\nline2"
+        end
+      end
+    end
+    describe "save_file" do
+      it "saves document" do
+        # The "ensure" blocks aren't really necessary -- the tempfile will be
+        # closed and unlinked upon its object destruction automatically. However
+        # I think that being explicit and deterministic doesn't hurt.
+        begin
+          tempfile = Tempfile.new("doc_storage")
+          tempfile.close
+          @document_with_headers_with_body.save_file(tempfile.path)
+          tempfile.open
+          begin
+            tempfile.read.should == "a: 42\nb: 43\n\nline1\nline2"
+          ensure
+            tempfile.close
+          end
+        ensure
+          tempfile.unlink
+        end
+      end
     end
   end
 end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: doc_storage
 version: !ruby/object:Gem::Version
-  version: "0.9"
+  version: "1.0"
 platform: ruby
 authors:
 - David Majda
@@ -9,7 +9,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2009-11-19 00:00:00 +01:00
+date: 2010-02-14 00:00:00 +01:00
 default_executable:
 dependencies: []
@@ -27,17 +27,17 @@ files:
 - LICENSE
 - VERSION
 - lib/doc_storage.rb
-- lib/doc_storage/multi_part_document.rb
 - lib/doc_storage/simple_document.rb
 - lib/doc_storage/syntax_error.rb
+- lib/doc_storage/multipart_document.rb
+- spec/multipart_document_spec.rb
 - spec/simple_document_spec.rb
-- spec/multi_part_document_spec.rb
 - examples/simple.txt
 - examples/multipart.rb
 - examples/simple.rb
 - examples/multipart.txt
 has_rdoc: true
-homepage: http://github.com/dmajda/doc_storage
+homepage: http://bitbucket.org/dmajda/doc_storage/
 licenses: []
 post_install_message:

data/spec/multi_part_document_spec.rb DELETED Viewed

@@ -1,139 +0,0 @@
-require File.dirname(__FILE__) + "/../lib/doc_storage"
-module DocStorage
-  describe MultiPartDocument do
-    Spec::Matchers.define :parse_as_multi_part_document do |document|
-      match do |string|
-        MultiPartDocument::parse(string) == document
-      end
-    end
-    before :each do
-      @document = MultiPartDocument.new([:part1, :part2])
-      @document_with_no_parts = MultiPartDocument.new([])
-      @document_with_multiple_parts = MultiPartDocument.new([
-        SimpleDocument.new({"a" => "42", "b" => "43"}, "line1\nline2"),
-        SimpleDocument.new({"c" => "44", "d" => "45"}, "line3\nline4"),
-      ])
-    end
-    describe "initialize" do
-      it "sets attributes correctly" do
-        @document.parts.should == [:part1, :part2]
-      end
-    end
-    describe "==" do
-      it "returns true when passed the same object" do
-        @document.should == @document
-      end
-      it "returns true when passed a MultiPartDocument initialized with the same parameter" do
-        @document.should == MultiPartDocument.new([:part1, :part2])
-      end
-      it "returns false when passed some random object" do
-        @document.should_not == Object.new
-      end
-      it "returns false when passed a subclass of MultiPartDocument initialized with the same parameter" do
-        class SubclassedMultiPartDocument < MultiPartDocument
-        end
-        @document.should_not ==
-          SubclassedMultiPartDocument.new([:part1, :part2])
-      end
-      it "returns false when passed a MultiPartDocument initialized with different parameter" do
-        @document.should_not == MultiPartDocument.new([:part3, :part4])
-      end
-    end
-    describe "parse" do
-      it "parses document with no parts" do
-        "Boundary: =====\n\n".should parse_as_multi_part_document(
-          @document_with_no_parts
-        )
-      end
-      it "parses document with multiple parts" do
-        [
-          "Boundary: =====",
-          "",
-          "--=====",
-          "a: 42",
-          "b: 43",
-          "",
-          "line1",
-          "line2",
-          "--=====",
-          "c: 44",
-          "d: 45",
-          "",
-          "line3",
-          "line4",
-        ].join("\n").should parse_as_multi_part_document(
-          @document_with_multiple_parts
-        )
-      end
-      it "does not parse document with no Boundary: header" do
-        lambda {
-          MultiPartDocument.parse("\n\n")
-        }.should raise_error(SyntaxError, "No boundary defined.")
-      end
-      it "parses document from IO-like object" do
-        StringIO.open(
-          [
-            "Boundary: =====",
-            "",
-            "--=====",
-            "a: 42",
-            "b: 43",
-            "",
-            "line1",
-            "line2",
-            "--=====",
-            "c: 44",
-            "d: 45",
-            "",
-            "line3",
-            "line4",
-          ].join("\n")
-        ) do |io|
-          MultiPartDocument.parse(io).should == @document_with_multiple_parts
-        end
-      end
-    end
-    describe "to_s" do
-      it "serializes document with no parts" do
-        srand 0
-        @document_with_no_parts.to_s.should ==
-          "Boundary: SV1ad7dNjtvYKxgyym6bMNxUyrLznijuZqZfpVasJyXZDttoNGbj5GFk0xJlY3CI\n\n"
-      end
-      it "serializes document with multiple parts" do
-        srand 0
-        @document_with_multiple_parts.to_s.should == [
-          "Boundary: SV1ad7dNjtvYKxgyym6bMNxUyrLznijuZqZfpVasJyXZDttoNGbj5GFk0xJlY3CI",
-          "",
-          "--SV1ad7dNjtvYKxgyym6bMNxUyrLznijuZqZfpVasJyXZDttoNGbj5GFk0xJlY3CI",
-          "a: 42",
-          "b: 43",
-          "",
-          "line1",
-          "line2",
-          "--SV1ad7dNjtvYKxgyym6bMNxUyrLznijuZqZfpVasJyXZDttoNGbj5GFk0xJlY3CI",
-          "c: 44",
-          "d: 45",
-          "",
-          "line3",
-          "line4",
-        ].join("\n")
-      end
-    end
-  end
-end