doc_storage 0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +22 -0
- data/README.rdoc +123 -0
- data/Rakefile +46 -0
- data/VERSION +1 -0
- data/examples/multipart.rb +40 -0
- data/examples/multipart.txt +12 -0
- data/examples/simple.rb +26 -0
- data/examples/simple.txt +7 -0
- data/lib/doc_storage/multi_part_document.rb +141 -0
- data/lib/doc_storage/simple_document.rb +189 -0
- data/lib/doc_storage/syntax_error.rb +5 -0
- data/lib/doc_storage.rb +3 -0
- data/spec/multi_part_document_spec.rb +139 -0
- data/spec/simple_document_spec.rb +147 -0
- metadata +69 -0
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2009 David Majda
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person
|
4
|
+
obtaining a copy of this software and associated documentation
|
5
|
+
files (the "Software"), to deal in the Software without
|
6
|
+
restriction, including without limitation the rights to use,
|
7
|
+
copy, modify, merge, publish, distribute, sublicense, and/or sell
|
8
|
+
copies of the Software, and to permit persons to whom the
|
9
|
+
Software is furnished to do so, subject to the following
|
10
|
+
conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be
|
13
|
+
included in all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
17
|
+
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
18
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
19
|
+
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
20
|
+
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
21
|
+
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
22
|
+
OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,123 @@
|
|
1
|
+
= DocStorage
|
2
|
+
|
3
|
+
http://github.com/dmajda/doc_storage
|
4
|
+
|
5
|
+
DocStorage is a simple Ruby library for manipulating documents containing a text
|
6
|
+
and metadata. These documents can be used to implement a blog, wiki, or similar
|
7
|
+
application without a relational database.
|
8
|
+
|
9
|
+
The library distinguishes between <em>simple documents</em> and <em>multipart
|
10
|
+
documents</em>. A simple document looks like an RFC 822 message and it is
|
11
|
+
suitable for storing a text associated with some metadata (e.g. a blog article
|
12
|
+
with a title and a publication date). A multipart document is loosely based on
|
13
|
+
the MIME multipart message format and allows storing multiple simple documents
|
14
|
+
(e.g. blog comments, each with an author and a publication date) in one file.
|
15
|
+
|
16
|
+
== Document Format
|
17
|
+
|
18
|
+
A simple document looks like this:
|
19
|
+
|
20
|
+
Title: My blog article
|
21
|
+
Datetime: 2009-11-01 18:03:27
|
22
|
+
|
23
|
+
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc vel lorem
|
24
|
+
massa. Sed blandit orci id leo blandit ut fermentum lacus ullamcorper.
|
25
|
+
Suspendisse metus sapien, consectetur vitae imperdiet vel, ornare a metus.
|
26
|
+
In imperdiet euismod mi, nec volutpat lorem porta id.
|
27
|
+
|
28
|
+
|
29
|
+
A multipart document looks like this:
|
30
|
+
|
31
|
+
Boundary: =====
|
32
|
+
|
33
|
+
--=====
|
34
|
+
Author: Fan
|
35
|
+
Datetime: 2009-11-01 20:07:15
|
36
|
+
|
37
|
+
Your article is really great!
|
38
|
+
--=====
|
39
|
+
Author: Critic
|
40
|
+
Datetime: 2009-11-01 20:10:54
|
41
|
+
|
42
|
+
Your article sucks!
|
43
|
+
|
44
|
+
See the documentation of <tt>DocStorage::SimpleDocument</tt> and
|
45
|
+
<tt>DocStorage::MultiPartDocument</tt> classes for more formal format
|
46
|
+
description.
|
47
|
+
|
48
|
+
== Installation
|
49
|
+
|
50
|
+
sudo gem install doc_storage --source http://gemcutter.org
|
51
|
+
|
52
|
+
== Example Usage
|
53
|
+
|
54
|
+
=== Simple Documents
|
55
|
+
|
56
|
+
require "lib/doc_storage"
|
57
|
+
|
58
|
+
# Create a new document with headers and body
|
59
|
+
document = DocStorage::SimpleDocument.new(
|
60
|
+
{
|
61
|
+
"Title" => "Finishing the documentation",
|
62
|
+
"Priority" => "urgent"
|
63
|
+
},
|
64
|
+
"We should finish the documentation ASAP."
|
65
|
+
)
|
66
|
+
|
67
|
+
# Parse a file
|
68
|
+
document = File.open("examples/simple.txt", "r") do |f|
|
69
|
+
DocStorage::SimpleDocument.parse(f)
|
70
|
+
end
|
71
|
+
|
72
|
+
# Document manipulation
|
73
|
+
document.headers["Tags"] = "example"
|
74
|
+
document.body += "Nulla mi dui, pellentesque et accumsan vitae, mattis et velit."
|
75
|
+
|
76
|
+
# Save the modified document
|
77
|
+
File.open("examples/simple_modified.txt", "w") do |f|
|
78
|
+
f.write(document)
|
79
|
+
end
|
80
|
+
|
81
|
+
=== Multipart Documents
|
82
|
+
require "lib/doc_storage"
|
83
|
+
|
84
|
+
# Create a new document with two parts
|
85
|
+
document = DocStorage::MultiPartDocument.new([
|
86
|
+
DocStorage::SimpleDocument.new(
|
87
|
+
{
|
88
|
+
"Title" => "Finishing the documentation",
|
89
|
+
"Priority" => "urgent"
|
90
|
+
},
|
91
|
+
"We should finish the documentation ASAP."
|
92
|
+
),
|
93
|
+
DocStorage::SimpleDocument.new(
|
94
|
+
{
|
95
|
+
"Title" => "Finishing the code",
|
96
|
+
"Priority" => "more urgent"
|
97
|
+
},
|
98
|
+
"But we should finish the code first!"
|
99
|
+
),
|
100
|
+
])
|
101
|
+
|
102
|
+
# Parse a file
|
103
|
+
document = File.open("examples/multipart.txt", "r") do |f|
|
104
|
+
DocStorage::MultiPartDocument.parse(f)
|
105
|
+
end
|
106
|
+
|
107
|
+
# Document manipulation
|
108
|
+
document.parts << DocStorage::SimpleDocument.new(
|
109
|
+
{
|
110
|
+
"Author" => "Middle man",
|
111
|
+
"Datetime" => "2009-11-01 21:15:33",
|
112
|
+
},
|
113
|
+
"I think your article is neither good nor bad."
|
114
|
+
)
|
115
|
+
|
116
|
+
# Save the modified document
|
117
|
+
File.open("examples/multipart_modified.txt", "w") do |f|
|
118
|
+
f.write(document)
|
119
|
+
end
|
120
|
+
|
121
|
+
== Author
|
122
|
+
|
123
|
+
DocStorage was brought to you by David Majda (david@majda.cz[mailto:david@majda.cz], www.majda.cz).
|
data/Rakefile
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
require "rake/gempackagetask"
|
2
|
+
require "rake/rdoctask"
|
3
|
+
require "spec/rake/spectask"
|
4
|
+
|
5
|
+
Spec::Rake::SpecTask.new do |t|
|
6
|
+
t.spec_opts = ["--color", "--format", "nested"]
|
7
|
+
end
|
8
|
+
|
9
|
+
Rake::RDocTask.new do |t|
|
10
|
+
t.main = "README.rdoc"
|
11
|
+
t.rdoc_dir = "doc"
|
12
|
+
t.rdoc_files.add("README.rdoc", "lib/**/*.rb")
|
13
|
+
end
|
14
|
+
|
15
|
+
specification = Gem::Specification.new do |s|
|
16
|
+
s.name = "doc_storage"
|
17
|
+
s.version = "0.9"
|
18
|
+
s.summary = "Simple Ruby library for manipulating documents containing a " +
|
19
|
+
"text and metadata."
|
20
|
+
s.description = "DocStorage is a simple Ruby library for manipulating " +
|
21
|
+
"documents containing a text and metadata. These documents " +
|
22
|
+
"can be used to implement a blog, wiki, or similar " +
|
23
|
+
"application without a relational database."
|
24
|
+
s.required_ruby_version = ">= 1.8.6"
|
25
|
+
|
26
|
+
s.author = "David Majda"
|
27
|
+
s.email = "david@majda.cz"
|
28
|
+
s.homepage = "http://github.com/dmajda/doc_storage"
|
29
|
+
|
30
|
+
s.files = FileList[
|
31
|
+
"Rakefile",
|
32
|
+
"README.rdoc",
|
33
|
+
"LICENSE",
|
34
|
+
"VERSION",
|
35
|
+
Dir["lib/**/*.rb"],
|
36
|
+
Dir["spec/**/*.rb"],
|
37
|
+
Dir["examples/**/*"]
|
38
|
+
]
|
39
|
+
|
40
|
+
s.has_rdoc = true
|
41
|
+
s.extra_rdoc_files = ["README.rdoc"]
|
42
|
+
s.rdoc_options = ["--main", "README.rdoc"]
|
43
|
+
end
|
44
|
+
|
45
|
+
Rake::GemPackageTask.new(specification) do |t|
|
46
|
+
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.9
|
@@ -0,0 +1,40 @@
|
|
1
|
+
dir = File.dirname(__FILE__)
|
2
|
+
|
3
|
+
require "#{dir}/../lib/doc_storage"
|
4
|
+
|
5
|
+
# Create a new document with two parts
|
6
|
+
document = DocStorage::MultiPartDocument.new([
|
7
|
+
DocStorage::SimpleDocument.new(
|
8
|
+
{
|
9
|
+
"Title" => "Finishing the documentation",
|
10
|
+
"Priority" => "urgent"
|
11
|
+
},
|
12
|
+
"We should finish the documentation ASAP."
|
13
|
+
),
|
14
|
+
DocStorage::SimpleDocument.new(
|
15
|
+
{
|
16
|
+
"Title" => "Finishing the code",
|
17
|
+
"Priority" => "more urgent"
|
18
|
+
},
|
19
|
+
"But we should finish the code first!"
|
20
|
+
),
|
21
|
+
])
|
22
|
+
|
23
|
+
# Parse a file
|
24
|
+
document = File.open("#{dir}/multipart.txt", "r") do |f|
|
25
|
+
DocStorage::MultiPartDocument.parse(f)
|
26
|
+
end
|
27
|
+
|
28
|
+
# Document manipulation
|
29
|
+
document.parts << DocStorage::SimpleDocument.new(
|
30
|
+
{
|
31
|
+
"Author" => "Middle man",
|
32
|
+
"Datetime" => "2009-11-01 21:15:33",
|
33
|
+
},
|
34
|
+
"I think your article is neither good nor bad."
|
35
|
+
)
|
36
|
+
|
37
|
+
# Save the modified document
|
38
|
+
File.open("#{dir}/multipart_modified.txt", "w") do |f|
|
39
|
+
f.write(document)
|
40
|
+
end
|
data/examples/simple.rb
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
dir = File.dirname(__FILE__)
|
2
|
+
|
3
|
+
require "#{dir}/../lib/doc_storage"
|
4
|
+
|
5
|
+
# Create a new document with headers and body
|
6
|
+
document = DocStorage::SimpleDocument.new(
|
7
|
+
{
|
8
|
+
"Title" => "Finishing the documentation",
|
9
|
+
"Priority" => "urgent"
|
10
|
+
},
|
11
|
+
"We should finish the documentation ASAP."
|
12
|
+
)
|
13
|
+
|
14
|
+
# Parse a file
|
15
|
+
document = File.open("#{dir}/simple.txt", "r") do |f|
|
16
|
+
DocStorage::SimpleDocument.parse(f)
|
17
|
+
end
|
18
|
+
|
19
|
+
# Document manipulation
|
20
|
+
document.headers["Tags"] = "example"
|
21
|
+
document.body += "Nulla mi dui, pellentesque et accumsan vitae, mattis et velit."
|
22
|
+
|
23
|
+
# Save the modified document
|
24
|
+
File.open("#{dir}/simple_modified.txt", "w") do |f|
|
25
|
+
f.write(document)
|
26
|
+
end
|
data/examples/simple.txt
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
Title: My blog article
|
2
|
+
Datetime: 2009-11-01 18:03:27
|
3
|
+
|
4
|
+
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc vel lorem
|
5
|
+
massa. Sed blandit orci id leo blandit ut fermentum lacus ullamcorper.
|
6
|
+
Suspendisse metus sapien, consectetur vitae imperdiet vel, ornare a metus.
|
7
|
+
In imperdiet euismod mi, nec volutpat lorem porta id.
|
@@ -0,0 +1,141 @@
|
|
1
|
+
module DocStorage
|
2
|
+
# The +MultiPartDocument+ class represents a document consisting of several
|
3
|
+
# simple documents (see the +SimpleDocument+ class documentation for a
|
4
|
+
# description), loosely based on the MIME multipart message format. It is
|
5
|
+
# suitable for storing multiple documents containing a text associated with
|
6
|
+
# some metadata (e.g. blog comments, each with an author and a publication
|
7
|
+
# date). The +MultiPartDocument+ class allows you to create the document
|
8
|
+
# programmatically, parse it from a file, manipulate its structure and save it
|
9
|
+
# to a file.
|
10
|
+
#
|
11
|
+
# == Document Format
|
12
|
+
#
|
13
|
+
# In serialized form, a multipart document looks like this:
|
14
|
+
#
|
15
|
+
# Boundary: =====
|
16
|
+
#
|
17
|
+
# --=====
|
18
|
+
# Author: Fan
|
19
|
+
# Datetime: 2009-11-01 20:07:15
|
20
|
+
#
|
21
|
+
# Your article is really great!
|
22
|
+
# --=====
|
23
|
+
# Author: Critic
|
24
|
+
# Datetime: 2009-11-01 20:10:54
|
25
|
+
#
|
26
|
+
# Your article sucks!
|
27
|
+
#
|
28
|
+
# The document is composed of one or more simple documents, separated by a
|
29
|
+
# _boundary_ -- a line beginning with "--" and containing a predefined
|
30
|
+
# <em>boundary string</em>. The first document is a _prologue_ and it defines
|
31
|
+
# the boundary string (without the "--" prefix) in its "Boundary" header. All
|
32
|
+
# other headers of the prologue are ignored and so is its body. Remaining
|
33
|
+
# documents are the _parts_ of the multipart document. Documents without any
|
34
|
+
# parts are perfectly legal, however the prologue with the boundary definition
|
35
|
+
# must always be present.
|
36
|
+
#
|
37
|
+
# == Example Usage
|
38
|
+
#
|
39
|
+
# require "lib/doc_storage"
|
40
|
+
#
|
41
|
+
# # Create a new document with two parts
|
42
|
+
# document = DocStorage::MultiPartDocument.new([
|
43
|
+
# DocStorage::SimpleDocument.new(
|
44
|
+
# {
|
45
|
+
# "Title" => "Finishing the documentation",
|
46
|
+
# "Priority" => "urgent"
|
47
|
+
# },
|
48
|
+
# "We should finish the documentation ASAP."
|
49
|
+
# ),
|
50
|
+
# DocStorage::SimpleDocument.new(
|
51
|
+
# {
|
52
|
+
# "Title" => "Finishing the code",
|
53
|
+
# "Priority" => "more urgent"
|
54
|
+
# },
|
55
|
+
# "But we should finish the code first!"
|
56
|
+
# ),
|
57
|
+
# ])
|
58
|
+
#
|
59
|
+
# # Parse a file
|
60
|
+
# document = File.open("examples/multipart.txt", "r") do |f|
|
61
|
+
# DocStorage::MultiPartDocument.parse(f)
|
62
|
+
# end
|
63
|
+
#
|
64
|
+
# # Document manipulation
|
65
|
+
# document.parts << DocStorage::SimpleDocument.new(
|
66
|
+
# {
|
67
|
+
# "Author" => "Middle man",
|
68
|
+
# "Datetime" => "2009-11-01 21:15:33",
|
69
|
+
# },
|
70
|
+
# "I think your article is neither good nor bad."
|
71
|
+
# )
|
72
|
+
#
|
73
|
+
# # Save the modified document
|
74
|
+
# File.open("examples/multipart_modified.txt", "w") do |f|
|
75
|
+
# f.write(document)
|
76
|
+
# end
|
77
|
+
class MultiPartDocument
|
78
|
+
# document parts (+Array+ of <tt>DocStorage::SimpleDocument</tt>)
|
79
|
+
attr_accessor :parts
|
80
|
+
|
81
|
+
class << self
|
82
|
+
private
|
83
|
+
def parse_from_io(io)
|
84
|
+
prologue = SimpleDocument.parse(io, :detect)
|
85
|
+
boundary = prologue.headers["Boundary"]
|
86
|
+
|
87
|
+
parts = []
|
88
|
+
until io.eof?
|
89
|
+
parts << SimpleDocument.parse(io, boundary)
|
90
|
+
end
|
91
|
+
|
92
|
+
MultiPartDocument.new(parts)
|
93
|
+
end
|
94
|
+
|
95
|
+
public
|
96
|
+
# Parses a multipart document from its serialized form and returns a new
|
97
|
+
# +MultiPartDocument+ instance.
|
98
|
+
#
|
99
|
+
# The +source+ can be either an +IO+-like object or a +String+. In the
|
100
|
+
# latter case, it is assumed that the string contains a serialized
|
101
|
+
# document (not a file name).
|
102
|
+
#
|
103
|
+
# If any syntax error occurs, a +SyntaxError+ exception is raised. This
|
104
|
+
# can happen when parsing the prologue or parts and an invalid header is
|
105
|
+
# encountered, the headers are not terminated (no empty line separating
|
106
|
+
# headers and body is parsed before the end of file) or if no "Boundary"
|
107
|
+
# header is found in the prologue.
|
108
|
+
#
|
109
|
+
# See the +MultiPartDocument+ class documentation for a detailed
|
110
|
+
# document format description.
|
111
|
+
def parse(source)
|
112
|
+
parse_from_io(source.is_a?(String) ? StringIO.new(source) : source)
|
113
|
+
end
|
114
|
+
end
|
115
|
+
|
116
|
+
# Creates a new +MultiPartDocument+ with given parts.
|
117
|
+
def initialize(parts)
|
118
|
+
@parts = parts
|
119
|
+
end
|
120
|
+
|
121
|
+
# Tests if two documents are equal, i.e. whether they have the same class
|
122
|
+
# and equal parts (in the <tt>==</tt> sense).
|
123
|
+
def ==(other)
|
124
|
+
other.instance_of?(self.class) && @parts == other.parts
|
125
|
+
end
|
126
|
+
|
127
|
+
# Returns string representation of this document. The result is in format
# described in the +MultiPartDocument+ class documentation.
#
# The boundary is a random string of 64 alphanumeric characters. A candidate
# boundary is discarded and a new one is drawn whenever "--" followed by the
# candidate appears as a whole line in any serialized part, because such a
# line would be read as a part separator when the result is parsed back.
def to_s
  chars = ("a".."z").to_a + ("A".."Z").to_a + ("0".."9").to_a

  # Serialize the parts first so that the boundary can be checked against
  # exactly the text that will be written.
  serialized_parts = @parts.map { |part| part.to_s }

  boundary = nil
  loop do
    boundary = Array.new(64) { chars[rand(chars.length)] }.join("")
    delimiter = "--#{boundary}"
    collision = serialized_parts.any? do |text|
      # The -1 limit keeps trailing empty fields; every line of the part is
      # compared, including the last one, which gets a newline appended when
      # the parts are joined below.
      text.split("\n", -1).include?(delimiter)
    end
    break unless collision
  end

  SimpleDocument.new({"Boundary" => boundary}, "").to_s +
    serialized_parts.map { |text| "--#{boundary}\n#{text}" }.join("\n")
end
|
140
|
+
end
|
141
|
+
end
|
@@ -0,0 +1,189 @@
|
|
1
|
+
module DocStorage
|
2
|
+
# The +SimpleDocument+ class represents a simple RFC 822-like document,
|
3
|
+
# suitable for storing a text associated with some metadata (e.g. a blog
|
4
|
+
# article with a title and a publication date). The +SimpleDocument+ class
|
5
|
+
# allows you to create the document programmatically, parse it from a file,
|
6
|
+
# manipulate its structure and save it to a file.
|
7
|
+
#
|
8
|
+
# Each document consists of _headers_ and a _body_. Headers are a dictionary,
|
9
|
+
# mapping string names to string values. Body is a free-form text. The header
|
10
|
+
# names can contain only alphanumeric characters and a hyphen ("-") and they
|
11
|
+
# are case sensitive. The header values can contain any text that does not
|
12
|
+
# begin with whitespace and does not contain a CR or LF character.
|
13
|
+
#
|
14
|
+
# == Document Format
|
15
|
+
#
|
16
|
+
# In serialized form, a simple document looks like this:
|
17
|
+
#
|
18
|
+
# Title: My blog article
|
19
|
+
# Datetime: 2009-11-01 18:03:27
|
20
|
+
#
|
21
|
+
# Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc vel lorem
|
22
|
+
# massa. Sed blandit orci id leo blandit ut fermentum lacus ullamcorper.
|
23
|
+
# Suspendisse metus sapien, consectetur vitae imperdiet vel, ornare a metus.
|
24
|
+
# In imperdiet euismod mi, nec volutpat lorem porta id.
|
25
|
+
#
|
26
|
+
# The headers are first, each on its own line. The header names are separated
|
27
|
+
# from values by a colon (":") and any amount of whitespace. Duplicate headers
|
28
|
+
# are allowed with later value overwriting the earlier one. Otherwise, the
|
29
|
+
# order of the headers does not matter. The body is separated from the headers
|
30
|
+
# by an empty line.
|
31
|
+
#
|
32
|
+
# Documents without any headers are perfectly legal and so are documents with
|
33
|
+
# an empty body. However, the separating line must always be present. This
|
34
|
+
# means that an empty file is not a valid document, but a file containing a
|
35
|
+
# single newline is.
|
36
|
+
#
|
37
|
+
# == Example Usage
|
38
|
+
#
|
39
|
+
# require "lib/doc_storage"
|
40
|
+
#
|
41
|
+
# # Create a new document with headers and body
|
42
|
+
# document = DocStorage::SimpleDocument.new(
|
43
|
+
# {
|
44
|
+
# "Title" => "Finishing the documentation",
|
45
|
+
# "Priority" => "urgent"
|
46
|
+
# },
|
47
|
+
# "We should finish the documentation ASAP."
|
48
|
+
# )
|
49
|
+
#
|
50
|
+
# # Parse a file
|
51
|
+
# document = File.open("examples/simple.txt", "r") do |f|
|
52
|
+
# DocStorage::SimpleDocument.parse(f)
|
53
|
+
# end
|
54
|
+
#
|
55
|
+
# # Document manipulation
|
56
|
+
# document.headers["Tags"] = "example"
|
57
|
+
# document.body += "Nulla mi dui, pellentesque et accumsan vitae, mattis et velit."
|
58
|
+
#
|
59
|
+
# # Save the modified document
|
60
|
+
# File.open("examples/simple_modified.txt", "w") do |f|
|
61
|
+
# f.write(document)
|
62
|
+
# end
|
63
|
+
class SimpleDocument
|
64
|
+
# document headers (+Hash+)
|
65
|
+
attr_accessor :headers
|
66
|
+
# document body (+String+)
|
67
|
+
attr_accessor :body
|
68
|
+
|
69
|
+
class << self
|
70
|
+
private
|
71
|
+
# Reads header lines from +io+ up to and including the empty line that
# terminates the headers, and returns them as a +Hash+ mapping header names
# to values. After a successful call, +io+ is positioned at the first line
# following the empty line.
#
# A header line is a name (alphanumeric characters and hyphens), a colon, one
# or more whitespace characters and a value running to the end of the line.
# The separating whitespace is never part of the value. When a header is
# repeated, the later value overwrites the earlier one.
#
# Raises +SyntaxError+ when a line is neither a header nor the terminating
# empty line, when the input ends before the terminating empty line, or when
# +detect_boundary+ is true and no "Boundary" header was read.
def parse_headers(io, detect_boundary)
  result = {}
  headers_terminated = false

  until io.eof?
    line = io.readline
    case line
    # [^\S\n]+ consumes the whole run of separating whitespace but can never
    # swallow the line terminator, so values do not start with whitespace.
    when /\A([a-zA-Z0-9-]+):[^\S\n]+(.*)\n\z/
      result[$1] = $2
    when "\n"
      headers_terminated = true
      break
    else
      raise SyntaxError, "Invalid header: \"#{line.strip}\"."
    end
  end

  raise SyntaxError, "Unterminated headers." unless headers_terminated
  if detect_boundary && !result.has_key?("Boundary")
    raise SyntaxError, "No boundary defined."
  end

  result
end
|
95
|
+
|
96
|
+
# Reads a document body from +io+ and returns it as a +String+.
#
# If +boundary+ is +nil+ or +false+, the rest of +io+ is returned. Otherwise
# lines are read until the end of input or until a line equal to "--"
# followed by +boundary+ and a newline is read. That boundary line is
# consumed but is not part of the returned body.
def parse_body(io, boundary)
  return io.read unless boundary

  delimiter = "--#{boundary}\n"
  # Lines are collected and joined once instead of being appended to a string
  # with +=, which would copy the accumulated body for every line read.
  lines = []
  until io.eof?
    line = io.readline
    if line == delimiter
      # Trim the last newline from the body as it logically belongs to the
      # boundary. This allows bodies with no trailing newline.
      return lines.join[0..-2]
    end

    lines << line
  end
  lines.join
end
|
115
|
+
|
116
|
+
def parse_from_io(io, boundary)
|
117
|
+
headers = parse_headers(io, boundary == :detect)
|
118
|
+
boundary = headers["Boundary"] if boundary == :detect
|
119
|
+
body = parse_body(io, boundary)
|
120
|
+
|
121
|
+
SimpleDocument.new(headers, body)
|
122
|
+
end
|
123
|
+
|
124
|
+
public
|
125
|
+
# Parses a simple document from its serialized form and returns a new
|
126
|
+
# +SimpleDocument+ instance.
|
127
|
+
#
|
128
|
+
# The +source+ can be either an +IO+-like object or a +String+. In the
|
129
|
+
# latter case, it is assumed that the string contains a serialized
|
130
|
+
# document (not a file name).
|
131
|
+
#
|
132
|
+
# The +boundary+ determines how the end of the document body is detected:
|
133
|
+
#
|
134
|
+
# * If +boundary+ is +nil+, the document is read until the end of file.
|
135
|
+
#
|
136
|
+
# * If +boundary+ is <tt>:detect</tt>, the document is read until the
|
137
|
+
# end of file or until a line containing only a <em>boundary
|
138
|
+
# string</em> is read. The boundary string is the value of the
|
139
|
+
# "Boundary" header prefixed with "--".
|
140
|
+
#
|
141
|
+
# * Otherwise, it is assumed that +boundary+ contains a boundary string
|
142
|
+
# without the "--" prefix (the "Boundary" header value is ignored for
|
143
|
+
# the purpose of boundary detection). The document is read until the
|
144
|
+
# end of file or until a line containing only the boundary string is
|
145
|
+
# read.
|
146
|
+
#
|
147
|
+
# The +boundary+ parameter is provided mainly for parsing parts of
|
148
|
+
# multipart documents (see the +MultiPartDocument+ class documentation)
|
149
|
+
# and usually should not be used.
|
150
|
+
#
|
151
|
+
# If any syntax error occurs, a +SyntaxError+ exception is raised. This
|
152
|
+
# can happen when an invalid header is encountered, the headers are not
|
153
|
+
# terminated (no empty line separating headers and body is parsed before
|
154
|
+
# the end of file) or if no "Boundary" header is found when detecting a
|
155
|
+
# boundary.
|
156
|
+
#
|
157
|
+
# See the +SimpleDocument+ class documentation for a detailed document
|
158
|
+
# format description.
|
159
|
+
def parse(source, boundary = nil)
|
160
|
+
parse_from_io(
|
161
|
+
source.is_a?(String) ? StringIO.new(source) : source,
|
162
|
+
boundary
|
163
|
+
)
|
164
|
+
end
|
165
|
+
end
|
166
|
+
|
167
|
+
# Creates a new +SimpleDocument+ with given headers and body.
|
168
|
+
def initialize(headers, body)
|
169
|
+
@headers, @body = headers, body
|
170
|
+
end
|
171
|
+
|
172
|
+
# Tests if two documents are equal, i.e. whether they have the same class
|
173
|
+
# and equal headers and body (in the <tt>==</tt> sense).
|
174
|
+
def ==(other)
|
175
|
+
other.instance_of?(self.class) &&
|
176
|
+
@headers == other.headers &&
|
177
|
+
@body == other.body
|
178
|
+
end
|
179
|
+
|
180
|
+
# Returns string representation of this document. The result is in format
|
181
|
+
# described in the +SimpleDocument+ class documentation.
|
182
|
+
def to_s
|
183
|
+
serialized_headers = @headers.keys.sort.inject("") do |acc, key|
|
184
|
+
acc + "#{key}: #{@headers[key]}\n"
|
185
|
+
end
|
186
|
+
serialized_headers + "\n" + @body
|
187
|
+
end
|
188
|
+
end
|
189
|
+
end
|
data/lib/doc_storage.rb
ADDED
@@ -0,0 +1,139 @@
|
|
1
|
+
require File.dirname(__FILE__) + "/../lib/doc_storage"
|
2
|
+
|
3
|
+
module DocStorage
|
4
|
+
describe MultiPartDocument do
|
5
|
+
Spec::Matchers.define :parse_as_multi_part_document do |document|
|
6
|
+
match do |string|
|
7
|
+
MultiPartDocument::parse(string) == document
|
8
|
+
end
|
9
|
+
end
|
10
|
+
|
11
|
+
before :each do
|
12
|
+
@document = MultiPartDocument.new([:part1, :part2])
|
13
|
+
|
14
|
+
@document_with_no_parts = MultiPartDocument.new([])
|
15
|
+
@document_with_multiple_parts = MultiPartDocument.new([
|
16
|
+
SimpleDocument.new({"a" => "42", "b" => "43"}, "line1\nline2"),
|
17
|
+
SimpleDocument.new({"c" => "44", "d" => "45"}, "line3\nline4"),
|
18
|
+
])
|
19
|
+
end
|
20
|
+
|
21
|
+
describe "initialize" do
|
22
|
+
it "sets attributes correctly" do
|
23
|
+
@document.parts.should == [:part1, :part2]
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
describe "==" do
|
28
|
+
it "returns true when passed the same object" do
|
29
|
+
@document.should == @document
|
30
|
+
end
|
31
|
+
|
32
|
+
it "returns true when passed a MultiPartDocument initialized with the same parameter" do
|
33
|
+
@document.should == MultiPartDocument.new([:part1, :part2])
|
34
|
+
end
|
35
|
+
|
36
|
+
it "returns false when passed some random object" do
|
37
|
+
@document.should_not == Object.new
|
38
|
+
end
|
39
|
+
|
40
|
+
it "returns false when passed a subclass of MultiPartDocument initialized with the same parameter" do
|
41
|
+
class SubclassedMultiPartDocument < MultiPartDocument
|
42
|
+
end
|
43
|
+
|
44
|
+
@document.should_not ==
|
45
|
+
SubclassedMultiPartDocument.new([:part1, :part2])
|
46
|
+
end
|
47
|
+
|
48
|
+
it "returns false when passed a MultiPartDocument initialized with different parameter" do
|
49
|
+
@document.should_not == MultiPartDocument.new([:part3, :part4])
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
describe "parse" do
|
54
|
+
it "parses document with no parts" do
|
55
|
+
"Boundary: =====\n\n".should parse_as_multi_part_document(
|
56
|
+
@document_with_no_parts
|
57
|
+
)
|
58
|
+
end
|
59
|
+
|
60
|
+
it "parses document with multiple parts" do
|
61
|
+
[
|
62
|
+
"Boundary: =====",
|
63
|
+
"",
|
64
|
+
"--=====",
|
65
|
+
"a: 42",
|
66
|
+
"b: 43",
|
67
|
+
"",
|
68
|
+
"line1",
|
69
|
+
"line2",
|
70
|
+
"--=====",
|
71
|
+
"c: 44",
|
72
|
+
"d: 45",
|
73
|
+
"",
|
74
|
+
"line3",
|
75
|
+
"line4",
|
76
|
+
].join("\n").should parse_as_multi_part_document(
|
77
|
+
@document_with_multiple_parts
|
78
|
+
)
|
79
|
+
end
|
80
|
+
|
81
|
+
it "does not parse document with no Boundary: header" do
|
82
|
+
lambda {
|
83
|
+
MultiPartDocument.parse("\n\n")
|
84
|
+
}.should raise_error(SyntaxError, "No boundary defined.")
|
85
|
+
end
|
86
|
+
|
87
|
+
it "parses document from IO-like object" do
|
88
|
+
StringIO.open(
|
89
|
+
[
|
90
|
+
"Boundary: =====",
|
91
|
+
"",
|
92
|
+
"--=====",
|
93
|
+
"a: 42",
|
94
|
+
"b: 43",
|
95
|
+
"",
|
96
|
+
"line1",
|
97
|
+
"line2",
|
98
|
+
"--=====",
|
99
|
+
"c: 44",
|
100
|
+
"d: 45",
|
101
|
+
"",
|
102
|
+
"line3",
|
103
|
+
"line4",
|
104
|
+
].join("\n")
|
105
|
+
) do |io|
|
106
|
+
MultiPartDocument.parse(io).should == @document_with_multiple_parts
|
107
|
+
end
|
108
|
+
end
|
109
|
+
end
|
110
|
+
|
111
|
+
describe "to_s" do
|
112
|
+
it "serializes document with no parts" do
|
113
|
+
srand 0
|
114
|
+
@document_with_no_parts.to_s.should ==
|
115
|
+
"Boundary: SV1ad7dNjtvYKxgyym6bMNxUyrLznijuZqZfpVasJyXZDttoNGbj5GFk0xJlY3CI\n\n"
|
116
|
+
end
|
117
|
+
|
118
|
+
it "serializes document with multiple parts" do
|
119
|
+
srand 0
|
120
|
+
@document_with_multiple_parts.to_s.should == [
|
121
|
+
"Boundary: SV1ad7dNjtvYKxgyym6bMNxUyrLznijuZqZfpVasJyXZDttoNGbj5GFk0xJlY3CI",
|
122
|
+
"",
|
123
|
+
"--SV1ad7dNjtvYKxgyym6bMNxUyrLznijuZqZfpVasJyXZDttoNGbj5GFk0xJlY3CI",
|
124
|
+
"a: 42",
|
125
|
+
"b: 43",
|
126
|
+
"",
|
127
|
+
"line1",
|
128
|
+
"line2",
|
129
|
+
"--SV1ad7dNjtvYKxgyym6bMNxUyrLznijuZqZfpVasJyXZDttoNGbj5GFk0xJlY3CI",
|
130
|
+
"c: 44",
|
131
|
+
"d: 45",
|
132
|
+
"",
|
133
|
+
"line3",
|
134
|
+
"line4",
|
135
|
+
].join("\n")
|
136
|
+
end
|
137
|
+
end
|
138
|
+
end
|
139
|
+
end
|
@@ -0,0 +1,147 @@
|
|
1
|
+
require File.dirname(__FILE__) + "/../lib/doc_storage"
|
2
|
+
|
3
|
+
module DocStorage
|
4
|
+
describe SimpleDocument do
|
5
|
+
Spec::Matchers.define :parse_as_document do |document|
|
6
|
+
match do |string|
|
7
|
+
SimpleDocument.parse(string) == document
|
8
|
+
end
|
9
|
+
end
|
10
|
+
|
11
|
+
before :each do
|
12
|
+
@document = SimpleDocument.new({"a" => 42, "b" => 43}, "body")
|
13
|
+
|
14
|
+
@document_without_headers_without_body = SimpleDocument.new({}, "")
|
15
|
+
@document_without_headers_with_body = SimpleDocument.new({}, "line1\nline2")
|
16
|
+
@document_with_headers_without_body = SimpleDocument.new(
|
17
|
+
{"a" => "42", "b" => "43"},
|
18
|
+
""
|
19
|
+
)
|
20
|
+
@document_with_headers_with_body = SimpleDocument.new(
|
21
|
+
{"a" => "42", "b" => "43"},
|
22
|
+
"line1\nline2"
|
23
|
+
)
|
24
|
+
end
|
25
|
+
|
26
|
+
describe "initialize" do
|
27
|
+
it "sets attributes correctly" do
|
28
|
+
@document.headers.should == {"a" => 42, "b" => 43}
|
29
|
+
@document.body.should == "body"
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
describe "==" do
|
34
|
+
it "returns true when passed the same object" do
|
35
|
+
@document.should == @document
|
36
|
+
end
|
37
|
+
|
38
|
+
it "returns true when passed a SimpleDocument initialized with the same parameters" do
|
39
|
+
@document.should == SimpleDocument.new({"a" => 42, "b" => 43}, "body")
|
40
|
+
end
|
41
|
+
|
42
|
+
it "returns false when passed some random object" do
|
43
|
+
@document.should_not == Object.new
|
44
|
+
end
|
45
|
+
|
46
|
+
it "returns false when passed a subclass of SimpleDocument initialized with the same parameters" do
|
47
|
+
class SubclassedSimpleDocument < SimpleDocument
|
48
|
+
end
|
49
|
+
|
50
|
+
@document.should_not ==
|
51
|
+
SubclassedSimpleDocument.new({"a" => 42, "b" => 43}, "body")
|
52
|
+
end
|
53
|
+
|
54
|
+
it "returns false when passed a SimpleDocument initialized with different parameters" do
|
55
|
+
@document.should_not == SimpleDocument.new({"a" => 44, "b" => 45}, "body")
|
56
|
+
@document.should_not == SimpleDocument.new({"a" => 42, "b" => 43}, "nobody")
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
describe "parse" do
|
61
|
+
it "parses document with no headers and no body" do
|
62
|
+
"\n".should parse_as_document(@document_without_headers_without_body)
|
63
|
+
end
|
64
|
+
|
65
|
+
it "parses document with no headers and body" do
|
66
|
+
"\nline1\nline2".should parse_as_document(
|
67
|
+
@document_without_headers_with_body
|
68
|
+
)
|
69
|
+
end
|
70
|
+
|
71
|
+
it "parses document with headers and no body" do
|
72
|
+
"a: 42\nb: 43\n\n".should parse_as_document(
|
73
|
+
@document_with_headers_without_body
|
74
|
+
)
|
75
|
+
end
|
76
|
+
|
77
|
+
it "parses document with headers and body" do
|
78
|
+
"a: 42\nb: 43\n\nline1\nline2".should parse_as_document(
|
79
|
+
@document_with_headers_with_body
|
80
|
+
)
|
81
|
+
end
|
82
|
+
|
83
|
+
it "does not parse document with invalid headers" do
|
84
|
+
lambda {
|
85
|
+
SimpleDocument.parse("bullshit")
|
86
|
+
}.should raise_error(SyntaxError, "Invalid header: \"bullshit\".")
|
87
|
+
end
|
88
|
+
|
89
|
+
it "does not parse document with unterminated headers" do
|
90
|
+
lambda {
|
91
|
+
SimpleDocument.parse("a: 42\nb: 42\n")
|
92
|
+
}.should raise_error(SyntaxError, "Unterminated headers.")
|
93
|
+
end
|
94
|
+
|
95
|
+
it "parses document from IO-like object" do
|
96
|
+
StringIO.open("a: 42\nb: 43\n\nline1\nline2") do |io|
|
97
|
+
SimpleDocument.parse(io).should == @document_with_headers_with_body
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
101
|
+
it "parses document when detecting a boundary" do
|
102
|
+
SimpleDocument.parse(
|
103
|
+
"a: 42\nb: 43\nBoundary: =====\n\nline1\nline2\n--=====\nbullshit",
|
104
|
+
:detect
|
105
|
+
).should == SimpleDocument.new(
|
106
|
+
{"a" => "42", "b" => "43", "Boundary" => "====="},
|
107
|
+
"line1\nline2"
|
108
|
+
)
|
109
|
+
end
|
110
|
+
|
111
|
+
it "does not parse document when detecting a boundary and no boundary defined" do
|
112
|
+
lambda {
|
113
|
+
SimpleDocument.parse(
|
114
|
+
"a: 42\nb: 43\n\nline1\nline2\n--=====\nbullshit",
|
115
|
+
:detect
|
116
|
+
)
|
117
|
+
}.should raise_error(SyntaxError, "No boundary defined.")
|
118
|
+
end
|
119
|
+
|
120
|
+
it "parses document when passed a boundary" do
|
121
|
+
SimpleDocument.parse(
|
122
|
+
"a: 42\nb: 43\n\nline1\nline2\n--=====\nbullshit",
|
123
|
+
"====="
|
124
|
+
).should == @document_with_headers_with_body
|
125
|
+
end
|
126
|
+
end
|
127
|
+
|
128
|
+
describe "to_s" do
|
129
|
+
it "serializes document with no headers and no body" do
|
130
|
+
@document_without_headers_without_body.to_s.should == "\n"
|
131
|
+
end
|
132
|
+
|
133
|
+
it "serializes document with no headers and body" do
|
134
|
+
@document_without_headers_with_body.to_s.should == "\nline1\nline2"
|
135
|
+
end
|
136
|
+
|
137
|
+
it "serializes document with headers and no body" do
|
138
|
+
@document_with_headers_without_body.to_s.should == "a: 42\nb: 43\n\n"
|
139
|
+
end
|
140
|
+
|
141
|
+
it "serializes document with headers and body" do
|
142
|
+
@document_with_headers_with_body.to_s.should ==
|
143
|
+
"a: 42\nb: 43\n\nline1\nline2"
|
144
|
+
end
|
145
|
+
end
|
146
|
+
end
|
147
|
+
end
|
metadata
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: doc_storage
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: "0.9"
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- David Majda
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-11-19 00:00:00 +01:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description: DocStorage is a simple Ruby library for manipulating documents containing a text and metadata. These documents can be used to implement a blog, wiki, or similar application without a relational database.
|
17
|
+
email: david@majda.cz
|
18
|
+
executables: []
|
19
|
+
|
20
|
+
extensions: []
|
21
|
+
|
22
|
+
extra_rdoc_files:
|
23
|
+
- README.rdoc
|
24
|
+
files:
|
25
|
+
- Rakefile
|
26
|
+
- README.rdoc
|
27
|
+
- LICENSE
|
28
|
+
- VERSION
|
29
|
+
- lib/doc_storage.rb
|
30
|
+
- lib/doc_storage/multi_part_document.rb
|
31
|
+
- lib/doc_storage/simple_document.rb
|
32
|
+
- lib/doc_storage/syntax_error.rb
|
33
|
+
- spec/simple_document_spec.rb
|
34
|
+
- spec/multi_part_document_spec.rb
|
35
|
+
- examples/simple.txt
|
36
|
+
- examples/multipart.rb
|
37
|
+
- examples/simple.rb
|
38
|
+
- examples/multipart.txt
|
39
|
+
has_rdoc: true
|
40
|
+
homepage: http://github.com/dmajda/doc_storage
|
41
|
+
licenses: []
|
42
|
+
|
43
|
+
post_install_message:
|
44
|
+
rdoc_options:
|
45
|
+
- --main
|
46
|
+
- README.rdoc
|
47
|
+
require_paths:
|
48
|
+
- lib
|
49
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
50
|
+
requirements:
|
51
|
+
- - ">="
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: 1.8.6
|
54
|
+
version:
|
55
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
56
|
+
requirements:
|
57
|
+
- - ">="
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
version: "0"
|
60
|
+
version:
|
61
|
+
requirements: []
|
62
|
+
|
63
|
+
rubyforge_project:
|
64
|
+
rubygems_version: 1.3.5
|
65
|
+
signing_key:
|
66
|
+
specification_version: 3
|
67
|
+
summary: Simple Ruby library for manipulating documents containing a text and metadata.
|
68
|
+
test_files: []
|
69
|
+
|