doc_storage 0.9
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +22 -0
- data/README.rdoc +123 -0
- data/Rakefile +46 -0
- data/VERSION +1 -0
- data/examples/multipart.rb +40 -0
- data/examples/multipart.txt +12 -0
- data/examples/simple.rb +26 -0
- data/examples/simple.txt +7 -0
- data/lib/doc_storage/multi_part_document.rb +141 -0
- data/lib/doc_storage/simple_document.rb +189 -0
- data/lib/doc_storage/syntax_error.rb +5 -0
- data/lib/doc_storage.rb +3 -0
- data/spec/multi_part_document_spec.rb +139 -0
- data/spec/simple_document_spec.rb +147 -0
- metadata +69 -0
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2009 David Majda
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person
|
4
|
+
obtaining a copy of this software and associated documentation
|
5
|
+
files (the "Software"), to deal in the Software without
|
6
|
+
restriction, including without limitation the rights to use,
|
7
|
+
copy, modify, merge, publish, distribute, sublicense, and/or sell
|
8
|
+
copies of the Software, and to permit persons to whom the
|
9
|
+
Software is furnished to do so, subject to the following
|
10
|
+
conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be
|
13
|
+
included in all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
17
|
+
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
18
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
19
|
+
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
20
|
+
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
21
|
+
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
22
|
+
OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,123 @@
|
|
1
|
+
= DocStorage
|
2
|
+
|
3
|
+
http://github.com/dmajda/doc_storage
|
4
|
+
|
5
|
+
DocStorage is a simple Ruby library for manipulating documents containing a text
|
6
|
+
and metadata. These documents can be used to implement a blog, wiki, or similar
|
7
|
+
application without a relational database.
|
8
|
+
|
9
|
+
The library distinguishes between <em>simple documents</em> and <em>multipart
|
10
|
+
documents</em>. A simple document looks like an RFC 822 message and it is
|
11
|
+
suitable for storing a text associated with some metadata (e.g. a blog article
|
12
|
+
with a title and a publication date). A multipart document is loosely based on
|
13
|
+
the MIME multipart message format and allows storing multiple simple documents
|
14
|
+
(e.g. blog comments, each with an author and a publication date) in one file.
|
15
|
+
|
16
|
+
== Document Format
|
17
|
+
|
18
|
+
A simple document looks like this:
|
19
|
+
|
20
|
+
Title: My blog article
|
21
|
+
Datetime: 2009-11-01 18:03:27
|
22
|
+
|
23
|
+
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc vel lorem
|
24
|
+
massa. Sed blandit orci id leo blandit ut fermentum lacus ullamcorper.
|
25
|
+
Suspendisse metus sapien, consectetur vitae imperdiet vel, ornare a metus.
|
26
|
+
In imperdiet euismod mi, nec volutpat lorem porta id.
|
27
|
+
|
28
|
+
|
29
|
+
A multipart document looks like this:
|
30
|
+
|
31
|
+
Boundary: =====
|
32
|
+
|
33
|
+
--=====
|
34
|
+
Author: Fan
|
35
|
+
Datetime: 2009-11-01 20:07:15
|
36
|
+
|
37
|
+
Your article is really great!
|
38
|
+
--=====
|
39
|
+
Author: Critic
|
40
|
+
Datetime: 2009-11-01 20:10:54
|
41
|
+
|
42
|
+
Your article sucks!
|
43
|
+
|
44
|
+
See the documentation of <tt>DocStorage::SimpleDocument</tt> and
|
45
|
+
<tt>DocStorage::MultiPartDocument</tt> classes for more formal format
|
46
|
+
description.
|
47
|
+
|
48
|
+
== Installation
|
49
|
+
|
50
|
+
sudo gem install doc_storage --source http://gemcutter.org
|
51
|
+
|
52
|
+
== Example Usage
|
53
|
+
|
54
|
+
=== Simple Documents
|
55
|
+
|
56
|
+
require "lib/doc_storage"
|
57
|
+
|
58
|
+
# Create a new document with headers and body
|
59
|
+
document = DocStorage::SimpleDocument.new(
|
60
|
+
{
|
61
|
+
"Title" => "Finishing the documentation",
|
62
|
+
"Priority" => "urgent"
|
63
|
+
},
|
64
|
+
"We should finish the documentation ASAP."
|
65
|
+
)
|
66
|
+
|
67
|
+
# Parse a file
|
68
|
+
document = File.open("examples/simple.txt", "r") do |f|
|
69
|
+
DocStorage::SimpleDocument.parse(f)
|
70
|
+
end
|
71
|
+
|
72
|
+
# Document manipulation
|
73
|
+
document.headers["Tags"] = "example"
|
74
|
+
document.body += "Nulla mi dui, pellentesque et accumsan vitae, mattis et velit."
|
75
|
+
|
76
|
+
# Save the modified document
|
77
|
+
File.open("examples/simple_modified.txt", "w") do |f|
|
78
|
+
f.write(document)
|
79
|
+
end
|
80
|
+
|
81
|
+
=== Multipart Documents
|
82
|
+
require "lib/doc_storage"
|
83
|
+
|
84
|
+
# Create a new document with two parts
|
85
|
+
document = DocStorage::MultiPartDocument.new([
|
86
|
+
DocStorage::SimpleDocument.new(
|
87
|
+
{
|
88
|
+
"Title" => "Finishing the documentation",
|
89
|
+
"Priority" => "urgent"
|
90
|
+
},
|
91
|
+
"We should finish the documentation ASAP."
|
92
|
+
),
|
93
|
+
DocStorage::SimpleDocument.new(
|
94
|
+
{
|
95
|
+
"Title" => "Finishing the code",
|
96
|
+
"Priority" => "more urgent"
|
97
|
+
},
|
98
|
+
"But we should finish the code first!"
|
99
|
+
),
|
100
|
+
])
|
101
|
+
|
102
|
+
# Parse a file
|
103
|
+
document = File.open("examples/multipart.txt", "r") do |f|
|
104
|
+
DocStorage::MultiPartDocument.parse(f)
|
105
|
+
end
|
106
|
+
|
107
|
+
# Document manipulation
|
108
|
+
document.parts << DocStorage::SimpleDocument.new(
|
109
|
+
{
|
110
|
+
"Author" => "Middle man",
|
111
|
+
"Datetime" => "2009-11-01 21:15:33",
|
112
|
+
},
|
113
|
+
"I think your article is neither good nor bad."
|
114
|
+
)
|
115
|
+
|
116
|
+
# Save the modified document
|
117
|
+
File.open("examples/multipart_modified.txt", "w") do |f|
|
118
|
+
f.write(document)
|
119
|
+
end
|
120
|
+
|
121
|
+
== Author
|
122
|
+
|
123
|
+
DocStorage was brought to you by David Majda (david@majda.cz[mailto:david@majda.cz], www.majda.cz).
|
data/Rakefile
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
require "rake/gempackagetask"
|
2
|
+
require "rake/rdoctask"
|
3
|
+
require "spec/rake/spectask"
|
4
|
+
|
5
|
+
# Runs the specs with colored, nested output.
Spec::Rake::SpecTask.new { |t| t.spec_opts = %w[--color --format nested] }

# Generates the RDoc documentation into the "doc" directory, using the README
# as the main page.
Rake::RDocTask.new do |rdoc|
  rdoc.main = "README.rdoc"
  rdoc.rdoc_dir = "doc"
  rdoc.rdoc_files.add("README.rdoc", "lib/**/*.rb")
end

# Gem specification: identity, author information, packaged files and RDoc
# settings.
specification = Gem::Specification.new do |s|
  s.name = "doc_storage"
  s.version = "0.9"
  s.summary = "Simple Ruby library for manipulating documents containing a " \
              "text and metadata."
  s.description = "DocStorage is a simple Ruby library for manipulating " \
                  "documents containing a text and metadata. These documents " \
                  "can be used to implement a blog, wiki, or similar " \
                  "application without a relational database."
  s.required_ruby_version = ">= 1.8.6"

  s.author = "David Majda"
  s.email = "david@majda.cz"
  s.homepage = "http://github.com/dmajda/doc_storage"

  # Fixed top-level files first, then everything matched by the glob patterns
  # (each glob result is passed to FileList as a nested array, as before).
  static_files = %w[Rakefile README.rdoc LICENSE VERSION]
  globbed_files = ["lib/**/*.rb", "spec/**/*.rb", "examples/**/*"].map do |pattern|
    Dir[pattern]
  end
  s.files = FileList[*(static_files + globbed_files)]

  s.has_rdoc = true
  s.extra_rdoc_files = ["README.rdoc"]
  s.rdoc_options = ["--main", "README.rdoc"]
end

# The (empty) block is kept on purpose: the task is configured entirely by the
# specification above.
Rake::GemPackageTask.new(specification) do |_package|
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.9
|
@@ -0,0 +1,40 @@
|
|
1
|
+
# Example script: builds a multipart document in memory, parses another one
# from "multipart.txt", appends a part to it and writes the result to
# "multipart_modified.txt" (both files live next to this script).
dir = File.dirname(__FILE__)

require "#{dir}/../lib/doc_storage"

# Create a new document with two parts
documentation_task = DocStorage::SimpleDocument.new(
  { "Title" => "Finishing the documentation", "Priority" => "urgent" },
  "We should finish the documentation ASAP."
)
code_task = DocStorage::SimpleDocument.new(
  { "Title" => "Finishing the code", "Priority" => "more urgent" },
  "But we should finish the code first!"
)
document = DocStorage::MultiPartDocument.new([documentation_task, code_task])

# Parse a file (this replaces the document created above)
document = File.open("#{dir}/multipart.txt", "r") { |f| DocStorage::MultiPartDocument.parse(f) }

# Document manipulation
comment_headers = {
  "Author" => "Middle man",
  "Datetime" => "2009-11-01 21:15:33"
}
document.parts << DocStorage::SimpleDocument.new(
  comment_headers,
  "I think your article is neither good nor bad."
)

# Save the modified document
File.open("#{dir}/multipart_modified.txt", "w") { |f| f.write(document) }
|
data/examples/simple.rb
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
# Example script: builds a simple document in memory, parses another one from
# "simple.txt", modifies it and writes the result to "simple_modified.txt"
# (both files live next to this script).
dir = File.dirname(__FILE__)

require "#{dir}/../lib/doc_storage"

# Create a new document with headers and body
task_headers = { "Title" => "Finishing the documentation", "Priority" => "urgent" }
document = DocStorage::SimpleDocument.new(
  task_headers,
  "We should finish the documentation ASAP."
)

# Parse a file (this replaces the document created above)
document = File.open("#{dir}/simple.txt", "r") { |f| DocStorage::SimpleDocument.parse(f) }

# Document manipulation
document.headers["Tags"] = "example"
document.body += "Nulla mi dui, pellentesque et accumsan vitae, mattis et velit."

# Save the modified document
File.open("#{dir}/simple_modified.txt", "w") { |f| f.write(document) }
|
data/examples/simple.txt
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
Title: My blog article
|
2
|
+
Datetime: 2009-11-01 18:03:27
|
3
|
+
|
4
|
+
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc vel lorem
|
5
|
+
massa. Sed blandit orci id leo blandit ut fermentum lacus ullamcorper.
|
6
|
+
Suspendisse metus sapien, consectetur vitae imperdiet vel, ornare a metus.
|
7
|
+
In imperdiet euismod mi, nec volutpat lorem porta id.
|
@@ -0,0 +1,141 @@
|
|
1
|
+
# StringIO is used by MultiPartDocument.parse for String sources; requiring it
# here keeps this file independent of what other files happen to load.
require "stringio"

module DocStorage
  # The +MultiPartDocument+ class represents a document consisting of several
  # simple documents (see the +SimpleDocument+ class documentation for a
  # description), loosely based on the MIME multipart message format. It is
  # suitable for storing multiple documents containing a text associated with
  # some metadata (e.g. blog comments, each with an author and a publication
  # date). The +MultiPartDocument+ class allows to create the document
  # programmatically, parse it from a file, manipulate its structure and save
  # it to a file.
  #
  # == Document Format
  #
  # In serialized form, a multipart document looks like this:
  #
  #   Boundary: =====
  #
  #   --=====
  #   Author: Fan
  #   Datetime: 2009-11-01 20:07:15
  #
  #   Your article is really great!
  #   --=====
  #   Author: Critic
  #   Datetime: 2009-11-01 20:10:54
  #
  #   Your article sucks!
  #
  # The document is composed of one or more simple documents, separated by a
  # _boundary_ -- a line beginning with "--" and containing a predefined
  # <em>boundary string</em>. The first document is a _prologue_ and it defines
  # the boundary string (without the "--" prefix) in its "Boundary" header. All
  # other headers of the prologue are ignored and so is its body. Remaining
  # documents are the _parts_ of the multipart document. Documents without any
  # parts are perfectly legal, however the prologue with the boundary definition
  # must be always present.
  #
  # == Example Usage
  #
  #   require "lib/doc_storage"
  #
  #   # Create a new document with two parts
  #   document = DocStorage::MultiPartDocument.new([
  #     DocStorage::SimpleDocument.new(
  #       {
  #         "Title"    => "Finishing the documentation",
  #         "Priority" => "urgent"
  #       },
  #       "We should finish the documentation ASAP."
  #     ),
  #     DocStorage::SimpleDocument.new(
  #       {
  #         "Title"    => "Finishing the code",
  #         "Priority" => "more urgent"
  #       },
  #       "But we should finish the code first!"
  #     ),
  #   ])
  #
  #   # Parse a file
  #   document = File.open("examples/multipart.txt", "r") do |f|
  #     DocStorage::MultiPartDocument.parse(f)
  #   end
  #
  #   # Document manipulation
  #   document.parts << DocStorage::SimpleDocument.new(
  #     {
  #       "Author"   => "Middle man",
  #       "Datetime" => "2009-11-01 21:15:33",
  #     },
  #     "I think your article is neither good nor bad."
  #   )
  #
  #   # Save the modified document
  #   File.open("examples/multipart_modified.txt", "w") do |f|
  #     f.write(document)
  #   end
  class MultiPartDocument
    # document parts (+Array+ of <tt>DocStorage::SimpleDocument</tt>)
    attr_accessor :parts

    class << self
      # Parses a multipart document from its serialized form and returns a new
      # +MultiPartDocument+ instance.
      #
      # The +source+ can be either an +IO+-like object or a +String+. In the
      # latter case, it is assumed that the string contains a serialized
      # document (not a file name).
      #
      # If any syntax error occurs, a +SyntaxError+ exception is raised. This
      # can happen when parsing the prologue or parts and an invalid header is
      # encountered, the headers are not terminated (no empty line separating
      # headers and body is parsed before the end of file) or if no "Boundary"
      # header is found in the prologue.
      #
      # See the +MultiPartDocument+ class documentation for a detailed
      # document format description.
      def parse(source)
        parse_from_io(source.is_a?(String) ? StringIO.new(source) : source)
      end

      private

      # Reads the prologue (which defines the boundary string) from +io+ and
      # then keeps parsing boundary-delimited parts until +io+ is exhausted.
      def parse_from_io(io)
        prologue = SimpleDocument.parse(io, :detect)
        boundary = prologue.headers["Boundary"]

        parts = []
        parts << SimpleDocument.parse(io, boundary) until io.eof?

        MultiPartDocument.new(parts)
      end
    end

    # Creates a new +MultiPartDocument+ with given parts.
    def initialize(parts)
      @parts = parts
    end

    # Tests if two documents are equal, i.e. whether they have the same class
    # and equal parts (in the <tt>==</tt> sense).
    def ==(other)
      other.instance_of?(self.class) && @parts == other.parts
    end

    # Returns string representation of this document. The result is in format
    # described in the +MultiPartDocument+ class documentation.
    #
    # The boundary is a random 64-character alphanumeric string. If the
    # generated string happens to appear anywhere in a serialized part, a new
    # one is generated, so the boundary can never be confused with part
    # content. When there is no collision, exactly one boundary is generated.
    def to_s
      # Serialize each part once; the same strings are used both for the
      # collision check and for the output.
      serialized_parts = @parts.map { |part| part.to_s }

      boundary = generate_boundary
      while serialized_parts.any? { |text| text.include?(boundary) }
        boundary = generate_boundary
      end

      SimpleDocument.new({"Boundary" => boundary}, "").to_s +
        serialized_parts.map { |text| "--#{boundary}\n#{text}" }.join("\n")
    end

    private

    # Generates a random boundary string of 64 alphanumeric characters using
    # <tt>Kernel#rand</tt> (so it is reproducible after <tt>srand</tt>).
    def generate_boundary
      chars = ("a".."z").to_a + ("A".."Z").to_a + ("0".."9").to_a
      Array.new(64) { chars[rand(chars.length)] }.join("")
    end
  end
end
|
@@ -0,0 +1,189 @@
|
|
1
|
+
# StringIO is used by SimpleDocument.parse for String sources; requiring it
# here keeps this file independent of what other files happen to load.
require "stringio"

module DocStorage
  # The +SimpleDocument+ class represents a simple RFC 822-like document,
  # suitable for storing a text associated with some metadata (e.g. a blog
  # article with a title and a publication date). The +SimpleDocument+ class
  # allows to create the document programmatically, parse it from a file,
  # manipulate its structure and save it to a file.
  #
  # Each document consists of _headers_ and a _body_. Headers are a dictionary,
  # mapping string names to string values. Body is a free-form text. The header
  # names can contain only alphanumeric characters and a hyphen ("-") and they
  # are case sensitive. The header values can contain any text that does not
  # begin with whitespace and does not contain a CR or LF character.
  #
  # == Document Format
  #
  # In serialized form, a simple document looks like this:
  #
  #   Title: My blog article
  #   Datetime: 2009-11-01 18:03:27
  #
  #   Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc vel lorem
  #   massa. Sed blandit orci id leo blandit ut fermentum lacus ullamcorper.
  #   Suspendisse metus sapien, consectetur vitae imperdiet vel, ornare a metus.
  #   In imperdiet euismod mi, nec volutpat lorem porta id.
  #
  # The headers are first, each on its own line. The header names are separated
  # from values by a colon (":") and any amount of whitespace. Duplicate headers
  # are allowed with later value overwriting the earlier one. Otherwise, the
  # order of the headers does not matter. The body is separated from the headers
  # by an empty line.
  #
  # Documents without any headers are perfectly legal and so are documents with
  # an empty body. However, the separating line must be always present. This
  # means that an empty file is not a valid document, but a file containing a
  # single newline is.
  #
  # == Example Usage
  #
  #   require "lib/doc_storage"
  #
  #   # Create a new document with headers and body
  #   document = DocStorage::SimpleDocument.new(
  #     {
  #       "Title"    => "Finishing the documentation",
  #       "Priority" => "urgent"
  #     },
  #     "We should finish the documentation ASAP."
  #   )
  #
  #   # Parse a file
  #   document = File.open("examples/simple.txt", "r") do |f|
  #     DocStorage::SimpleDocument.parse(f)
  #   end
  #
  #   # Document manipulation
  #   document.headers["Tags"] = "example"
  #   document.body += "Nulla mi dui, pellentesque et accumsan vitae, mattis et velit."
  #
  #   # Save the modified document
  #   File.open("examples/simple_modified.txt", "w") do |f|
  #     f.write(document)
  #   end
  class SimpleDocument
    # document headers (+Hash+)
    attr_accessor :headers
    # document body (+String+)
    attr_accessor :body

    class << self
      # Parses a simple document from its serialized form and returns a new
      # +SimpleDocument+ instance.
      #
      # The +source+ can be either an +IO+-like object or a +String+. In the
      # latter case, it is assumed that the string contains a serialized
      # document (not a file name).
      #
      # The +boundary+ determines how the end of the document body is detected:
      #
      # * If +boundary+ is +nil+, the document is read until the end of file.
      #
      # * If +boundary+ is <tt>:detect</tt>, the document is read until the
      #   end of file or until a line containing only a <em>boundary
      #   string</em> is read. The boundary string is the value of the
      #   "Boundary" header prefixed with "--".
      #
      # * Otherwise, it is assumed that +boundary+ contains a boundary string
      #   without the "--" prefix (the "Boundary" header value is ignored for
      #   the purpose of boundary detection). The document is read until the
      #   end of file or until a line containing only the boundary string is
      #   read.
      #
      # A boundary line is recognized even when it is the very last line of
      # the input and has no trailing newline.
      #
      # The +boundary+ parameter is provided mainly for parsing parts of
      # multipart documents (see the +MultiPartDocument+ class documentation)
      # and usually should not be used.
      #
      # If any syntax error occurs, a +SyntaxError+ exception is raised. This
      # can happen when an invalid header is encountered, the headers are not
      # terminated (no empty line separating headers and body is parsed before
      # the end of file) or if no "Boundary" header is found when detecting a
      # boundary.
      #
      # See the +SimpleDocument+ class documentation for a detailed document
      # format description.
      def parse(source, boundary = nil)
        io = source.is_a?(String) ? StringIO.new(source) : source
        parse_from_io(io, boundary)
      end

      private

      # Reads header lines from +io+ up to and including the empty separator
      # line and returns them as a +Hash+. Raises +SyntaxError+ on a malformed
      # header line, when the input ends before the separator line, or when
      # +detect_boundary+ is true and no "Boundary" header was read.
      def parse_headers(io, detect_boundary)
        result = {}
        headers_terminated = false

        until io.eof?
          line = io.readline
          case line
          # "Name:" followed by any amount of whitespace (which is not part of
          # the value) and the value running to the end of the line.
          when /\A([a-zA-Z0-9-]+):\s*(.*)\n\z/
            result[Regexp.last_match(1)] = Regexp.last_match(2)
          when "\n"
            headers_terminated = true
            break
          else
            raise SyntaxError, "Invalid header: \"#{line.strip}\"."
          end
        end

        raise SyntaxError, "Unterminated headers." unless headers_terminated
        if detect_boundary && !result.has_key?("Boundary")
          raise SyntaxError, "No boundary defined."
        end

        result
      end

      # Reads the document body from +io+. Without a +boundary+ the rest of
      # the input is the body; otherwise lines are collected until a boundary
      # line ("--" followed by the boundary string) or the end of input.
      def parse_body(io, boundary)
        return io.read unless boundary

        marker = "--#{boundary}"
        result = String.new("")
        until io.eof?
          line = io.readline
          # readline keeps the line terminator, so a line without one can only
          # be the last line of the input.
          if line == "#{marker}\n" || line == marker
            # Trim last newline from the body as it belongs to the boundary
            # logically. This behavior is implemented to allow bodies with
            # no trailing newline.
            return result[0..-2]
          end

          # Append in place instead of building a new string for every line.
          result << line
        end
        result
      end

      # Parses headers and then body from +io+; when +boundary+ is
      # <tt>:detect</tt>, the boundary string is taken from the "Boundary"
      # header that was just parsed.
      def parse_from_io(io, boundary)
        headers = parse_headers(io, boundary == :detect)
        boundary = headers["Boundary"] if boundary == :detect
        body = parse_body(io, boundary)

        SimpleDocument.new(headers, body)
      end
    end

    # Creates a new +SimpleDocument+ with given headers and body.
    def initialize(headers, body)
      @headers, @body = headers, body
    end

    # Tests if two documents are equal, i.e. whether they have the same class
    # and equal headers and body (in the <tt>==</tt> sense).
    def ==(other)
      other.instance_of?(self.class) &&
        @headers == other.headers &&
        @body == other.body
    end

    # Returns string representation of this document. The result is in format
    # described in the +SimpleDocument+ class documentation. Headers are
    # emitted sorted by name.
    def to_s
      serialized_headers = @headers.keys.sort.map do |key|
        "#{key}: #{@headers[key]}\n"
      end.join("")
      serialized_headers + "\n" + @body
    end
  end
end
|
data/lib/doc_storage.rb
ADDED
@@ -0,0 +1,139 @@
|
|
1
|
+
require File.dirname(__FILE__) + "/../lib/doc_storage"
|
2
|
+
|
3
|
+
module DocStorage
  # Specs for MultiPartDocument: construction, equality, parsing and
  # serialization.
  describe MultiPartDocument do
    # Matcher that passes when the serialized string parses into a document
    # equal to the expected one.
    Spec::Matchers.define :parse_as_multi_part_document do |expected|
      match { |serialized| MultiPartDocument.parse(serialized) == expected }
    end

    # Serialized form of @document_with_multiple_parts using "=====" as the
    # boundary.
    two_part_source =
      "Boundary: =====\n" \
      "\n" \
      "--=====\n" \
      "a: 42\n" \
      "b: 43\n" \
      "\n" \
      "line1\n" \
      "line2\n" \
      "--=====\n" \
      "c: 44\n" \
      "d: 45\n" \
      "\n" \
      "line3\n" \
      "line4"

    # Boundary produced by MultiPartDocument#to_s right after "srand 0".
    seeded_boundary =
      "SV1ad7dNjtvYKxgyym6bMNxUyrLznijuZqZfpVasJyXZDttoNGbj5GFk0xJlY3CI"

    before :each do
      @document = MultiPartDocument.new([:part1, :part2])

      @document_with_no_parts = MultiPartDocument.new([])

      first_part  = SimpleDocument.new({"a" => "42", "b" => "43"}, "line1\nline2")
      second_part = SimpleDocument.new({"c" => "44", "d" => "45"}, "line3\nline4")
      @document_with_multiple_parts = MultiPartDocument.new([first_part, second_part])
    end

    describe "initialize" do
      it "sets attributes correctly" do
        @document.parts.should == [:part1, :part2]
      end
    end

    describe "==" do
      it "returns true when passed the same object" do
        @document.should == @document
      end

      it "returns true when passed a MultiPartDocument initialized with the same parameter" do
        twin = MultiPartDocument.new([:part1, :part2])
        @document.should == twin
      end

      it "returns false when passed some random object" do
        @document.should_not == Object.new
      end

      it "returns false when passed a subclass of MultiPartDocument initialized with the same parameter" do
        class SubclassedMultiPartDocument < MultiPartDocument; end

        subclassed = SubclassedMultiPartDocument.new([:part1, :part2])
        @document.should_not == subclassed
      end

      it "returns false when passed a MultiPartDocument initialized with different parameter" do
        different = MultiPartDocument.new([:part3, :part4])
        @document.should_not == different
      end
    end

    describe "parse" do
      it "parses document with no parts" do
        prologue_only = "Boundary: =====\n\n"
        prologue_only.should parse_as_multi_part_document(@document_with_no_parts)
      end

      it "parses document with multiple parts" do
        two_part_source.should parse_as_multi_part_document(
          @document_with_multiple_parts
        )
      end

      it "does not parse document with no Boundary: header" do
        parsing = lambda { MultiPartDocument.parse("\n\n") }
        parsing.should raise_error(SyntaxError, "No boundary defined.")
      end

      it "parses document from IO-like object" do
        StringIO.open(two_part_source) do |io|
          MultiPartDocument.parse(io).should == @document_with_multiple_parts
        end
      end
    end

    describe "to_s" do
      it "serializes document with no parts" do
        srand 0
        @document_with_no_parts.to_s.should == "Boundary: #{seeded_boundary}\n\n"
      end

      it "serializes document with multiple parts" do
        srand 0
        @document_with_multiple_parts.to_s.should ==
          "Boundary: #{seeded_boundary}\n" \
          "\n" \
          "--#{seeded_boundary}\n" \
          "a: 42\n" \
          "b: 43\n" \
          "\n" \
          "line1\n" \
          "line2\n" \
          "--#{seeded_boundary}\n" \
          "c: 44\n" \
          "d: 45\n" \
          "\n" \
          "line3\n" \
          "line4"
      end
    end
  end
end
|
@@ -0,0 +1,147 @@
|
|
1
|
+
require File.dirname(__FILE__) + "/../lib/doc_storage"
|
2
|
+
|
3
|
+
module DocStorage
  # Specs for SimpleDocument: construction, equality, parsing (with and
  # without boundaries) and serialization.
  describe SimpleDocument do
    # Matcher that passes when the serialized string parses into a document
    # equal to the expected one.
    Spec::Matchers.define :parse_as_document do |expected|
      match { |serialized| SimpleDocument.parse(serialized) == expected }
    end

    before :each do
      @document = SimpleDocument.new({"a" => 42, "b" => 43}, "body")

      string_headers = {"a" => "42", "b" => "43"}
      two_line_body  = "line1\nline2"

      @document_without_headers_without_body = SimpleDocument.new({}, "")
      @document_without_headers_with_body = SimpleDocument.new({}, two_line_body)
      @document_with_headers_without_body = SimpleDocument.new(string_headers, "")
      @document_with_headers_with_body = SimpleDocument.new(string_headers, two_line_body)
    end

    describe "initialize" do
      it "sets attributes correctly" do
        @document.headers.should == {"a" => 42, "b" => 43}
        @document.body.should == "body"
      end
    end

    describe "==" do
      it "returns true when passed the same object" do
        @document.should == @document
      end

      it "returns true when passed a SimpleDocument initialized with the same parameters" do
        twin = SimpleDocument.new({"a" => 42, "b" => 43}, "body")
        @document.should == twin
      end

      it "returns false when passed some random object" do
        @document.should_not == Object.new
      end

      it "returns false when passed a subclass of SimpleDocument initialized with the same parameters" do
        class SubclassedSimpleDocument < SimpleDocument; end

        subclassed = SubclassedSimpleDocument.new({"a" => 42, "b" => 43}, "body")
        @document.should_not == subclassed
      end

      it "returns false when passed a SimpleDocument initialized with different parameters" do
        other_headers = SimpleDocument.new({"a" => 44, "b" => 45}, "body")
        other_body = SimpleDocument.new({"a" => 42, "b" => 43}, "nobody")

        @document.should_not == other_headers
        @document.should_not == other_body
      end
    end

    describe "parse" do
      it "parses document with no headers and no body" do
        "\n".should parse_as_document(@document_without_headers_without_body)
      end

      it "parses document with no headers and body" do
        "\nline1\nline2".should parse_as_document(@document_without_headers_with_body)
      end

      it "parses document with headers and no body" do
        "a: 42\nb: 43\n\n".should parse_as_document(@document_with_headers_without_body)
      end

      it "parses document with headers and body" do
        "a: 42\nb: 43\n\nline1\nline2".should parse_as_document(@document_with_headers_with_body)
      end

      it "does not parse document with invalid headers" do
        parsing = lambda { SimpleDocument.parse("bullshit") }
        parsing.should raise_error(SyntaxError, "Invalid header: \"bullshit\".")
      end

      it "does not parse document with unterminated headers" do
        parsing = lambda { SimpleDocument.parse("a: 42\nb: 42\n") }
        parsing.should raise_error(SyntaxError, "Unterminated headers.")
      end

      it "parses document from IO-like object" do
        StringIO.open("a: 42\nb: 43\n\nline1\nline2") do |io|
          SimpleDocument.parse(io).should == @document_with_headers_with_body
        end
      end

      it "parses document when detecting a boundary" do
        source = "a: 42\nb: 43\nBoundary: =====\n\nline1\nline2\n--=====\nbullshit"
        expected = SimpleDocument.new(
          {"a" => "42", "b" => "43", "Boundary" => "====="},
          "line1\nline2"
        )

        SimpleDocument.parse(source, :detect).should == expected
      end

      it "does not parse document when detecting a boundary and no boundary defined" do
        source = "a: 42\nb: 43\n\nline1\nline2\n--=====\nbullshit"
        parsing = lambda { SimpleDocument.parse(source, :detect) }

        parsing.should raise_error(SyntaxError, "No boundary defined.")
      end

      it "parses document when passed a boundary" do
        source = "a: 42\nb: 43\n\nline1\nline2\n--=====\nbullshit"

        SimpleDocument.parse(source, "=====").should == @document_with_headers_with_body
      end
    end

    describe "to_s" do
      it "serializes document with no headers and no body" do
        @document_without_headers_without_body.to_s.should == "\n"
      end

      it "serializes document with no headers and body" do
        @document_without_headers_with_body.to_s.should == "\nline1\nline2"
      end

      it "serializes document with headers and no body" do
        @document_with_headers_without_body.to_s.should == "a: 42\nb: 43\n\n"
      end

      it "serializes document with headers and body" do
        serialized = @document_with_headers_with_body.to_s
        serialized.should == "a: 42\nb: 43\n\nline1\nline2"
      end
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: doc_storage
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: "0.9"
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- David Majda
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-11-19 00:00:00 +01:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description: DocStorage is a simple Ruby library for manipulating documents containing a text and metadata. These documents can be used to implement a blog, wiki, or similar application without a relational database.
|
17
|
+
email: david@majda.cz
|
18
|
+
executables: []
|
19
|
+
|
20
|
+
extensions: []
|
21
|
+
|
22
|
+
extra_rdoc_files:
|
23
|
+
- README.rdoc
|
24
|
+
files:
|
25
|
+
- Rakefile
|
26
|
+
- README.rdoc
|
27
|
+
- LICENSE
|
28
|
+
- VERSION
|
29
|
+
- lib/doc_storage.rb
|
30
|
+
- lib/doc_storage/multi_part_document.rb
|
31
|
+
- lib/doc_storage/simple_document.rb
|
32
|
+
- lib/doc_storage/syntax_error.rb
|
33
|
+
- spec/simple_document_spec.rb
|
34
|
+
- spec/multi_part_document_spec.rb
|
35
|
+
- examples/simple.txt
|
36
|
+
- examples/multipart.rb
|
37
|
+
- examples/simple.rb
|
38
|
+
- examples/multipart.txt
|
39
|
+
has_rdoc: true
|
40
|
+
homepage: http://github.com/dmajda/doc_storage
|
41
|
+
licenses: []
|
42
|
+
|
43
|
+
post_install_message:
|
44
|
+
rdoc_options:
|
45
|
+
- --main
|
46
|
+
- README.rdoc
|
47
|
+
require_paths:
|
48
|
+
- lib
|
49
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
50
|
+
requirements:
|
51
|
+
- - ">="
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: 1.8.6
|
54
|
+
version:
|
55
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
56
|
+
requirements:
|
57
|
+
- - ">="
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
version: "0"
|
60
|
+
version:
|
61
|
+
requirements: []
|
62
|
+
|
63
|
+
rubyforge_project:
|
64
|
+
rubygems_version: 1.3.5
|
65
|
+
signing_key:
|
66
|
+
specification_version: 3
|
67
|
+
summary: Simple Ruby library for manipulating documents containing a text and metadata.
|
68
|
+
test_files: []
|
69
|
+
|