bible_reference_parser 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,179 @@
1
+ module BibleReferenceParser
2
+
3
+ # This class is used to hold collections of reference objects.
4
+ # It functions similar to an array, augmented with information about whether
5
+ # any parsing errors occurred in the items it contains.
6
+ #
7
+ # A ReferenceCollection is returned from the following methods:
8
+ # - All BibleReferenceParser.parse methods
9
+ # - BookReference.parse_books
10
+ # - ChapterReference.parse_chapters
11
+ # - VerseReference.parse_verses
12
+ #
13
+ # Also, child references are ReferenceCollection objects. For example:
14
+ #
15
+ # books = BibleReferenceParser.parse "Genesis 1:1"
16
+ # books.first.chapter_references # => a reference collection
17
+ # books.first.children # => an alias for chapter_references, so also a reference collection
18
+ # books.first.chapter_references.first.verse_references # => another reference collection
19
+ #
20
+ # ReferenceCollections let you quickly see all the errors in the items it contains. Example:
21
+ #
22
+ # books = BibleReferenceParser.parse("Genthesis 1:1-10, Matthew 1:5, Rev. 5000") # books is a ReferenceCollection
23
+ # books.length # => 3
24
+ # books.has_errors? # => true
25
+ # books.no_errors? # => false
26
+ # books.errors.length # => 2
27
+ # books.errors # => ["The book 'Genthesis' could not be found", "Chapter '5000' does not exist for the book Revelation"]
28
+ # bad_books = books.clean
29
+ # books.length # => 1 (Matthew 1:5)
30
+ # books.invalid_references.length # => 2
31
+ #
32
+ # After calling the clean method, bad references are moved to the invalid_references field. Please note
33
+ # that a reference will still be left in the collection as long as it is itself valid. So a valid book could possibly only contain
34
+ # invalid chapters:
35
+ #
36
+ # books = BibleReferenceParser.parse("Genesis 51") # genesis is valid, but chapter 51 isn't
37
+ # books.clean
38
+ # books.length # => 1 (still contains the reference to Genesis)
39
+ # books.first.has_errors? # => true
40
+ # books.first.chapter_references.length # => 0
41
+ # books.first.chapter_references.invalid_references.length # => 1
42
+ # books.invalid_references.length # => 1 (returns the same as above, just less explicit)
43
+ # books.errors # => ["Chapter '51' does not exist for the book Genesis"]
44
+ #
45
+ # XXX next we can add a method for sorting items in the collection.
46
+
47
class ReferenceCollection
  include TracksErrors

  attr_reader :references, :invalid_references

  # Initialization
  #----------------------------------------------------------------------------

  # Builds a collection around the given arrays.
  #
  # Parameters:
  # initial_references - Array of reference objects held by this collection.
  # initial_invalid_references - Array of references already known to be invalid.
  #
  # Both arrays are stored as given, without copying.
  def initialize(initial_references = [], initial_invalid_references = [])
    # Bare super forwards both arguments to the next initialize in the ancestor
    # chain (presumably the one from TracksErrors, which is defined elsewhere).
    super

    # Array of reference objects this collection contains
    @references = initial_references

    # Holds references that are invalid. The "clean" method finds all invalid
    # references and moves them to this collection.
    @invalid_references = initial_invalid_references
  end


  # Instance Methods
  #----------------------------------------------------------------------------

  # Get all the errors in this reference collection.
  #
  # The result starts with whatever super reports for this collection, followed
  # by the errors of every invalid reference and then of every reference, with
  # duplicates removed.
  #
  # Parameters:
  # include_child_errors - Passed through to super and to each reference's
  #                        own errors method. True by default.
  def errors(include_child_errors = true)
    own_errors = super(include_child_errors)

    # Build with + (never <<) so the array returned by super is not mutated.
    all_errors = (@invalid_references + @references).inject(own_errors) do |collected, reference|
      collected + reference.errors(include_child_errors)
    end

    all_errors.uniq
  end

  # Moves invalid references into the special "invalid_references" collection. A reference is valid
  # if its "valid_reference?" method returns true. This is useful if you want to loop through the valid
  # references only, and deal with the invalid ones separately:
  #
  #   books = BibleReferenceParser.parse("Matthew 1:1, Mark 1:1, Lkue 1:1")
  #   books.length # => 3 (all three references)
  #   books.clean
  #   books.length # => 2 ("Lkue" is now in the invalid_references collection)
  #   books.each do |book| ... # => loop through just the good references
  #   books.invalid_references.each do |invalid| ... # now deal with the bad ones
  #
  # The chain parameter indicates whether child references should also be cleaned. For example, if you
  # have a collection of book references, if chain is true it will also call clean on each valid book,
  # collecting whatever that call returns. Chain is true by default.
  #
  # Please note that a valid reference may not actually contain valid references. For example:
  #
  #   books = BibleReferenceParser.parse("Genesis 51") # genesis is valid, but chapter 51 isn't
  #   books.first.chapter_references.length # => 1
  #   books.clean
  #   books.first.chapter_references.length # => 0
  #   books.first.chapter_references.invalid_references.length # => 1
  #
  # Returns an array of everything removed: the invalid references of this collection
  # plus, when chain is true, the arrays returned by clean on each valid reference.
  def clean(chain = true)
    valid, removed = @references.partition { |reference| reference.valid_reference? }

    removed_through_chain = []
    if chain
      valid.each { |reference| removed_through_chain += reference.clean }
    end

    @references = valid

    all_removed = removed + removed_through_chain
    @invalid_references += all_removed

    all_removed
  end


  # Delegate Methods
  #----------------------------------------------------------------------------

  # Returns a new ReferenceCollection holding this collection's references
  # followed by the given ones. Accepts either an Array or ReferenceCollection.
  #
  # When given a ReferenceCollection, its invalid references are carried over as
  # well, so their errors are not lost. The new collection always gets its own
  # invalid_references array rather than sharing the receiver's.
  def +(collection)
    if collection.kind_of?(ReferenceCollection)
      ReferenceCollection.new(references + collection.references,
                              invalid_references + collection.invalid_references)
    else
      ReferenceCollection.new(references + collection, invalid_references.dup)
    end
  end

  # Returns a new ReferenceCollection holding this collection's references minus
  # the given ones. Accepts either an Array or ReferenceCollection.
  #
  # The receiver's invalid references are copied into the new collection, so the
  # two collections do not share one array.
  def -(collection)
    other_references = collection.kind_of?(ReferenceCollection) ? collection.references : collection
    ReferenceCollection.new(references - other_references, invalid_references.dup)
  end

  # The methods below forward straight to the underlying references array.

  def [](index)
    references[index]
  end

  def each(*args, &block)
    references.each(*args, &block)
  end

  def <<(reference)
    references << reference
  end

  def length
    references.length
  end

  def first
    references.first
  end

  def last
    references.last
  end

  def empty?
    references.empty?
  end

end
179
+ end
@@ -0,0 +1,184 @@
1
+
2
+ module BibleReferenceParser
3
+
4
+ # This class handles the parsing of verses in a string.
5
+ #
6
+ # The main method of interest is VerseReference.parse_verses. This will parse a string
7
+ # and return a ReferenceCollection of VerseReference objects. One object for
8
+ # each verse identified. Example:
9
+ #
10
+ # verses = VerseReference.parse_verses("1-10, 15")
11
+ # verses[0].number # => 1
12
+ # verses[1].number # => 2
13
+ # verses.last.number # => 15
14
+ #
15
+ # You can see if there were any errors in parsing by checking the "has_errors?" method on the
16
+ # returned ReferenceCollection. Without specifying metadata to validate against, only simple
17
+ # validation is possible. If you do provide metadata (ex. BibleMetadata["Genesis"]) and a
18
+ # chapter number, it can validate that the verse number actually exists for the book and chapter.
19
+ #
20
+ # If you want to validate the verse references against a book reference, it's better to use
21
+ # the parse_verses_in_reference method. This will parse the verses in a chapter reference and provide
22
+ # the right metadata information for validation. Example:
23
+ #
24
+ # chapter = ChapterReference(1, 500, BibleMetadata["Genesis"])
25
+ # verses = VerseReference.parse_verses_in_reference(chapter)
26
+ # verses.has_errors? # => true
27
+ # verses.no_errors? # => false
28
+ # verses.errors # => ["The verse '500' does not exist for Genesis 1"]
29
+ #
30
+ # You can check if an individual VerseReference has errors as well:
31
+ #
32
+ # verses.first.has_errors? # => true
33
+
34
class VerseReference
  include TracksErrors

  attr_accessor :number

  # Initialization
  #----------------------------------------------------------------------------

  # Initializes a new VerseReference object. If metadata and chapter_number are provided, it will validate
  # that the verse number exists for the book and chapter.
  #
  # You probably shouldn't be calling VerseReference.new directly. Instead, see
  # VerseReference.parse_verses or VerseReference.parse_verses_in_reference.
  #
  # Parameters:
  # number - The verse number. Converted with to_i, so a numeric string is accepted.
  # metadata - Metadata information for a particular book, ex. BibleMetadata["Genesis"]. Its
  #            "chapter_info" entry is read as a list of verse counts indexed by chapter, and its
  #            "name" entry is used in error messages. If you provide this, also provide the
  #            chapter_number parameter.
  # chapter_number - The chapter number this verse is for. Used to validate the verse number exists for a
  #                  book and chapter.
  #
  # When validation fails an error is added and the "number" attribute is left unset, which is
  # what valid_reference? checks.
  def initialize(number, metadata = nil, chapter_number = nil)
    # Bare super forwards the original arguments up the ancestor chain; it must
    # run before "number" is reassigned below.
    super

    number = number.to_i # allows passing the number parameter as string

    # if number is less than 1 add a parsing error and stop processing
    if number < 1
      add_error "The verse number '#{number}' is not valid"
      return
    end

    # if metadata and chapter number is given, we can check if the verse exists for the book and chapter.
    unless metadata.nil? || chapter_number.nil?
      # A chapter below 1 would turn into a negative index and silently read a
      # different chapter's verse count, so treat it as "no such chapter".
      total_verses_in_chapter =
        chapter_number < 1 ? nil : metadata["chapter_info"][chapter_number - 1] # subtract 1 for array offset

      # nil means the chapter itself is not in the metadata; comparing against
      # nil would raise, so report the verse as missing instead.
      if total_verses_in_chapter.nil? || number > total_verses_in_chapter
        add_error "The verse '#{number}' does not exist for #{metadata['name']} #{chapter_number}"
        # Explicit return: the outcome must not depend on add_error's return value.
        return
      end
    end

    @number = number
  end


  # Class Methods
  #----------------------------------------------------------------------------

  # Works similar to parse_verses. Use this if you want to parse the verses
  # in a ChapterReference object.
  #
  # - If the chapter's raw_content is not nil, that content is parsed.
  # - Otherwise, if the chapter has metadata, all of the chapter's verses are
  #   selected ("1-<total>").
  # - Otherwise there is no way to know the verse count, so only the first
  #   verse is assumed.
  #
  # Returns a ReferenceCollection of VerseReference objects.
  def self.parse_verses_in_reference(chapter_ref)
    raw_content = chapter_ref.raw_content
    metadata = chapter_ref.metadata

    return parse_verses(raw_content, metadata, chapter_ref.number) unless raw_content.nil?

    # no real solution here, just assume the first verse
    return parse_verses(1) if metadata.nil?

    # select all the verses in the chapter
    total_verses = metadata["chapter_info"][chapter_ref.number - 1] # -1 for the array offset
    parse_verses("1-#{total_verses}", metadata, chapter_ref.number)
  end

  # Parse the verses in a string. Returns a ReferenceCollection
  # of VerseReference objects.
  #
  # Parameters:
  # string - The string to parse, ex. "1-10, 15"
  # metadata - Metadata information for a particular book, ex. BibleMetadata["Genesis"].
  #            NOTE: if you are passing this in, you probably should
  #            be calling parse_verses_in_reference instead of this one.
  # chapter_number - The chapter number for the verse. Should be provided in conjunction with the metadata.
  #
  # Example:
  #
  #   verses = VerseReference.parse_verses("1-10, 15")
  #   verses.first.number # => 1
  #   verses.last.number # => 15
  #   verses.length # => 11
  #
  # More Examples:
  #
  #   VerseReference.parse_verses("1")
  #   VerseReference.parse_verses("1-10")
  #   VerseReference.parse_verses("1,5,7")
  #   VerseReference.parse_verses("1;5;7") # => same as above
  #   VerseReference.parse_verses("1-5, 10, 15-20")
  #
  # A descending range such as "10-1" produces no verses, because a Ruby range
  # whose start is greater than its end is empty.
  #
  # XXX we could add an option to allow a "beginning" or "end" for ranges.
  # XXX we could allow an option to remove duplicates
  def self.parse_verses(string, metadata = nil, chapter_number = nil)
    verses = ReferenceCollection.new

    # to_s allows string to be passed as an int; then remove everything except
    # for numbers and these punctuation marks -> -,;
    string_slim = string.to_s.gsub(/[^0-9;,\-]/, "")

    # This pattern matches for verses. It first tries to match a range of verses,
    # then single verses.
    #
    # Group 1: Verse Range ([0-9]+\-[0-9]+)
    #   - Any digits then a dash then any digits
    #
    # Group 2: Single Verse ([0-9]+)
    #   - any digits
    pattern = /([0-9]+\-[0-9]+)|([0-9]+)/

    string_slim.scan(pattern) do |verse_range, single_verse|
      if verse_range
        # get the beginning and end of the range, then add each verse in it
        first, last = verse_range.split("-").map { |bound| bound.to_i }
        (first..last).each do |number|
          verses << VerseReference.new(number, metadata, chapter_number)
        end
      else
        verses << VerseReference.new(single_verse, metadata, chapter_number)
      end
    end

    verses
  end


  # Instance Methods
  #----------------------------------------------------------------------------

  # Whether this reference itself is valid, i.e. whether initialization got far
  # enough to set the number.
  def valid_reference?
    !number.nil?
  end

  # The standard clean method that all references must have. Because verses are leaf nodes and don't
  # contain other references, this method will just return an empty array.
  def clean(chain = true)
    []
  end

end
184
+ end
@@ -0,0 +1,66 @@
1
+ require "spec_helper"
2
+
3
+ include BibleReferenceParser
4
+
5
describe BibleMetadata do
  # Canonical book name that the lookup examples below expect to get back.
  before(:all) do
    @matthew = "Matthew"
  end

  # Lookups should be insensitive to letter case.
  it "should find the correct book for an all lower-cased name" do
    metadata = BibleMetadata["matthew"]
    metadata.should_not be_nil
    metadata["name"].should eql(@matthew)
  end

  it "should find the correct book for a title-cased name" do
    metadata = BibleMetadata["Matthew"]
    metadata.should_not be_nil
    metadata["name"].should eql(@matthew)
  end

  it "should find the correct book for an all upper-cased name" do
    metadata = BibleMetadata["MATTHEW"]
    metadata.should_not be_nil
    metadata["name"].should eql(@matthew)
  end

  # Abbreviations, with or without a trailing period, resolve to the full book.
  it "should find the correct book for an abbreviated name" do
    metadata = BibleMetadata["matt"]
    metadata.should_not be_nil
    metadata["name"].should eql(@matthew)
  end

  it "should find the correct book for an abbreviated name with a period at the end" do
    metadata = BibleMetadata["Matt."]
    metadata.should_not be_nil
    metadata["name"].should eql(@matthew)
  end

  # Names containing spaces or a leading number.
  it "should find the correct book for a name given with spaces" do
    metadata = BibleMetadata["Song of Solomon"]
    metadata.should_not be_nil
    metadata["name"].should eql("Song of Solomon")
  end

  it "should find the correct book for a name beginning with a number" do
    metadata = BibleMetadata["1 Samuel"]
    metadata.should_not be_nil
    metadata["name"].should eql("1 Samuel")
  end

  # Unknown names yield nil.
  it "should return nil for a name that can't be found" do
    metadata = BibleMetadata["anathema"]
    metadata.should be_nil
  end

  # Shape of a metadata entry, checked against Genesis (50 chapters).
  it "should return the book's name, short_name, number of chapters and an chapter info array" do
    genesis = BibleMetadata["genesis"]
    genesis.should_not be_nil
    genesis["name"].should eql("Genesis")
    genesis["short_name"].should eql("Gen.")
    genesis["chapter_info"].should be_kind_of(Array)
    genesis["chapter_info"].length.should eql(50)
  end

end
@@ -0,0 +1,205 @@
1
+ require 'spec_helper'
2
+
3
+ include BibleReferenceParser
4
+
5
describe BookReference do

  # Run the shared error-tracking examples against a sample valid reference.
  it_should_behave_like "it tracks errors", BookReference.new("Matthew", "1:1")

  describe "initialization" do

    it "should set the 'metadata' field" do
      genesis = BookReference.new("Genesis", "1:1")
      genesis.metadata.should_not be_nil
      genesis.metadata["name"].should eql("Genesis")
    end

    context "for a valid book" do
      # Fresh Matthew reference, plus the values it was built from, per example.
      before(:each) do
        @name = "Matthew"
        @short_name = "Matt."
        @raw = "1:1-10"
        @ref = BookReference.new(@name, @raw)
      end

      it "should set the 'name' field" do
        @ref.name.should eql(@name)
      end

      it "should set the 'short_name' field" do
        @ref.short_name.should eql(@short_name)
      end

      it "should set the 'raw_content' field" do
        @ref.raw_content.should eql(@raw)
      end

      it "should parse it's raw content" do
        chapters = @ref.chapter_references
        chapters.length.should eql(1)
        @ref.chapter_references.first.number.should eql(1)
      end
    end

    context "for an invalid book" do
      # A name that no book matches.
      before(:each) do
        @ref = BookReference.new("anathema")
      end

      it "should add a parsing error for an invalid book name" do
        @ref.errors.first.should eql("The book 'anathema' could not be found")
      end

      it "should not set the 'name' field" do
        @ref.name.should be_nil
      end

      it "should not set the 'short_name' field" do
        @ref.short_name.should be_nil
      end

      it "should not set the 'raw_content' field" do
        @ref.raw_content.should be_nil
      end

      it "should not parse its raw_content" do
        @ref.chapter_references.should be_nil
      end
    end
  end

  describe "the valid_reference? method" do
    it "should return true if the name is set" do
      known_book = BookReference.new("Genesis")
      known_book.should be_valid_reference
    end

    it "should return false if the name is not set" do
      unknown_book = BookReference.new("Genthesis")
      unknown_book.should_not be_valid_reference
    end
  end

  describe "the 'parse_contents' method" do
    it "should set the chapter references" do
      matthew = BookReference.new("Matthew", "1:1-10")
      matthew.parse_contents
      matthew.chapter_references.should_not be_nil
    end
  end

  describe "when parsing the books in a passage" do

    describe "the parse_books method" do
      it "should correctly identify when there is only 1 book" do
        parsed = BookReference.parse_books("Genesis 1:1")
        parsed.length.should eql(1)
      end

      it "should correctly identify when there are 2 books" do
        parsed = BookReference.parse_books("Genesis 1:1, Exodus 1:1")
        parsed.length.should eql(2)
      end

      it "should correctly identify when there are 10 books" do
        # NOTE(review): this literal spans two lines, so the line break and the
        # leading whitespace of the second line are part of the string.
        passage = "Genesis 1:1, Exodus 1:1, Leviticus 1:1, Matthew 1:1, Mark 1:1,
                   Luke 1:1;John 1:1;Rev. 1:1, 1 Sam 1:1, Prov 1:1"

        parsed = BookReference.parse_books(passage)
        parsed.length.should eql(10)
      end

      it "should parse names beginning with a number" do
        parsed = BookReference.parse_books("1 samuel 1:1, 2 samuel, 1cor 3, 2cor")
        parsed.length.should eql(4)

        # Expected full names, in the order the books appear in the passage.
        expected_names = ["1 Samuel", "2 Samuel", "1 Corinthians", "2 Corinthians"]
        expected_names.each_with_index do |expected_name, position|
          parsed[position].name.should eql(expected_name)
        end
      end

      it "should correctly identify the raw_content for a book" do
        parsed = BookReference.parse_books("Genesis 1:1-10, 25, 6:13; Exodus 5:14, Leviticus 1, James")

        # Expected raw content per book; nil means the book was given without any content.
        expected_content = ["1:1-10,25,6:13", "5:14", "1", nil]
        expected_content.each_with_index do |expected_raw, position|
          if expected_raw.nil?
            parsed[position].raw_content.should be_nil
          else
            parsed[position].raw_content.should eql(expected_raw)
          end
        end
      end

      it "should correctly identify books and raw content in a complex passage, in the correct order" do
        # Books should include Genesis, Mark, Proverbs, Isaiah, Revelation, Galatians, Exodus, Hebrews, 1 Samuel
        passage = "Genesis 1:5-10, 11-15;25,;, Mark 10-21, 22,24,25, 28-32;prov2:2,\nisa9:9,rev10000," +
                  "galatians, exod.12-19, 21, 22;25;28,32;29,,,, hebrews1:1-10002 1 samuel 10"
        parsed = BookReference.parse_books(passage)

        parsed.length.should eql(9)

        # [expected name, expected raw content] in passage order; a nil raw
        # content means the book was named without chapters or verses.
        expectations = [
          ["Genesis",    "1:5-10,11-15;25"],
          ["Mark",       "10-21,22,24,25,28-32"],
          ["Proverbs",   "2:2"],
          ["Isaiah",     "9:9"],
          ["Revelation", "10000"],
          ["Galatians",  nil],
          ["Exodus",     "12-19,21,22;25;28,32;29"],
          ["Hebrews",    "1:1-10002"],
          ["1 Samuel",   "10"]
        ]

        expectations.each_with_index do |(expected_name, expected_raw), position|
          parsed[position].name.should eql(expected_name)

          if expected_raw.nil?
            parsed[position].raw_content.should be_nil
          else
            parsed[position].raw_content.should eql(expected_raw)
          end
        end
      end
    end

    describe "each returned book" do
      it "should correctly set the name attribute" do
        parsed = BookReference.parse_books("Matthew 1")
        parsed.first.name.should eql("Matthew")
      end

      it "should correctly set the raw_content attribute" do
        parsed = BookReference.parse_books("Matthew 1:15, 2:2-20, 25")
        parsed.first.raw_content.should eql("1:15,2:2-20,25")
      end
    end

    describe "the returned value" do
      it "should be a reference collection" do
        parsed = BookReference.parse_books("Genesis 1:1-10, Mark 1")
        parsed.should be_kind_of(ReferenceCollection)
      end

      it "should only contain BookReference objects" do
        parsed = BookReference.parse_books("Genesis 1:1-10, Mark 1")
        parsed.each { |entry| entry.should be_kind_of(BookReference) }
      end
    end

  end

  describe "the clean method" do
    it "should call clean on it's chapter_references" do
      # Genesis has no chapter 51, so one of the two chapter references is invalid.
      genesis = BookReference.new("Genesis", "1:1, 51:1")
      genesis.chapter_references.length.should eql(2)

      genesis.clean
      genesis.chapter_references.length.should eql(1)
      genesis.chapter_references.invalid_references.length.should eql(1)
    end

  end

end