bible_reference_parser 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,24 @@
1
+ # Top-level convenience API for parsing Bible passage references; every method below simply delegates to the matching *Reference class.
2
+ module BibleReferenceParser
3
+
4
+ # Parses a passage by delegating to BookReference.parse_books; it returns exactly what parse_books below returns.
5
+ def self.parse(passage)
6
+ BookReference.parse_books(passage)
7
+ end
8
+
9
+ # Delegates to BookReference.parse_books; see that method for the accepted passage formats.
10
+ def self.parse_books(passage)
11
+ BookReference.parse_books(passage)
12
+ end
13
+
14
+ # Delegates to ChapterReference.parse_chapters; see that method for the accepted passage formats.
15
+ def self.parse_chapters(passage)
16
+ ChapterReference.parse_chapters(passage)
17
+ end
18
+
19
+ # Delegates to VerseReference.parse_verses with the given string; see that method for details.
20
+ def self.parse_verses(string)
21
+ VerseReference.parse_verses(string)
22
+ end
23
+
24
+ end
@@ -0,0 +1,46 @@
1
+ module BibleReferenceParser
2
+
3
+ # This module encapsulates shared behavior for classes that keep track of parsing errors.
4
+ # For example, a BookReference may encounter a parsing error due to a book that doesn't
5
+ # exist. A ChapterReference may have a parsing error because the chapter number isn't valid
6
+ # for the book it is referencing.
7
+ module TracksErrors
8
+
9
+ def initialize(*args, &block)
10
+ super
11
+
12
+ # A collection of error messages. NOTE(review): the bare `super` above re-sends every argument to the next initialize in the ancestor chain; confirm the superclass of each including class accepts them on the Ruby versions you target (Object#initialize presumably takes none on current Rubies).
13
+ @errors = []
14
+ end
15
+
16
+ # Add an error message to this object's own list. Returns the error array (the result of <<), which is always truthy; callers written as `add_error "..." and return` depend on that.
17
+ def add_error(message)
18
+ @errors << message
19
+ end
20
+
21
+ # Erase all of this object's own error messages (errors held by child references are not touched).
22
+ def clear_errors
23
+ @errors = []
24
+ end
25
+
26
+ # Get the list of error messages. This will include any errors in child references
27
+ # if include_child_errors is true (by default it's true) and the object responds to "children" with a non-nil value; in that case a new combined array is returned, otherwise the internal array itself.
28
+ def errors(include_child_errors = true)
29
+ if(include_child_errors && respond_to?("children") && children)
30
+ return @errors + children.errors(true)
31
+ end
32
+
33
+ @errors
34
+ end
35
+
36
+ # Whether any errors occurred when parsing (child errors are included, since errors is called with its default).
37
+ def has_errors?
38
+ !errors.empty?
39
+ end
40
+
41
+ # Convenience method for the reverse of "has_errors?"
42
+ def no_errors?
43
+ errors.empty?
44
+ end
45
+ end
46
+ end
@@ -0,0 +1,168 @@
1
+ module BibleReferenceParser
2
+
3
+ # This class handles parsing the books in a passage reference.
4
+ #
5
+ # Each BookReference contains the book's name, short name (abbreviation), and a ReferenceCollection
6
+ # of chapters.
7
+ #
8
+ # The main method of interest is BookReference.parse_books. This will
9
+ # parse a passage and return a ReferenceCollection containing BookReference objects;
10
+ # one object for every book found in the passage. Example:
11
+ #
12
+ # books = BookReference.parse_books("Matt. 1:1-10, Revelation 5:6-11, Luke 7:7")
13
+ # books[0].name # => "Matthew"
14
+ # books[1].short_name # => "Rev."
15
+ # books[2].name # => "Luke"
16
+ #
17
+ # Access the chapter references found through the chapter_references method:
18
+ #
19
+ # books.chapter_references
20
+ # books.children # => alias for chapter_references
21
+ #
22
+ # You can see if there were any errors in parsing by checking the "has_errors?" method on the returned
23
+ # ReferenceCollection. Example:
24
+ #
25
+ # books = BookReference.parse_books("Gethensis 1:1, Matthew 1:5000")
26
+ # books.has_errors? # => true
27
+ # books.no_errors? # => false
28
+ # books.errors # => ["The book 'Gethensis' could not be found", "The verse '5000' does not exist for Matthew 1"]
29
+ #
30
+ # You can check if an individual BookReference has errors as well:
31
+ #
32
+ # books = BookReference.parse_books("Gethensis 1:1, Matthew 1:5000")
33
+ # books.first.has_errors? # => true
34
+ # books.first.errors # => ["The book 'Gethensis' could not be found"]
35
+
36
+ class BookReference
37
+ include TracksErrors
38
+
39
+ attr_reader :name, :short_name, :raw_content, :chapter_references, :metadata
40
+
41
+ alias :children :chapter_references
42
+
43
+ # Instance Initialization
44
+ #----------------------------------------------------------------------------
45
+
46
+ # Initializes a new BookReference object. If the book cannot be found, a parsing error is recorded and initialization stops early, leaving name, short_name, raw_content and chapter_references nil.
47
+ #
48
+ # Parameters:
49
+ # book_name - can be either the full name or the abbreviation.
50
+ # raw_content - the raw string of chapters/verses selected for this book reference (ex. "1:1-10").
51
+ def initialize(book_name, raw_content = nil)
52
+ super
53
+
54
+ # get the metadata for the given book name (BibleMetadata is defined outside this file)
55
+ @metadata = BibleMetadata[book_name]
56
+
57
+ # if the book doesn't exist add a parsing error and stop processing (add_error returns the truthy errors array, so the `and return` always fires)
58
+ if @metadata.nil?
59
+ add_error "The book '#{book_name}' could not be found" and return
60
+ end
61
+
62
+ # name of the book, ex. Genesis
63
+ @name = metadata["name"]
64
+
65
+ # abbreviated name of the book, ex. Gen.
66
+ @short_name = metadata["short_name"]
67
+
68
+ # the string representing the chapters/verses for this reference, ex. "1:1-10"
69
+ @raw_content = raw_content
70
+
71
+ parse_contents
72
+ end
73
+
74
+
75
+ # Class Methods
76
+ #----------------------------------------------------------------------------
77
+
78
+ # Parse the books in a passage. Returns a ReferenceCollection of BookReference objects.
79
+ #
80
+ # Parameters:
81
+ # passage - The passage to parse, ex. "Genesis 1:1-10, Exodus 1:5-7"
82
+ #
83
+ # Example:
84
+ #
85
+ # books = BookReference.parse_books("Genesis 1:1-10, mark 1:5-7")
86
+ # books.first.name # => "Genesis"
87
+ #
88
+ # The above example will return a ReferenceCollection of two BookReference objects, one for
89
+ # each book identified in the passage.
90
+ #
91
+ # More Examples:
92
+ #
93
+ # BookReference.parse_books("Matt 1")
94
+ # BookReference.parse_books("Gen. 1-5, Ex. 7:14")
95
+ # BookReference.parse_books("Genesis 1:1-15, 2:12, Exod. 7:14")
96
+ # BookReference.parse_books("gen 5, exodus") # => will assume Exodus 1
97
+ # BookReference.parse_books("gen. 1-5, gen 9:1") # => two different references to genesis
98
+ # BookReference.parse_books("[rev1:15][daniel 12: 1]") # => white space and unnecessary punctuation is ignored
99
+ def self.parse_books(passage)
100
+
101
+ books = ReferenceCollection.new
102
+
103
+ # remove everything except for numbers, letters and these punctuation marks -> -,;:
104
+ passage_slim = passage.gsub(/[^0-9a-zA-Z:;,\-]/, "")
105
+
106
+ # This pattern matches for book name and chapter/verses pairs. It consists of two capture groups:
107
+ #
108
+ # Group 1: Book's name ([0-9]?[a-zA-Z]+)
109
+ # - An optional digit. Some books like "1 Samuel" begin with a single digit.
110
+ # - Any letter, one or more times.
111
+ #
112
+ # Group 2: Chapters and verses ([^a-zA-Z]+(?![a-zA-Z]))?
113
+ # - Any non-letter character (like digits and punctuation) one or more times.
114
+ #
115
+ # - Don't capture the last character if it's followed by a letter. For every book except
116
+ # the final one this means the last character is dropped (at the very end of the passage nothing follows, so nothing is dropped). Usually the last character will be punctuation, like
117
+ # a comma or semi-colon. We don't need to capture that information. Sometimes it will
118
+ # be a number, like in "Matt. 1:1, 2 Sam 1:1", where "2" would be the last character.
119
+ # In this case we want to assume the last character belongs with the next book anyway,
120
+ # so we shouldn't include it with this one. NOTE(review): if group 2 would be a single character directly followed by a letter (ex. "gen5exodus" after stripping), it cannot give a character back, so it does not match at all and that character is read as the next book's leading digit.
121
+ #
122
+ # - The last question mark indicates that the chapters/verses are optional. If it's not
123
+ # there, then we assume just the first chapter is wanted. So the passage
124
+ # "John", is the same as "John 1". This assumption comes
125
+ # from what BibleGateway does for the same scenario.
126
+ pattern = /([0-9]?[a-zA-Z]+)([^a-zA-Z]+(?![a-zA-Z]))?/
127
+
128
+ # find the books (contents is nil when the optional second group did not match)
129
+ passage_slim.scan pattern do |book_name, contents|
130
+
131
+ # remove all characters from the end until we get to a number.
132
+ # This basically removes any extraneous punctuation at the end.
133
+ contents = contents.gsub(/[^0-9]+$/, "") unless contents.nil?
134
+
135
+ books << BookReference.new(book_name, contents)
136
+ end
137
+
138
+ books
139
+ end
140
+
141
+
142
+ # Instance Methods
143
+ #----------------------------------------------------------------------------
144
+
145
+ # Whether this reference itself is valid. Please note this does not consider the chapters inside
145
+ # the book, just the book itself. The name is only assigned when the metadata lookup in initialize succeeded, so a nil name means the book was not found.
146
+ def valid_reference?
147
+ !name.nil?
148
+ end
150
+
151
+ # Parse the raw_content in order to find the chapters in this book. The result of ChapterReference.parse_chapters_in_reference is stored in @chapter_references (also readable through the children alias).
152
+ def parse_contents
153
+ @chapter_references = ChapterReference.parse_chapters_in_reference self
154
+ end
155
+
156
+ # Cleans invalid chapter references. After calling this, the chapter_references method will only return good
156
+ # chapter references. You can access the invalid references through chapter_references.invalid_references.
157
+ # See ReferenceCollection.clean for more information.
158
+ #
159
+ # If the chain parameter is true (which it is by default) it will also tell valid chapters
160
+ # to clean their verse references. In this case chapter_references.invalid_references will include both bad
161
+ # chapters and bad verses. NOTE(review): chapter_references is nil for a book that failed the metadata lookup, so calling clean directly on such a reference raises NoMethodError.
162
+ def clean(chain = true)
163
+ chapter_references.clean(chain)
164
+ end
166
+
167
+ end
168
+ end
@@ -0,0 +1,242 @@
1
+ module BibleReferenceParser
2
+ # This class handles the parsing of chapters in a passage or string.
3
+ #
4
+ # Each ChapterReference object contains the chapter number and a ReferenceCollection of
5
+ # verses.
6
+ #
7
+ # The main method of interest is ChapterReference.parse_chapters. This will parse a passage
8
+ # or string and return a ReferenceCollection of ChapterReference objects. One object for
9
+ # each chapter identified. Example:
10
+ #
11
+ # chapters = ChapterReference.parse_chapters("1:1-10, 5:6")
12
+ # chapters[0].number # => 1
13
+ # chapters[1].number # => 5
14
+ #
15
+ # Although less useful, parse_chapters can even parse just the chapters in a complete passage:
16
+ #
17
+ # chapters = ChapterReference.parse_chapters("Genesis 1:1-10, Mark 5:6")
18
+ # chapters[0].number # => 1
19
+ # chapters[1].number # => 5
20
+ #
21
+ # You can see if there were any errors in parsing by checking the "has_errors?" method on the
22
+ # returned ReferenceCollection. Without specifying metadata to validate against, only simple
23
+ # validation is possible. If you do provide metadata, (ex. BibleMetadata["Genesis"]),
24
+ # The ChapterReference will add an error message if the chapter doesn't exist for the book.
25
+ #
26
+ # If you want to parse chapters for a particular book, it's better to use the
27
+ # parse_chapters_in_reference method. This method takes an existing book reference.
28
+ # Example:
29
+ #
30
+ # book = BookReference.new("Genesis", "1:1000, 51:10")
31
+ # chapters = ChapterReference.parse_chapters_in_reference(book)
32
+ # chapters.has_errors? # => true
33
+ # chapters.no_errors? # => false
34
+ # chapters.errors # => ["The verse '1000' does not exist for Genesis 1",
35
+ # "Chapter '51' does not exist for the book Genesis"]
36
+ #
37
+ # You can check if an individual ChapterReference has errors as well:
38
+ #
39
+ # book = BookReference.new("Genesis", "1:1000, 51:10")
40
+ # chapters = ChapterReference.parse_chapters_in_reference(book)
41
+ # chapters.first.has_errors? # => true
42
+ # chapters.first.no_errors? # => false
43
+ # chapters.first.errors # => ["The verse '1000' does not exist for Genesis 1"]
44
+
45
+ class ChapterReference
46
+ include TracksErrors
47
+
48
+ attr_reader :number, :raw_content, :verse_references, :metadata
49
+
50
+ alias :children :verse_references
51
+
52
+ # Instance Initialization
53
+ #----------------------------------------------------------------------------
54
+
55
+ # Initializes a new ChapterReference object. If the chapter number is invalid, an error is recorded and initialization stops early, leaving number, raw_content and verse_references nil.
56
+ #
57
+ # Parameters:
58
+ # number - The chapter number. Can be either a string or an integer
59
+ # raw_content - A string representing the verses referenced, ex. "1-10"
60
+ # metadata - (optional) Metadata information for a particular
61
+ # book, ex. BibleMetadata["Genesis"]. This is used to check if
62
+ # the chapter number is valid for a book. It is read here through the string keys "chapter_info" and "name".
63
+ def initialize(number, raw_content = nil, metadata = nil)
64
+ super
65
+
66
+ number = number.to_i # allows passing the number parameter as string
67
+
68
+ # if number is less than 1 add a parsing error and stop processing (a non-numeric string becomes 0 via to_i and ends up here too)
69
+ if number < 1
70
+ add_error "The chapter number '#{number}' is not valid" and return
71
+ end
72
+
73
+ # metadata info for a particular book in the bible
74
+ @metadata = metadata
75
+
76
+ # if the metadata is given, we can verify if the chapter exists for the book (the chapter count is taken as the length of metadata["chapter_info"])
77
+ unless @metadata.nil?
78
+ total_chapters_in_book = @metadata["chapter_info"].length
79
+
80
+ if number > total_chapters_in_book
81
+ add_error "Chapter '#{number}' does not exist for the book #{@metadata['name']}" and return
82
+ end
83
+ end
84
+
85
+ # The chapter number
86
+ @number = number
87
+
88
+ # The string representing the verses referenced in this chapter
89
+ @raw_content = raw_content
90
+
91
+ parse_contents
92
+ end
93
+
94
+ # Class Methods
95
+ #----------------------------------------------------------------------------
96
+
97
+ # Works similarly to parse_chapters, however this should be used instead if you want
97
+ # to associate the chapter references with a book. This will decide what chapters
98
+ # are referenced based on the raw_content of the book reference. If the raw_content
99
+ # is nil, it will assume only the first chapter is desired. In both cases the book reference's metadata is passed along so chapter numbers can be validated.
100
+ def self.parse_chapters_in_reference(book_reference)
101
+ if book_reference.raw_content.nil?
102
+ # if the raw_content is nil, assume we want just the first chapter. This is what
103
+ # Bible Gateway does if you just give a book name.
104
+ return self.parse_chapters(1, book_reference.metadata)
105
+ else
106
+ return self.parse_chapters(book_reference.raw_content, book_reference.metadata)
107
+ end
108
+ end
110
+
111
+ # Parse the chapters in a passage or string. Returns a ReferenceCollection
111
+ # of ChapterReference objects.
112
+ #
113
+ # Parameters:
114
+ # passage - The passage to parse, ex. "1:1-10, 2:5-7"
115
+ # metadata - An array of metadata information for a particular book, ex. BibleMetadata["Genesis"].
116
+ # NOTE: if you are passing this in, you probably should
117
+ # be calling parse_chapters_in_reference instead of this one.
118
+ #
119
+ # Example:
120
+ #
121
+ # chapters = ChapterReference.parse_chapters("1:1-10, 2:5-7")
122
+ # chapters.first.number # => 1
123
+ #
124
+ # This can also parse just the chapters in a whole passage. It will ignore the book names:
125
+ #
126
+ # chapters = ChapterReference.parse_chapters("Genesis 1:1-10; mark 1:5-7")
127
+ # chapters.first.number # => 1
128
+ #
129
+ # More Examples:
130
+ #
131
+ # ChapterReference.parse_chapters("1:1")
132
+ # ChapterReference.parse_chapters("1:1-10")
133
+ # ChapterReference.parse_chapters("1:1-10; 5-10")
134
+ # ChapterReference.parse_chapters("1:5,8,11; 2:10, 5-20")
135
+ # ChapterReference.parse_chapters(10)
136
+ #
137
+ # XXX allow option to remove duplicate chapters
138
+ def self.parse_chapters(passage, metadata = nil)
139
+ passage = passage.to_s # allows for integer passage
140
+
141
+ chapters = ReferenceCollection.new
142
+
143
+ # ~ Do some massaging of the data before we scan it...
144
+
145
+ # Replace letters with a semi-colon. We would just remove all letters, but in cases
146
+ # where books are separated by just a space, it will cause errors. For example
147
+ # "Genesis 1 Exodus 1" would end up as "11".
148
+ passage = passage.gsub(/[a-zA-Z]+/, ";")
149
+
150
+ # Next remove everything except for numbers and these punctuation marks -> -,;:
151
+ # We don't care about spaces or any other characters.
152
+ passage = passage.gsub(/[^0-9:;,\-]/, "")
153
+
154
+ # Finally insert a semi-colon before digits that precede a colon. This is for chapters
155
+ # that reference specific verses, like "15:1". Semi-colons are used to indicate
156
+ # the following sequence is separate from the preceding sequence. This is important
157
+ # for back-to-back chapters with verses, ex. "1:5,10,5:10". Here we want chapter 1
158
+ # verses 5 and 10, then chapter 5 verse 10. The way we know it's not chapter 1 verse
159
+ # 5, 10, and 5 is if there is a semi-colon there: ";1:5,10,;5:10".
160
+ passage = passage.gsub(/[0-9]+:/, ';\0')
161
+
162
+ # This will look for digits followed by a colon. If we match that,
163
+ # we know what's before the colon is the chapter, and we know every digit, comma or dash
164
+ # directly after it are the verses.
165
+ match_chapter_with_verses = /([0-9]+:)([0-9,\-]+)/
166
+
167
+ # This will match a chapter range, like "1-10"
168
+ match_chapter_range = /([0-9]+\-[0-9]+)/
169
+
170
+ # This will match a single chapter selection that doesn't specify any verses.
171
+ # Something like "Genesis 1, 2" tells us we want chapters 1 and chapter 2.
172
+ # It looks for any digits directly followed by an optional comma or semi-colon.
173
+ # It's optional because it won't be there if it's the last or only chapter.
174
+ match_single_chapter = /([0-9]+[,;]?)/
175
+
176
+ # First try to match the chapter with verses, then the chapter range, then finally the single chapter
177
+ pattern = Regexp.union(match_chapter_with_verses, match_chapter_range, match_single_chapter)
178
+
179
+ # Let's find the chapters already! The union has four capture groups in total; only those of the alternative that matched are non-nil.
180
+ passage.scan pattern do |with_verses, verses, chapter_range, without_verses|
181
+
182
+ if chapter_range
183
+ # get the beginning and end of the range
184
+ range = chapter_range.split "-"
185
+ first = range.first.to_i
186
+ last = range.last.to_i
187
+
188
+ # add each chapter in the range (a descending range like "10-5" is an empty Ruby range, so nothing is added)
189
+ (first..last).each do |number|
190
+ chapters << ChapterReference.new(number, nil, metadata)
191
+ end
192
+ else
193
+ number = with_verses ? with_verses.to_i : without_verses.to_i # to_i stops at the trailing ":", "," or ";"
194
+
195
+ # remove all characters from the end until we get to a number.
196
+ # This basically removes any extraneous punctuation at the end.
197
+ verses = verses.gsub(/[^0-9]+$/, "") unless verses.nil?
198
+
199
+ chapters << ChapterReference.new(number, verses, metadata)
200
+ end
201
+ end
202
+
203
+ chapters
204
+ end
206
+
207
+ # Instance Methods
208
+ #----------------------------------------------------------------------------
209
+
210
+ # Whether this reference itself is valid. Please note this does not consider the verses inside
210
+ # the chapter, just the chapter itself. The number is only assigned when initialize passed its chapter checks, so a nil number means the chapter was rejected.
211
+ def valid_reference?
212
+ !number.nil?
213
+ end
215
+
216
+ # Parse the raw_content in order to find the verses referenced for this chapter. The result of VerseReference.parse_verses_in_reference is stored in @verse_references (also readable through the children alias).
216
+ def parse_contents
217
+ @verse_references = VerseReference.parse_verses_in_reference self
218
+ end
220
+
221
+ # Cleans invalid verse references. After calling this, the verse_references method will only return good
221
+ # verse references. You can access the invalid references through verse_references.invalid_references.
222
+ # See ReferenceCollection.clean for more information.
223
+ #
224
+ # If the chain parameter is true (which it is by default) it will also tell valid verses to do a clean.
225
+ # Since verses are leaf-nodes so to speak, they don't contain any references to clean so it won't do anything. NOTE(review): verse_references is nil for a chapter that failed validation in initialize, so calling clean directly on such a reference raises NoMethodError.
226
+ def clean(chain = true)
227
+ verse_references.clean(chain)
228
+ end
230
+
231
+ # TODO write specs
232
+ # Get an array containing the number of each verse reference, in iteration order (presumably Integers; confirm against VerseReference#number). NOTE(review): @verse_references is nil when initialize returned early for an invalid chapter, in which case this raises NoMethodError.
233
+ def verse_numbers
234
+ verses = []
235
+ @verse_references.each do |ref|
236
+ verses << ref.number
237
+ end
238
+ verses
239
+ end
240
+
241
+ end
242
+ end