bible_ref_parser 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 444fb0ee3218f9caa74109ee60f834efad718af2f2982453ac9918980d5da614
4
+ data.tar.gz: 839da96a5fedb211d67b645cba63335ef12a16db86b89bf1bc8a52a86f4af783
5
+ SHA512:
6
+ metadata.gz: 9a40f9a4068dc57f0316e2ac77a15ec27fd9709135eb0530c8dac2859b75fb0f2d25e0b11d5ea7e37cd4c08d145745c49d6a0c6c4e882f28195ff8cbde7e21c1
7
+ data.tar.gz: f5234fd33d0c25a4d2ef4ac5d89db6bb771ff2ad4e93e4aaaf4452c30bd462756688ee60cf403d2fe00c503d61112522792821f68e76b912a86ccefe1c60241e
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
data/.standard.yml ADDED
@@ -0,0 +1,19 @@
1
+ ruby_version: 2.7
2
+ ignore:
3
+ - "bin/**/*"
4
+ - "db/**/*"
5
+ - "script/**/*"
6
+ - "tmp/**/*"
7
+ - "vendor/**/*"
8
+ - "doc/**/*"
9
+
10
+ Style/Documentation:
11
+ Enabled: false
12
+
13
+ Metrics/BlockLength:
14
+ Exclude:
15
+ - "**/*_spec.rb"
16
+ - "Rakefile"
17
+
18
+ Metrics/MethodLength:
19
+ Max: 15
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2025 Fabio Papa
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,41 @@
1
+ # BibleRefParser
2
+
3
+ BibleRefParser is a Ruby gem for parsing and normalizing Bible references.
4
+
5
+ ## Installation
6
+
7
+ Install the gem and add to the application's Gemfile by executing:
8
+
9
+ ```bash
10
+ $ bundle add bible_ref_parser
11
+ ```
12
+
13
+ If bundler is not being used to manage dependencies, install the gem by executing:
14
+
15
+ ```bash
16
+ $ gem install bible_ref_parser
17
+ ```
18
+
19
+ ## Usage
20
+
21
+ ```ruby
22
+ require 'bible_ref_parser'
23
+
24
+ # Parse a Bible reference
25
+ reference = BibleRefParser::BibleReferenceText.new("John 3:16")
26
+ # Work with the parsed reference
27
+ ```
28
+
29
+ ## Development
30
+
31
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
32
+
33
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and the created tag, and push the `.gem` file to [rubygems.org](https://rubygems.org).
34
+
35
+ ## Contributing
36
+
37
+ Bug reports and pull requests are welcome on GitHub at https://github.com/fapapa/bible_ref_parser.
38
+
39
+ ## License
40
+
41
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
data/Rakefile ADDED
@@ -0,0 +1,28 @@
# frozen_string_literal: true

require "bundler/gem_tasks"
require "rspec/core/rake_task"
require "yard"

# Defines the `yard` task: builds API docs for everything under lib/ into doc/.
YARD::Rake::YardocTask.new do |t|
  t.files = ["lib/**/*.rb"]
  t.options = ["--output-dir=doc"]
end

# Defines the `spec` task.
RSpec::Core::RakeTask.new(:spec)

# Defines the `standard` lint task used by `test` below.
require "standard/rake"

desc "Start SimpleCov (files under /spec/ are filtered out)"
# NOTE(review): this task only starts SimpleCov inside the rake process and
# does not run any specs itself — confirm how coverage is meant to be collected.
task :coverage do
  require "simplecov"
  SimpleCov.start do
    add_filter "/spec/"
  end
end

desc "Run the specs and the Standard linter"
task test: [:spec, :standard]

desc "Run all checks"
task ci: [:test, :yard]

task default: :ci
@@ -0,0 +1,189 @@
1
+ # Prompt Plan
2
+
3
+ ### Step-by-Step Blueprint
4
+
5
+ 1. **Project Setup & Infrastructure**
6
+ 2. **Core Parser Implementation (Incremental Grammar Rules)**
7
+ 3. **Segment & BibleReferenceText Classes**
8
+ 4. **Error Handling & Edge Cases**
9
+ 5. **Integration & Final Polish**
10
+
11
+ ---
12
+
13
+ ## Prompt 1: Gem Setup & Test Infrastructure
14
+ ```text
15
+ Create a new gem called 'bible_ref_parser' with:
16
+ - Required dependencies: parslet (~> 2.0), rspec
17
+ - Basic directory structure
18
+ - RSpec configuration
19
+ - Empty module scaffold in lib/bible_ref_parser.rb
20
+ - Empty InvalidInputError class
21
+ - Empty BibleReferenceText and Segment classes
22
+ - A spec verifying the InvalidInputError is raised for non-string inputs
23
+ ```
24
+
25
+ ---
26
+
27
+ ## Prompt 2: Book Name Recognition
28
+ ```text
29
+ Implement the book name parser component:
30
+ 1. Create lib/bible_ref_parser/books.rb with canonical book list
31
+ 2. Build Parslet grammar rule for book names with:
32
+ - Full names (e.g., "Genesis")
33
+ - Numeric prefixes ("1 Corinthians", "III John")
34
+ - Common abbreviations ("Gen", "Jn")
35
+ 3. Add test cases for:
36
+ - Matching all 66 books in various formats
37
+ - Rejecting non-biblical book names
38
+ - Case insensitivity
39
+ - Whitespace tolerance
40
+ ```
41
+
42
+ ---
43
+
44
+ ## Prompt 3: Chapter/Verse Number Parsing
45
+ ```text
46
+ Implement chapter/verse number parsing:
47
+ 1. Add Parslet rules for:
48
+ - Simple chapter (e.g., "3")
49
+ - Chapter-verse ("3:16")
50
+ - Verse ranges ("16-18", "3:16–4:2")
51
+ - Comma-separated verses ("16,17")
52
+ 2. Create test cases for:
53
+ - Valid/invalid number formats
54
+ - Range detection
55
+ - Punctuation handling
56
+ - Position tracking
57
+ ```
58
+
59
+ ---
60
+
61
+ ## Prompt 4: Basic Reference Parsing
62
+ ```text
63
+ Combine book and number parsing for single references:
64
+ 1. Create rule: book_name >> (chapter/verse components)
65
+ 2. Handle optional whitespace between components
66
+ 3. Add position tracking for references
67
+ 4. Test cases for:
68
+ - "Gen 1"
69
+ - "John 3:16"
70
+ - "1 Cor 15:20-22"
71
+ - Invalid partial matches
72
+ ```
73
+
74
+ ---
75
+
76
+ ## Prompt 5: Segment Class Implementation
77
+ ```text
78
+ Implement Segment class with:
79
+ - Type discrimination (:reference/:text)
80
+ - Original text storage
81
+ - Position tracking
82
+ - to_s delegation
83
+ - Validation against empty segments
84
+ Add tests verifying:
85
+ - Correct type assignment
86
+ - Position accuracy
87
+ - Original text preservation
88
+ - Empty segment filtering
89
+ ```
90
+
91
+ ---
92
+
93
+ ## Prompt 6: Text Segmentation Logic
94
+ ```text
95
+ Implement reference/text segmentation:
96
+ 1. Create parser that splits input into alternating text/reference segments
97
+ 2. Handle whitespace between references
98
+ 3. Preserve original text including newlines
99
+ 4. Test cases for:
100
+ - Mixed text/reference content
101
+ - Leading/trailing whitespace
102
+ - Consecutive references
103
+ - Newline handling
104
+ ```
105
+
106
+ ---
107
+
108
+ ## Prompt 7: BibleReferenceText Class
109
+ ```text
110
+ Implement BibleReferenceText with:
111
+ - Enumerable mixin
112
+ - #segments and #references methods
113
+ - to_s reconstruction
114
+ - Error handling (return nil on parse errors)
115
+ Test:
116
+ - Enumeration behavior
117
+ - Reference filtering
118
+ - Round-trip text reconstruction
119
+ - Nil return on invalid references
120
+ ```
121
+
122
+ ---
123
+
124
+ ## Prompt 8: Punctuation Handling
125
+ ```text
126
+ Add punctuation rules:
127
+ 1. Strip trailing punctuation from references (, . ; ! ?)
128
+ 2. Keep punctuation in original text
129
+ 3. Test cases:
130
+ - "See John 3:16!"
131
+ - "Hello (John 1:1) world"
132
+ - "Rev. 22:21?"
133
+ - En dash in ranges ("Gen 1–2")
134
+ ```
135
+
136
+ ---
137
+
138
+ ## Prompt 9: Compound References
139
+ ```text
140
+ Implement compound reference parsing:
141
+ 1. Handle "and"/"&" separators
142
+ 2. Support cross-book ranges ("Gen 1-Exod 2")
143
+ 3. Add grammar rules for reference lists
144
+ 4. Test cases:
145
+ - "John 3:16 and 17"
146
+ - "Gen 1:1 & Exod 2:2"
147
+ - "Matt 5-7, Luke 3"
148
+ - Cross-book ranges
149
+ ```
150
+
151
+ ---
152
+
153
+ ## Prompt 10: Greedy Parsing & Edge Cases
154
+ ```text
155
+ Implement greedy parsing behavior:
156
+ 1. Longest-possible match strategy
157
+ 2. Prevent mid-reference false positives
158
+ 3. Test cases:
159
+ - "Johnjohn 3:16" (shouldn't parse)
160
+ - "1 John 2:3" vs "John 2:3"
161
+ - References followed by book names
162
+ - Invalid partial matches
163
+ ```
164
+
165
+ ---
166
+
167
+ ## Prompt 11: Error Resilience & Final Integration
168
+ ```text
169
+ Implement final error handling:
170
+ 1. Maintain original text even with parse errors
171
+ 2. Validate all position ranges
172
+ 3. Integration tests with real-world examples
173
+ 4. Test ActionText-style HTML content
174
+ 5. Verify nil return vs error raising behavior
175
+ ```
176
+
177
+ ---
178
+
179
+ ## Prompt 12: Documentation & Examples
180
+ ```text
181
+ Add final polish:
182
+ 1. README with usage examples
183
+ 2. YARD documentation
184
+ 3. Example test case from spec
185
+ 4. Version compatibility notes
186
+ 5. Caveats section
187
+ ```
188
+
189
+ Each step builds on the previous implementation with incremental complexity. Test coverage grows progressively, ensuring stability at each stage. The final integration connects all components while preserving the original text integrity.
@@ -0,0 +1,181 @@
1
+ # bible_ref_parser — Developer Specification
2
+
3
+ ## Overview
4
+ The `bible_ref_parser` gem extracts Bible references from arbitrary English-language text and returns a structured Ruby object that preserves the original content and annotates the recognized references. It's designed to be integrated into Rails applications (including ActionText) but is generic and framework-agnostic.
5
+
6
+ ## Key Features
7
+
8
+ - Parses English Bible references in natural text.
9
+ - Supports:
10
+ - Full book names and common abbreviations.
11
+ - Single references, compound references, verse/chapter ranges.
12
+ - Cross-book references.
13
+ - Greedy parsing behavior.
14
+ - Case-insensitive, whitespace-tolerant, and punctuation-aware.
15
+ - Outputs an object (`BibleReferenceText`) containing annotated segments.
16
+ - Supports reconstruction of the original text with optional embellishment.
17
+
18
+ ## Architecture
19
+
20
+ ### Top-Level Entry Point
21
+
22
+ ```ruby
23
+ BibleRefParser.parse(string)
24
+ ```
25
+
26
+ - Accepts: `String`
27
+ - Returns: `BibleReferenceText` object
28
+ - Raises `BibleRefParser::InvalidInputError` if the input is not a string.
29
+ - Returns `nil` if there are parsing errors.
30
+
31
+ ## Core Class: `BibleReferenceText`
32
+
33
+ A wrapper for the parsed result that:
34
+ - Exposes the parsed segments.
35
+ - Implements `Enumerable` for iterating over segments.
36
+ - Supports `#to_s` to reconstruct the original text.
37
+ - Allows clients to decorate or transform references.
38
+
39
+ ### Public Interface
40
+
41
+ ```ruby
42
+ class BibleReferenceText
43
+ include Enumerable
44
+
45
+ # Returns all parsed segments (reference and text)
46
+ def segments => [Segment]
47
+
48
+ # Iterate through segments
49
+ def each(&block)
50
+
51
+ # Returns only reference segments
52
+ def references => [Segment]
53
+
54
+ # Reconstructs original text
55
+ def to_s => String
56
+ end
57
+ ```
58
+
59
+ ## Class: `Segment`
60
+
61
+ Each segment represents either a Bible reference or plain text.
62
+
63
+ ### Attributes
64
+
65
+ ```ruby
66
+ segment.type # => :reference or :text
67
+ segment.original_text # => Original unaltered text of the segment
68
+ segment.position # => [start_index, end_index] in original string (only for :reference)
69
+ segment.to_s # => Delegates to original_text
70
+ ```
71
+
72
+ ### Behavior
73
+
74
+ - Empty segments are skipped.
75
+ - Whitespace before a reference is included in the `:text` segment preceding it.
76
+ - Punctuation immediately after a reference (like `.`, `,`, `!`, `...`) is excluded from the reference.
77
+ - Newlines (`\n`) are treated as regular characters.
79
+ - Em dashes (`—`) are valid range indicators.
80
+ - Parentheses around references are parsed as separate text segments.
81
+ - No references may span line breaks.
82
+
83
+ ## Grammar & Parsing Details
84
+
85
+ - Built using [Parslet](https://github.com/kschiess/parslet)
86
+ - Case-insensitive parsing
87
+ - Normalizes excess internal whitespace (but preserves original in output)
88
+ - Greedy recognition: references are parsed as long as valid format continues, ending only at a new book name or unrecognized structure.
89
+
90
+ ### Recognized Reference Forms
91
+
92
+ - Book names: full names or standard abbreviations (e.g., `John`, `Jn`, `1 Cor`, `I Samuel`)
93
+ - Chapters and verses:
94
+ - `John 3`
95
+ - `John 3:16`
96
+ - `John 3:16-18`
97
+ - `John 3:16–4:2` (multi-chapter)
98
+ - `John 3-4`
99
+ - `John 3:16, 17`
100
+ - `John 3:16 and 17`
101
+ - `John 3:16 & 17`
102
+ - Compound references:
103
+ - `John 3:16 and 17` → one reference
104
+ - `John 3:16 and Gen 1:1` → two references
105
+ - Cross-book references:
106
+ - `Gen 1:1–Exod 2:3` → single compound reference
107
+ - Supports dashes and em dashes as range separators
108
+ - Allows hyphens/dashes between book name and chapter: `John-3:16`, `John—3:16`
109
+
110
+ ### Not Supported (Treated as plain text)
111
+
112
+ - Invalid or broken references (e.g., split across lines)
113
+ - Book names without chapter/verse (e.g., just `Genesis`)
114
+ - Roman numerals in chapter/verse (but allowed in book names)
115
+ - URLs or text that resembles a reference but isn't
116
+ - Parentheses, brackets or quotes are not part of the reference
117
+
118
+ ## Error Handling
119
+
120
+ - Non-string inputs: Raise `BibleRefParser::InvalidInputError`
121
+ - Parser errors (e.g., malformed reference syntax): return `nil`
122
+ - Even on errors, `to_s` must still return the original input string
123
+ - Segments preserve all original content exactly
124
+
125
+ ## Configuration (Future Feature)
126
+
127
+ - Eventually allow for:
128
+ - Custom book lists (e.g., apocrypha)
129
+ - Custom abbreviations
130
+ - For now, built-in list of the 66 canonical books only
131
+
132
+ ## Implementation Notes
133
+
134
+ - Keep a **small, clear class hierarchy**.
135
+ - Parslet grammar should be isolated and tested.
136
+ - No normalization to OSIS or other external formats (for now).
137
+ - No database or external dependencies.
138
+ - Minimize gem dependencies wherever possible.
139
+
140
+ ## Compatibility
141
+
142
+ - Ruby `>= 3.0`
143
+ - SemVer-compliant versioning
144
+ - Designed for integration with Rails (including ActionText), but not Rails-dependent
145
+
146
+ ## Testing Plan
147
+
148
+ - RSpec test suite with 100% segment and parser coverage
149
+ - Fixtures for:
150
+ - Valid single references
151
+ - Compound references
152
+ - Punctuation-adjacent references
153
+ - Invalid or partial references
154
+ - Multi-line inputs
155
+ - References inside ActionText-style content
156
+ - Test `#segments`, `#references`, `#to_s`, and mutation behavior
157
+
158
+ ## Documentation
159
+
160
+ - Usage examples in README
161
+ - YARD-style comments for all public classes and methods
162
+ - Document edge cases clearly
163
+ - Include a section for "Gotchas" or "Caveats" in the README
164
+
165
+ ## Example Usage
166
+
167
+ ```ruby
168
+ input = "Check out John 3:16 and Gen 1:1!"
169
+ parsed = BibleRefParser.parse(input)
170
+
171
+ parsed.segments.each do |segment|
172
+ if segment.type == :reference
173
+ puts "Reference: #{segment.original_text} at #{segment.position}"
174
+ else
175
+ puts "Text: #{segment.original_text}"
176
+ end
177
+ end
178
+
179
+ # Reconstruct the original:
180
+ puts parsed.to_s
181
+ ```
@@ -0,0 +1,131 @@
1
+ # bible_ref_parser — Development Checklist
2
+
3
+ ## Setup & Infrastructure
4
+ - [ ] Initialize gem: `bundle gem bible_ref_parser`
5
+ - [ ] Add dependencies: `parslet (~> 2.0)`, `rspec`
6
+ - [ ] Set up RSpec with `bundle exec rspec --init`
7
+ - [ ] Create `lib/bible_ref_parser.rb` with module scaffold
8
+ - [ ] Define `BibleRefParser::InvalidInputError` class
9
+ - [ ] Add empty placeholder classes: `BibleReferenceText`, `Segment`
10
+ - [ ] Write spec: Raise `InvalidInputError` for non-string inputs
11
+
12
+ ---
13
+
14
+ ## Core Parser Implementation
15
+ ### Book Names
16
+ - [ ] Create `lib/bible_ref_parser/books.rb` with canonical book list (66 books)
17
+ - [ ] Parslet grammar rules for:
18
+ - [ ] Full book names (e.g., "Genesis")
19
+ - [ ] Numeric prefixes ("1 Corinthians", "III John")
20
+ - [ ] Common abbreviations ("Gen", "Jn")
21
+ - [ ] Tests:
22
+ - [ ] Match all 66 books in all formats
23
+ - [ ] Case insensitivity (e.g., "gen" vs "GEN")
24
+ - [ ] Reject non-biblical names (e.g., "Harry Potter")
25
+
26
+ ### Chapter/Verse Parsing
27
+ - [ ] Parslet rules for:
28
+ - [ ] Simple chapter (`3`)
29
+ - [ ] Chapter-verse (`3:16`)
30
+ - [ ] Ranges (`16-18`, `3:16–4:2`)
31
+ - [ ] Comma-separated verses (`16,17`)
32
+ - [ ] Tests:
33
+ - [ ] Valid numbers (`John 3:16-18`)
34
+ - [ ] Invalid formats (`John 3:abc`)
35
+ - [ ] Position tracking accuracy
36
+
37
+ ### Reference Assembly
38
+ - [ ] Combine book + chapter/verse rules into `reference` rule
39
+ - [ ] Handle optional whitespace between components
40
+ - [ ] Tests:
41
+ - [ ] Basic references (`Gen 1`, `John 3:16`)
42
+ - [ ] Edge cases (`1 Cor 15:20-22`)
43
+
44
+ ---
45
+
46
+ ## Segmentation & Classes
47
+ ### Segment Class
48
+ - [ ] Implement `type` attribute (`:reference`/`:text`)
49
+ - [ ] Store `original_text` and `position`
50
+ - [ ] Add `to_s` delegation to `original_text`
51
+ - [ ] Validate and skip empty segments
52
+ - [ ] Tests:
53
+ - [ ] Segment type assignment
54
+ - [ ] Position accuracy
55
+ - [ ] Empty segment filtering
56
+
57
+ ### Text Segmentation
58
+ - [ ] Split input into alternating text/reference segments
59
+ - [ ] Preserve whitespace/newlines in original text
60
+ - [ ] Tests:
61
+ - [ ] Mixed content (`"Hello John 3:16!"`)
62
+ - [ ] Consecutive references (`Gen 1:1, Exod 2:2`)
63
+ - [ ] Newline handling (`"John\n3:16"`)
64
+
65
+ ### BibleReferenceText Class
66
+ - [ ] Implement `Enumerable` with `each` method
67
+ - [ ] Add `#segments` and `#references` filters
68
+ - [ ] Reconstruct original text via `to_s`
69
+ - [ ] Return `nil` on parser errors
70
+ - [ ] Tests:
71
+ - [ ] Enumeration behavior
72
+ - [ ] Round-trip reconstruction (`parsed.to_s == input`)
73
+
74
+ ---
75
+
76
+ ## Advanced Parsing
77
+ ### Punctuation Handling
78
+ - [ ] Strip trailing punctuation from references (`!`, `,`, `.`)
79
+ - [ ] Preserve punctuation in original text
80
+ - [ ] Tests:
81
+ - [ ] `"John 3:16!"` → reference is `John 3:16`
82
+ - [ ] Parentheses handling (`(Gen 1:1)`)
83
+ - [ ] En dash in ranges (`Gen 1–2`)
84
+
85
+ ### Compound References
86
+ - [ ] Support `and`/`&` separators
87
+ - [ ] Cross-book ranges (`Gen 1:1–Exod 2:3`)
88
+ - [ ] Tests:
89
+ - [ ] `John 3:16 and 17` → single reference
90
+ - [ ] `Gen 1:1 & Exod 2:2` → two references
91
+ - [ ] Cross-book range parsing
92
+
93
+ ### Greedy Parsing
94
+ - [ ] Longest-possible match strategy
95
+ - [ ] Prevent false positives (e.g., `Johnjohn 3:16`)
96
+ - [ ] Tests:
97
+ - [ ] Ambiguous cases (`1 John 2:3` vs `John 2:3`)
98
+ - [ ] References followed by book names (`Gen 1:1 Exodus`)
99
+
100
+ ---
101
+
102
+ ## Error Handling & Integration
103
+ - [ ] Ensure `to_s` returns original text even on errors
104
+ - [ ] Validate all `position` ranges are within input bounds
105
+ - [ ] Integration tests:
106
+ - [ ] Real-world examples (e.g., sermon text)
107
+ - [ ] ActionText-style HTML content
108
+ - [ ] Finalize error behavior:
109
+ - [ ] Raise `InvalidInputError` for non-strings
110
+ - [ ] Return `nil` for malformed references
111
+
112
+ ---
113
+
114
+ ## Documentation & Polish
115
+ - [ ] Write README with:
116
+ - [ ] Usage examples
117
+ - [ ] Key features/limitations
118
+ - [ ] Version compatibility (Ruby >= 3.0)
119
+ - [ ] Add YARD comments to public methods
120
+ - [ ] Include example from spec (`John 3:16 and Gen 1:1`)
121
+ - [ ] Document caveats:
122
+ - [ ] No support for standalone book names
123
+ - [ ] Parentheses treated as plain text
124
+
125
+ ---
126
+
127
+ ## Final Checks
128
+ - [ ] Verify 100% test coverage for segments/parser
129
+ - [ ] Run RuboCop for style consistency
130
+ - [ ] Test installation in a dummy Rails app
131
+ - [ ] Publish gem to RubyGems (optional)
@@ -0,0 +1,35 @@
# frozen_string_literal: true

module BibleRefParser
  # The result of scanning a piece of text for Bible references.
  #
  # Holds the original text together with the ordered list of segments
  # (references and plain text) produced by SegmentParser. Includes Enumerable,
  # iterating over the segments.
  class BibleReferenceText
    include Enumerable

    # @param text [#to_str] the text to scan for Bible references
    # @raise [InvalidInputError] if +text+ does not respond to +to_str+
    def initialize(text)
      raise InvalidInputError, "Expected a string, got #{text.class}" unless text.respond_to?(:to_str)

      @original_text = text.to_str
      @segments = parse_segments
    end

    # @return [Array<Segment>] every segment in document order; empty when
    #   parsing failed
    def segments
      @segments || []
    end

    # @return [Array<Segment>] only the segments that are Bible references
    def references
      segments.select(&:reference?)
    end

    # Yields each segment in document order.
    def each(&block)
      segments.each(&block)
    end

    # Reconstructs the text from its segments.
    #
    # @return [String] the segments joined back together, or the original
    #   input when parsing failed and no segments are available
    def to_s
      # Without segments there is nothing to join; fall back to the input so
      # the text still round-trips.
      return @original_text if @segments.nil?

      segments.map(&:to_s).join
    end

    private

    # @return [Array<Segment>, nil] parsed segments, or nil when the parser
    #   raises Parslet::ParseFailed
    def parse_segments
      SegmentParser.new.parse(@original_text)
    rescue Parslet::ParseFailed
      nil
    end
  end
end
@@ -0,0 +1,80 @@
# frozen_string_literal: true

module BibleRefParser
  # The 66 canonical books, keyed by full name, each mapped to the
  # abbreviations accepted for it. Both the hash and every abbreviation list
  # are frozen so the shared constant cannot be mutated by callers.
  BOOKS = {
    # Old Testament (39 books)
    "Genesis" => ["Gen", "Ge", "Gn"],
    "Exodus" => ["Ex", "Exod", "Exo"],
    "Leviticus" => ["Lev", "Le", "Lv"],
    "Numbers" => ["Num", "Nu", "Nm", "Nb"],
    "Deuteronomy" => ["Deut", "De", "Dt"],
    "Joshua" => ["Josh", "Jos", "Jsh"],
    "Judges" => ["Judg", "Jdg", "Jg", "Jdgs"],
    "Ruth" => ["Rth", "Ru"],
    "1 Samuel" => ["1 Sam", "1 Sm", "1 Sa", "1S"],
    "2 Samuel" => ["2 Sam", "2 Sm", "2 Sa", "2S"],
    "1 Kings" => ["1 Kgs", "1 Ki", "1K"],
    "2 Kings" => ["2 Kgs", "2 Ki", "2K"],
    "1 Chronicles" => ["1 Chron", "1 Chr", "1 Ch", "1Ch"],
    "2 Chronicles" => ["2 Chron", "2 Chr", "2 Ch", "2Ch"],
    "Ezra" => ["Ezr", "Ez"],
    "Nehemiah" => ["Neh", "Ne"],
    "Esther" => ["Est", "Es"],
    "Job" => ["Jb"],
    "Psalms" => ["Psalm", "Ps", "Psa", "Psm", "Pss"],
    "Proverbs" => ["Prov", "Pr", "Prv"],
    "Ecclesiastes" => ["Eccl", "Ecc", "Qoh"],
    "Song of Solomon" => ["Song", "SOS", "Song of Songs"],
    "Isaiah" => ["Isa", "Is"],
    "Jeremiah" => ["Jer", "Je", "Jr"],
    "Lamentations" => ["Lam", "La"],
    "Ezekiel" => ["Ezek", "Eze", "Ezk"],
    "Daniel" => ["Dan", "Da", "Dn"],
    "Hosea" => ["Hos", "Ho"],
    "Joel" => ["Jl"],
    "Amos" => ["Am"],
    "Obadiah" => ["Obad", "Ob"],
    "Jonah" => ["Jon", "Jnh"],
    "Micah" => ["Mic", "Mc"],
    "Nahum" => ["Nah", "Na"],
    "Habakkuk" => ["Hab", "Hb"],
    "Zephaniah" => ["Zeph", "Zep", "Zp"],
    "Haggai" => ["Hag", "Hg"],
    "Zechariah" => ["Zech", "Zec", "Zc"],
    "Malachi" => ["Mal", "Ml"],

    # New Testament (27 books)
    "Matthew" => ["Matt", "Mt"],
    "Mark" => ["Mrk", "Mk", "Mr"],
    "Luke" => ["Luk", "Lk"],
    "John" => ["Jn", "Jhn"],
    "Acts" => ["Act", "Ac"],
    "Romans" => ["Rom", "Ro", "Rm"],
    "1 Corinthians" => ["1 Cor", "1 Co", "1C"],
    "2 Corinthians" => ["2 Cor", "2 Co", "2C"],
    "Galatians" => ["Gal", "Ga"],
    "Ephesians" => ["Eph", "Ep"],
    "Philippians" => ["Phil", "Php", "Pp"],
    "Colossians" => ["Col", "Co"],
    "1 Thessalonians" => ["1 Thess", "1 Thes", "1 Th", "1Th"],
    "2 Thessalonians" => ["2 Thess", "2 Thes", "2 Th", "2Th"],
    "1 Timothy" => ["1 Tim", "1 Ti", "1T"],
    "2 Timothy" => ["2 Tim", "2 Ti", "2T"],
    "Titus" => ["Tit", "Ti"],
    "Philemon" => ["Philem", "Phm", "Pm"],
    "Hebrews" => ["Heb"],
    "James" => ["Jas", "Jm"],
    "1 Peter" => ["1 Pet", "1 Pe", "1P", "1 Pt"],
    "2 Peter" => ["2 Pet", "2 Pe", "2P", "2 Pt"],
    "1 John" => ["1 Jn", "1 Jhn", "1J", "1 Jo"],
    "2 John" => ["2 Jn", "2 Jhn", "2J", "2 Jo"],
    "3 John" => ["3 Jn", "3 Jhn", "3J", "3 Jo"],
    "Jude" => ["Jd"],
    "Revelation" => ["Rev", "Re", "Rv"]
  }.each_value(&:freeze).freeze

  # Lists the names under which books are recognised.
  #
  # @param alternative_names [Boolean] when true (the default) the
  #   abbreviations are appended after the full names
  # @return [Array<String>] a new array: the 66 full names in canonical order,
  #   followed by every abbreviation when +alternative_names+ is true
  def self.book_names(alternative_names: true)
    full_names = BOOKS.keys
    return full_names unless alternative_names

    full_names + BOOKS.values.flatten
  end
end
@@ -0,0 +1,121 @@
# frozen_string_literal: true

# lib/bible_ref_parser/parser.rb

require "parslet"

module BibleRefParser
  # Parslet grammar that recognises Bible references inside free-form text.
  #
  # The root rule (+document+) produces a +:segments+ list whose entries are
  # tagged either +:reference+ or +:text+.
  class Parser < Parslet::Parser
    # @param book_names [Array<String>] every name or abbreviation to be
    #   recognised as a book; defaults to the built-in list
    def initialize(book_names: BibleRefParser.book_names)
      @book_names = book_names
      super()
    end

    # Builds an atom that matches +string+ case-insensitively. Every space in
    # +string+ becomes optional whitespace, and the match may be terminated
    # with one or more dots.
    #
    # @param string [String] the literal text to match
    def istr(string)
      string
        .chars
        .map { |char| (char == " ") ? space? : ichar(char) }
        .reduce(:>>) >> dot?
    end

    # Builds an atom matching a single character in either case. The character
    # is regex-escaped because it is placed inside a character class, where
    # characters such as "]", "^", "-" or "\" would otherwise be
    # misinterpreted.
    #
    # @param char [String] a one-character string
    def ichar(char)
      match[Regexp.escape(char.downcase) + Regexp.escape(char.upcase)]
    end

    rule(:space) { match('\s').repeat(1) }
    rule(:space?) { space.maybe }

    rule(:dot) { match('\.').repeat(1) }
    rule(:dot?) { dot.maybe }

    rule(:book_name) do
      # Sort book names by length (longest first) so that, in the ordered
      # choice below, a longer name is tried before any shorter name that is a
      # prefix of it.
      @book_names
        .sort_by { |name| -name.length }
        .map { |book_name| istr(book_name) }
        .reduce(:|)
        .as(:book)
    end

    rule(:range_sep) do
      # Hyphen, en dash or em dash, surrounded by optional whitespace
      space? >> match["-–—"] >> space?
    end

    # The word "and" in any letter case, consistent with the case-insensitive
    # book-name matching.
    rule(:and_word) do
      "and".chars.map { |char| ichar(char) }.reduce(:>>)
    end

    rule(:list_sep) do
      # A separator only counts as part of a list when it is NOT followed by a
      # book name; otherwise it separates two distinct references.
      space? >> (match[",;&"] | and_word) >> space? >> book_name.absent?
    end

    rule(:colon) do
      space? >> str(":") >> space?
    end

    rule(:reference_int) do
      # Positive integer without leading zeros
      match("[1-9]") >> match("[0-9]").repeat(0)
    end

    rule(:chapter) do
      reference_int.as(:chapter)
    end

    rule(:chapter_range) do
      (chapter.as(:start) >> range_sep >> chapter.as(:end)).as(:chapter_range)
    end

    rule(:verse) do
      reference_int.as(:verse)
    end

    rule(:verse_range) do
      (verse.as(:start) >> range_sep >> verse.as(:end)).as(:verse_range)
    end

    rule(:explicit_verse) do
      (chapter >> colon >> verse).as(:explicit_verse)
    end

    rule(:explicit_verse_range) do
      (explicit_verse.as(:start) >> range_sep >> explicit_verse.as(:end)).as(:explicit_verse_range)
    end

    rule(:verse_list) do
      (verse >> (list_sep >> verse).repeat(1)).as(:verse_list)
    end

    rule(:chapter_with_verse_list) do
      (chapter >> colon >> verse_list).as(:chapter_with_verse_list)
    end

    rule(:chapter_with_verse_range) do
      (chapter >> colon >> verse_range).as(:chapter_with_verse_range)
    end

    rule(:chapter_with_verses) do
      chapter_with_verse_list | chapter_with_verse_range
    end

    rule(:indicator) do
      # Ordered choice: the longer forms come first so that a shorter form
      # does not match a prefix of a longer one.
      explicit_verse_range | chapter_range | chapter_with_verses | explicit_verse | chapter
    end

    rule(:indicator_list) do
      # NOTE(review): the first alternative succeeds whenever a single
      # indicator matches, so the separator-based second alternative looks
      # unreachable — confirm the intended order before changing it.
      indicator.repeat(1).as(:indicator_list) |
        (indicator >> (list_sep >> indicator).repeat(1)).as(:indicator_list)
    end

    rule(:cross_book_range) do
      (
        (book_name >> space? >> (explicit_verse | chapter)).as(:start) >>
        range_sep >>
        (book_name >> space? >> (explicit_verse | chapter)).as(:end)
      ).as(:cross_book_range)
    end

    rule(:reference) { cross_book_range | (book_name >> space? >> indicator_list) }

    # Any run of characters at which no reference starts.
    rule(:non_reference_text) { (reference.absent? >> any).repeat(1) }

    rule(:html_tag) { str("<") >> (str(">").absent? >> any).repeat >> str(">") }

    rule(:document) do
      # Segment matching stops at the first position where an HTML tag begins.
      (html_tag.absent? >> (reference.as(:reference) | non_reference_text.as(:text))).repeat.as(:segments)
    end

    root(:document)
  end
end
@@ -0,0 +1,60 @@
# frozen_string_literal: true

# lib/bible_ref_parser/segment.rb

module BibleRefParser
  # One slice of parsed input: either a Bible reference or plain text.
  class Segment
    attr_reader :type, :original_text, :position

    # @param type [Symbol] :reference or :text
    # @param original_text [String] the text of this segment, unaltered
    # @param position [Range] the character positions in the original string
    # @raise [ArgumentError] if +type+ is neither :reference nor :text
    # @raise [InvalidInputError] if +position+ is not a non-negative,
    #   ascending Integer range, or if a :reference segment is blank, nil, or
    #   contains an HTML tag
    def initialize(type, original_text, position)
      @type = type
      @original_text = original_text
      @position = position
      validate!
    end

    # @return [String] the original text
    def to_s
      original_text
    end

    # @return [Boolean] true if this is a Bible reference segment
    def reference?
      type == :reference
    end

    # @return [Boolean] true if this is a plain text segment
    def text?
      type == :text
    end

    private

    def validate!
      raise ArgumentError, "Type must be :reference or :text" unless [:reference, :text].include?(type)

      validate_position!
      validate_text!
    end

    def validate_position!
      return if valid_position?

      raise InvalidInputError, "Invalid position range: #{position.inspect}"
    end

    # @return [Boolean] true when position is a Range of Integers that starts
    #   at zero or later and does not run backwards
    def valid_position?
      position.is_a?(Range) &&
        position.begin.is_a?(Integer) &&
        position.end.is_a?(Integer) &&
        position.begin >= 0 &&
        position.end >= position.begin
    end

    # Only reference segments are checked; text segments may hold anything.
    def validate_text!
      return unless reference?

      # to_s turns a nil text into "" so it is rejected as an empty reference
      # instead of failing with NoMethodError.
      reference_text = original_text.to_s
      raise InvalidInputError, "Empty reference" if reference_text.strip.empty?
      raise InvalidInputError, "Reference contains HTML" if reference_text.match?(/<[^>]+>/)
    end
  end
end
@@ -0,0 +1,77 @@
# frozen_string_literal: true

module BibleRefParser
  # Turns the parse tree produced by Parser into an ordered list of Segment
  # objects covering the whole input.
  class SegmentParser
    # @param book_names [Array<String>] names/abbreviations recognised as books
    def initialize(book_names: BibleRefParser.book_names)
      @reference_parser = Parser.new(book_names: book_names)
    end

    # Splits +text+ into reference and text segments.
    #
    # The parser instance can be reused: nothing derived from one input is
    # carried over to the next call.
    #
    # @param text [#to_s] the input to segment
    # @return [Array<Segment>] the segments in order; when parsing or
    #   validation fails, a single :text segment holding the whole input
    def parse(text)
      text = text.to_s.dup.freeze
      @tree = @reference_parser.parse(text)[:segments]
      segments = build_segments(text)
      validate_segments!(segments, text)
      segments
    rescue Parslet::ParseFailed, InvalidInputError
      # Return single text segment containing full original text on error
      [Segment.new(:text, text, 0...text.length)]
    end

    # Walks down the last value of each nested Hash / last item of each nested
    # Array until it reaches something that is neither.
    #
    # @param element [Hash, Array, Object]
    # @return [Object] the last leaf of +element+
    def get_last_element(element)
      case element
      when Hash then get_last_element(element.values.last)
      when Array then get_last_element(element.last)
      else element
      end
    end

    private

    # Mirror of #get_last_element: the first leaf of a nested Hash/Array tree.
    # For a plain reference this is the :book entry; for a cross-book range,
    # which has no top-level :book key, it is the first leaf of the nested
    # start entry.
    def get_first_element(element)
      case element
      when Hash then get_first_element(element.values.first)
      when Array then get_first_element(element.first)
      else element
      end
    end

    def build_segments(text)
      positions = segment_positions
      raise InvalidInputError, "Position mismatch" unless positions.size == @tree.size

      @tree.zip(positions).map do |seg, pos|
        Segment.new(seg.keys.first, text[pos], pos)
      end
    end

    # Verifies that the segments cover the entire input: the first starts at
    # 0, each one starts where the previous one ended, and the last ends at
    # the end of the text.
    #
    # @raise [InvalidInputError] if there is a gap, an overlap, or a tail of
    #   uncovered input
    def validate_segments!(segments, full_text)
      covered_up_to = segments.reduce(0) do |expected_start, segment|
        unless segment.position.begin == expected_start
          raise InvalidInputError, "Input contains unparsed content"
        end

        segment.position.end
      end
      return if covered_up_to == full_text.length

      raise InvalidInputError, "Input contains unparsed content"
    end

    # Exclusive range from the start of +first_slice+ to the end of
    # +last_slice+.
    def position(first_slice, last_slice = first_slice)
      first_slice.offset...(last_slice.offset + last_slice.size)
    end

    # Computed from the current tree on every call (deliberately not
    # memoized, so a reused parser never sees positions of an earlier input).
    def segment_positions
      @tree.map do |seg|
        if seg.key?(:text)
          position(seg[:text])
        elsif seg.key?(:reference)
          ref = seg[:reference]
          position(get_first_element(ref), get_last_element(ref))
        end
      end
    end

    def segment_texts(text)
      segment_positions.map { |pos| text[pos] }
    end

    def segment_types
      @tree.flat_map(&:keys)
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module BibleRefParser
  # Version string of this gem release.
  VERSION = "0.1.0"
end
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "bible_ref_parser/version"
4
+
5
+ require_relative "bible_ref_parser/bible_reference_text"
6
+ require_relative "bible_ref_parser/books"
7
+ require_relative "bible_ref_parser/parser"
8
+ require_relative "bible_ref_parser/segment"
9
+ require_relative "bible_ref_parser/segment_parser"
10
+
11
module BibleRefParser
  # Custom error class for invalid input.
  # Raised by the segment-building code when segment positions do not line up
  # with the parse tree or the segments do not account for the whole input;
  # the segment parser's +parse+ rescues it and falls back to a single text
  # segment.
  class InvalidInputError < StandardError; end
end
@@ -0,0 +1,4 @@
1
module BibleRefParser
  # Version string of the gem.
  VERSION: String
  # See the writing guide of rbs: https://github.com/ruby/rbs#guides
end
metadata ADDED
@@ -0,0 +1,172 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bible_ref_parser
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Fabio Papa
8
+ bindir: exe
9
+ cert_chain: []
10
+ date: 2025-05-03 00:00:00.000000000 Z
11
+ dependencies:
12
+ - !ruby/object:Gem::Dependency
13
+ name: parslet
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - "~>"
17
+ - !ruby/object:Gem::Version
18
+ version: '2.0'
19
+ type: :runtime
20
+ prerelease: false
21
+ version_requirements: !ruby/object:Gem::Requirement
22
+ requirements:
23
+ - - "~>"
24
+ - !ruby/object:Gem::Version
25
+ version: '2.0'
26
+ - !ruby/object:Gem::Dependency
27
+ name: pry
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - "~>"
31
+ - !ruby/object:Gem::Version
32
+ version: 0.15.0
33
+ type: :development
34
+ prerelease: false
35
+ version_requirements: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: 0.15.0
40
+ - !ruby/object:Gem::Dependency
41
+ name: pry-byebug
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: '0'
47
+ type: :development
48
+ prerelease: false
49
+ version_requirements: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ - !ruby/object:Gem::Dependency
55
+ name: rdoc
56
+ requirement: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: '0'
61
+ type: :development
62
+ prerelease: false
63
+ version_requirements: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - ">="
66
+ - !ruby/object:Gem::Version
67
+ version: '0'
68
+ - !ruby/object:Gem::Dependency
69
+ name: reline
70
+ requirement: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ version: '0'
75
+ type: :development
76
+ prerelease: false
77
+ version_requirements: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - ">="
80
+ - !ruby/object:Gem::Version
81
+ version: '0'
82
+ - !ruby/object:Gem::Dependency
83
+ name: rspec
84
+ requirement: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - "~>"
87
+ - !ruby/object:Gem::Version
88
+ version: '3.0'
89
+ type: :development
90
+ prerelease: false
91
+ version_requirements: !ruby/object:Gem::Requirement
92
+ requirements:
93
+ - - "~>"
94
+ - !ruby/object:Gem::Version
95
+ version: '3.0'
96
+ - !ruby/object:Gem::Dependency
97
+ name: standard
98
+ requirement: !ruby/object:Gem::Requirement
99
+ requirements:
100
+ - - "~>"
101
+ - !ruby/object:Gem::Version
102
+ version: '1.3'
103
+ type: :development
104
+ prerelease: false
105
+ version_requirements: !ruby/object:Gem::Requirement
106
+ requirements:
107
+ - - "~>"
108
+ - !ruby/object:Gem::Version
109
+ version: '1.3'
110
+ - !ruby/object:Gem::Dependency
111
+ name: yard
112
+ requirement: !ruby/object:Gem::Requirement
113
+ requirements:
114
+ - - ">="
115
+ - !ruby/object:Gem::Version
116
+ version: '0'
117
+ type: :development
118
+ prerelease: false
119
+ version_requirements: !ruby/object:Gem::Requirement
120
+ requirements:
121
+ - - ">="
122
+ - !ruby/object:Gem::Version
123
+ version: '0'
124
+ description: A Ruby gem for parsing and normalizing Bible references
125
+ email:
126
+ - fabtheman@gmail.com
127
+ executables: []
128
+ extensions: []
129
+ extra_rdoc_files: []
130
+ files:
131
+ - ".rspec"
132
+ - ".standard.yml"
133
+ - LICENSE.txt
134
+ - README.md
135
+ - Rakefile
136
+ - ai_artifacts/prompt_plan.md
137
+ - ai_artifacts/spec.md
138
+ - ai_artifacts/todo.md
139
+ - lib/bible_ref_parser.rb
140
+ - lib/bible_ref_parser/bible_reference_text.rb
141
+ - lib/bible_ref_parser/books.rb
142
+ - lib/bible_ref_parser/parser.rb
143
+ - lib/bible_ref_parser/segment.rb
144
+ - lib/bible_ref_parser/segment_parser.rb
145
+ - lib/bible_ref_parser/version.rb
146
+ - sig/bible_ref_parser.rbs
147
+ homepage: https://github.com/fapapa/bible_ref_parser
148
+ licenses:
149
+ - MIT
150
+ metadata:
151
+ allowed_push_host: https://rubygems.org
152
+ homepage_uri: https://github.com/fapapa/bible_ref_parser
153
+ source_code_uri: https://github.com/fapapa/bible_ref_parser
154
+ changelog_uri: https://github.com/fapapa/bible_ref_parser/blob/main/CHANGELOG.md
155
+ rdoc_options: []
156
+ require_paths:
157
+ - lib
158
+ required_ruby_version: !ruby/object:Gem::Requirement
159
+ requirements:
160
+ - - ">="
161
+ - !ruby/object:Gem::Version
162
+ version: 3.1.0
163
+ required_rubygems_version: !ruby/object:Gem::Requirement
164
+ requirements:
165
+ - - ">="
166
+ - !ruby/object:Gem::Version
167
+ version: '0'
168
+ requirements: []
169
+ rubygems_version: 3.6.2
170
+ specification_version: 4
171
+ summary: A parser for Bible references
172
+ test_files: []