bible_ref_parser 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CLAUDE.md +45 -0
- data/lib/bible_ref_parser/bible_reference.rb +24 -1
- data/lib/bible_ref_parser/parser.rb +9 -3
- data/lib/bible_ref_parser/reference_node.rb +85 -0
- data/lib/bible_ref_parser/segment.rb +3 -1
- data/lib/bible_ref_parser/segment_node.rb +45 -0
- data/lib/bible_ref_parser/segment_parser.rb +2 -49
- data/lib/bible_ref_parser/version.rb +1 -1
- data/lib/bible_ref_parser.rb +2 -0
- metadata +5 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 785c346d7af39dc9c8f2eee1088d443fb78ff980c33572bd27c28610ba364b04
|
|
4
|
+
data.tar.gz: 87fcbaa84fb663b042352e4f78339a940922b673a62a76dd0bca0af68ca07d36
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: a7d0e3c59c1af88185012447a6f7db2e2feb13a351fa3b524bf3cdac98c4c5dbdb387bd1a467d6b08140c4bc8ea7a241331e2c0955c8ed0e18bc19a12d8526fc
|
|
7
|
+
data.tar.gz: 79230b5b2b8a4b3bf1f2626089a3bc776552f92ca1f6ea5ec05d2364e2de1b976f5177f8ab37fef85c85212895f4e1d1a3321ad71c9820f157489f2b7f518c9c
|
data/CLAUDE.md
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# CLAUDE.md
|
|
2
|
+
|
|
3
|
+
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
|
|
4
|
+
|
|
5
|
+
## Project
|
|
6
|
+
|
|
7
|
+
Ruby gem that parses Bible references from text using the Parslet (~> 2.0) PEG parser. Given a string like `"Read John 3:16 and Romans 8:28"`, it segments the text into reference and plain-text parts with character positions.
|
|
8
|
+
|
|
9
|
+
## Commands
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
rake # Default: runs ci (spec + standard + yard)
|
|
13
|
+
rake spec # Run all RSpec tests
|
|
14
|
+
bundle exec rspec spec/lib/bible_ref_parser/parser_spec.rb # Run a single spec file
|
|
15
|
+
bundle exec rspec spec/lib/bible_ref_parser/parser_spec.rb:42 # Run a single example by line
|
|
16
|
+
rake standard # Lint with StandardRB
|
|
17
|
+
bundle exec standardrb --fix # Auto-fix lint issues
|
|
18
|
+
rake yard # Generate YARD docs
|
|
19
|
+
bin/console # Pry REPL with gem loaded
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
## Architecture
|
|
23
|
+
|
|
24
|
+
**Parsing pipeline:** `BibleReferenceText.new(text)` → `SegmentParser.parse(text)` → `Parser` (Parslet grammar) → array of `Segment` objects.
|
|
25
|
+
|
|
26
|
+
- **Parser** (`lib/bible_ref_parser/parser.rb`) — Parslet grammar defining rules for book names, chapter:verse notation, ranges, cross-book ranges, and verse lists. Case-insensitive matching with abbreviation support.
|
|
27
|
+
- **SegmentParser** (`lib/bible_ref_parser/segment_parser.rb`) — Converts the Parslet parse tree into `Segment` objects by wrapping each raw segment in a `SegmentNode` (which uses `ReferenceNode` for references), tracking character positions in the original text.
|
|
28
|
+
- **Segment** (`lib/bible_ref_parser/segment.rb`) — Represents a `:reference` or `:text` segment with original text and position offsets.
|
|
29
|
+
- **BibleReferenceText** (`lib/bible_ref_parser/bible_reference_text.rb`) — Main API class. Includes `Enumerable` and delegates array-like methods to its segments array.
|
|
30
|
+
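- **BibleReference** (`lib/bible_ref_parser/bible_reference.rb`) — `Data` class holding text, start_offset, end_offset, and book_name. Also exposes `parts` (the sub-references of a compound reference, or the reference itself when there are none) and `compound?`.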
|
|
31
|
+
- **Books** (`lib/bible_ref_parser/books.rb`) — Dictionary of 66 Bible books (39 OT, 27 NT) with abbreviation variants.
|
|
32
|
+
|
|
33
|
+
On parse failure, the library gracefully degrades to a single text segment rather than raising.
|
|
34
|
+
|
|
35
|
+
## Testing
|
|
36
|
+
|
|
37
|
+
RSpec with custom matchers in `spec/support/matchers/reference_parser_matchers.rb`. The custom `parse` matcher validates Parslet rule output. Tests cover unit, integration, and edge cases (Unicode, HTML tags, invalid input).
|
|
38
|
+
|
|
39
|
+
## Linting
|
|
40
|
+
|
|
41
|
+
StandardRB configured in `.standard.yml` targeting Ruby 2.7. Max method length: 15 lines.
|
|
42
|
+
|
|
43
|
+
## CI
|
|
44
|
+
|
|
45
|
+
GitHub Actions (`.github/workflows/main.yml`): Ruby 3.4.2 on ubuntu-latest, runs `bundle exec rake` on push to main and PRs.
|
|
@@ -1,5 +1,28 @@
|
|
|
1
1
|
module BibleRefParser
|
|
2
|
-
BibleReference = Data.define(:text, :start_offset, :end_offset) do
|
|
2
|
+
BibleReference = Data.define(:text, :start_offset, :end_offset, :book_name, :_parts) do
|
|
3
|
+
def initialize(text:, start_offset:, end_offset:, book_name: nil, parts: [])
|
|
4
|
+
super(
|
|
5
|
+
text: text, start_offset: start_offset, end_offset: end_offset,
|
|
6
|
+
book_name: book_name, _parts: parts.freeze
|
|
7
|
+
)
|
|
8
|
+
end
|
|
9
|
+
|
|
10
|
+
def parts
|
|
11
|
+
_parts.empty? ? [self] : _parts
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
def compound?
|
|
15
|
+
parts.size > 1
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
def qualified_text
|
|
19
|
+
if book_name && !text.start_with?(book_name)
|
|
20
|
+
"#{book_name} #{text}"
|
|
21
|
+
else
|
|
22
|
+
text
|
|
23
|
+
end
|
|
24
|
+
end
|
|
25
|
+
|
|
3
26
|
def to_s
|
|
4
27
|
text
|
|
5
28
|
end
|
|
@@ -73,8 +73,14 @@ module BibleRefParser
|
|
|
73
73
|
(explicit_verse.as(:start) >> range_sep >> explicit_verse.as(:end)).as(:explicit_verse_range)
|
|
74
74
|
end
|
|
75
75
|
|
|
76
|
+
# A verse number that is not followed by a colon, preventing
|
|
77
|
+
# it from consuming a chapter number in a chapter:verse pair
|
|
78
|
+
rule(:standalone_verse) do
|
|
79
|
+
(reference_int >> colon.absent?).as(:verse)
|
|
80
|
+
end
|
|
81
|
+
|
|
76
82
|
rule(:verse_list) do
|
|
77
|
-
(verse >> (list_sep >>
|
|
83
|
+
(verse >> (list_sep >> standalone_verse).repeat(1)).as(:verse_list)
|
|
78
84
|
end
|
|
79
85
|
|
|
80
86
|
rule(:chapter_with_verse_list) do
|
|
@@ -94,8 +100,8 @@ module BibleRefParser
|
|
|
94
100
|
end
|
|
95
101
|
|
|
96
102
|
rule(:indicator_list) do
|
|
97
|
-
indicator.repeat(1).as(:indicator_list) |
|
|
98
|
-
|
|
103
|
+
(indicator >> (list_sep >> indicator).repeat(1)).as(:indicator_list) |
|
|
104
|
+
indicator.repeat(1).as(:indicator_list)
|
|
99
105
|
end
|
|
100
106
|
|
|
101
107
|
rule(:cross_book_range) do
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
module BibleRefParser
|
|
2
|
+
# Wraps a raw reference hash from the Parslet parse tree.
|
|
3
|
+
# Encapsulates all slice traversal and position extraction logic,
|
|
4
|
+
# so callers can work with named concepts instead of raw nested hashes.
|
|
5
|
+
class ReferenceNode
|
|
6
|
+
def initialize(ref_hash)
|
|
7
|
+
@ref_hash = ref_hash
|
|
8
|
+
end
|
|
9
|
+
|
|
10
|
+
def book_name
|
|
11
|
+
@ref_hash[:book]&.to_s
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
# The first Parslet::Slice in the entire reference structure
|
|
15
|
+
def first_slice
|
|
16
|
+
leaf_first(@ref_hash)
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
# The last Parslet::Slice in the entire reference structure
|
|
20
|
+
def last_slice
|
|
21
|
+
leaf_last(@ref_hash)
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
# Character range covered by this reference in the original text
|
|
25
|
+
def position
|
|
26
|
+
book = @ref_hash[:book]
|
|
27
|
+
last = last_slice
|
|
28
|
+
book.offset...(last.offset + last.size)
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
def compound?
|
|
32
|
+
indicators.is_a?(Array) && indicators.size > 1
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
def to_bible_reference(text)
|
|
36
|
+
BibleReference.new(
|
|
37
|
+
text: text[position],
|
|
38
|
+
start_offset: position.begin,
|
|
39
|
+
end_offset: position.end,
|
|
40
|
+
book_name: book_name,
|
|
41
|
+
parts: extract_parts(text)
|
|
42
|
+
)
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
private
|
|
46
|
+
|
|
47
|
+
def indicators
|
|
48
|
+
@ref_hash[:indicator_list]
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
def extract_parts(text)
|
|
52
|
+
return [] unless compound?
|
|
53
|
+
|
|
54
|
+
indicators.each_with_index.map do |indicator, idx|
|
|
55
|
+
first = leaf_first(indicator)
|
|
56
|
+
last = leaf_last(indicator)
|
|
57
|
+
end_pos = last.offset + last.size
|
|
58
|
+
start_pos = (idx == 0) ? @ref_hash[:book].offset : first.offset
|
|
59
|
+
|
|
60
|
+
BibleReference.new(
|
|
61
|
+
text: text[start_pos...end_pos],
|
|
62
|
+
start_offset: start_pos,
|
|
63
|
+
end_offset: end_pos,
|
|
64
|
+
book_name: book_name
|
|
65
|
+
)
|
|
66
|
+
end
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
def leaf_first(node)
|
|
70
|
+
case node
|
|
71
|
+
when Hash then leaf_first(node.values.first)
|
|
72
|
+
when Array then leaf_first(node.first)
|
|
73
|
+
else node
|
|
74
|
+
end
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
def leaf_last(node)
|
|
78
|
+
case node
|
|
79
|
+
when Hash then leaf_last(node.values.last)
|
|
80
|
+
when Array then leaf_last(node.last)
|
|
81
|
+
else node
|
|
82
|
+
end
|
|
83
|
+
end
|
|
84
|
+
end
|
|
85
|
+
end
|
|
@@ -8,10 +8,11 @@ module BibleRefParser
|
|
|
8
8
|
# @param type [Symbol] :reference or :text
|
|
9
9
|
# @param original_text [String] The original text segment
|
|
10
10
|
# @param position [Range] The character positions in the original string
|
|
11
|
-
def initialize(type, original_text, position)
|
|
11
|
+
def initialize(type, original_text, position, bible_reference: nil)
|
|
12
12
|
@type = type
|
|
13
13
|
@original_text = original_text
|
|
14
14
|
@position = position
|
|
15
|
+
@bible_reference = bible_reference
|
|
15
16
|
validate!
|
|
16
17
|
end
|
|
17
18
|
|
|
@@ -37,6 +38,7 @@ module BibleRefParser
|
|
|
37
38
|
# @return [BibleReference, nil] a BibleReference for reference segments, nil for text
|
|
38
39
|
def bible_reference
|
|
39
40
|
return nil unless reference?
|
|
41
|
+
return @bible_reference if @bible_reference
|
|
40
42
|
|
|
41
43
|
BibleReference.new(
|
|
42
44
|
text: original_text,
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
module BibleRefParser
|
|
2
|
+
# Wraps a raw segment hash (keyed +:reference+ or +:text+) from the Parslet
|
|
3
|
+
# parse tree and knows how to build a Segment from it.
|
|
4
|
+
class SegmentNode
|
|
5
|
+
def initialize(raw_segment)
|
|
6
|
+
@raw = raw_segment
|
|
7
|
+
end
|
|
8
|
+
|
|
9
|
+
def reference?
|
|
10
|
+
@raw.key?(:reference)
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
def text?
|
|
14
|
+
@raw.key?(:text)
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
# Character range covered by this segment in the original text
|
|
18
|
+
def position
|
|
19
|
+
if text?
|
|
20
|
+
t = @raw[:text]
|
|
21
|
+
t.offset...(t.offset + t.size)
|
|
22
|
+
else
|
|
23
|
+
reference_node.position
|
|
24
|
+
end
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
def to_segment(text)
|
|
28
|
+
pos = position
|
|
29
|
+
seg_text = text[pos]
|
|
30
|
+
|
|
31
|
+
if reference?
|
|
32
|
+
bible_ref = reference_node.to_bible_reference(text)
|
|
33
|
+
Segment.new(:reference, seg_text, pos, bible_reference: bible_ref)
|
|
34
|
+
else
|
|
35
|
+
Segment.new(:text, seg_text, pos)
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
private
|
|
40
|
+
|
|
41
|
+
def reference_node
|
|
42
|
+
@reference_node ||= ReferenceNode.new(@raw[:reference])
|
|
43
|
+
end
|
|
44
|
+
end
|
|
45
|
+
end
|
|
@@ -7,8 +7,8 @@ module BibleRefParser
|
|
|
7
7
|
def parse(text)
|
|
8
8
|
text = text.to_s.dup.freeze
|
|
9
9
|
begin
|
|
10
|
-
|
|
11
|
-
segments =
|
|
10
|
+
tree = @reference_parser.parse(text)[:segments]
|
|
11
|
+
segments = tree.map { |raw| SegmentNode.new(raw).to_segment(text) }
|
|
12
12
|
validate_segments!(segments, text)
|
|
13
13
|
segments
|
|
14
14
|
rescue Parslet::ParseFailed, InvalidInputError
|
|
@@ -17,29 +17,8 @@ module BibleRefParser
|
|
|
17
17
|
end
|
|
18
18
|
end
|
|
19
19
|
|
|
20
|
-
def get_last_element(element)
|
|
21
|
-
if element.is_a?(Hash)
|
|
22
|
-
return get_last_element(element.values.last)
|
|
23
|
-
elsif element.is_a?(Array)
|
|
24
|
-
return get_last_element(element.last)
|
|
25
|
-
end
|
|
26
|
-
|
|
27
|
-
element
|
|
28
|
-
end
|
|
29
|
-
|
|
30
20
|
private
|
|
31
21
|
|
|
32
|
-
def build_segments(text)
|
|
33
|
-
positions = segment_positions
|
|
34
|
-
raise InvalidInputError, "Position mismatch" unless positions.size == @tree.size
|
|
35
|
-
|
|
36
|
-
@tree.each_with_index.map do |seg, i|
|
|
37
|
-
pos = positions[i]
|
|
38
|
-
seg_text = text[pos]
|
|
39
|
-
Segment.new(seg.keys.first, seg_text, pos)
|
|
40
|
-
end
|
|
41
|
-
end
|
|
42
|
-
|
|
43
22
|
def validate_segments!(segments, full_text)
|
|
44
23
|
# Verify the segments cover the entire input
|
|
45
24
|
coverage = segments.map(&:position).reduce(0..0) { |a, b| a.begin..b.end }
|
|
@@ -47,31 +26,5 @@ module BibleRefParser
|
|
|
47
26
|
raise InvalidInputError, "Input contains unparsed content"
|
|
48
27
|
end
|
|
49
28
|
end
|
|
50
|
-
|
|
51
|
-
def position(first_slice, last_slice = first_slice)
|
|
52
|
-
first_slice.offset...(last_slice.offset + last_slice.size)
|
|
53
|
-
end
|
|
54
|
-
|
|
55
|
-
def segment_positions
|
|
56
|
-
@segment_positions ||= @tree.collect do |seg|
|
|
57
|
-
if seg.key?(:text)
|
|
58
|
-
t = seg[:text]
|
|
59
|
-
position(t)
|
|
60
|
-
elsif seg.key?(:reference)
|
|
61
|
-
ref = seg[:reference]
|
|
62
|
-
book = ref[:book]
|
|
63
|
-
last = get_last_element(ref)
|
|
64
|
-
position(book, last)
|
|
65
|
-
end
|
|
66
|
-
end
|
|
67
|
-
end
|
|
68
|
-
|
|
69
|
-
def segment_texts(text)
|
|
70
|
-
@segment_texts ||= segment_positions.collect { |pos| text[pos] }
|
|
71
|
-
end
|
|
72
|
-
|
|
73
|
-
def segment_types
|
|
74
|
-
@segment_types ||= @tree.collect { |seg| seg.keys }.flatten
|
|
75
|
-
end
|
|
76
29
|
end
|
|
77
30
|
end
|
data/lib/bible_ref_parser.rb
CHANGED
|
@@ -7,6 +7,8 @@ require_relative "bible_ref_parser/bible_reference_text"
|
|
|
7
7
|
require_relative "bible_ref_parser/books"
|
|
8
8
|
require_relative "bible_ref_parser/parser"
|
|
9
9
|
require_relative "bible_ref_parser/segment"
|
|
10
|
+
require_relative "bible_ref_parser/reference_node"
|
|
11
|
+
require_relative "bible_ref_parser/segment_node"
|
|
10
12
|
require_relative "bible_ref_parser/segment_parser"
|
|
11
13
|
|
|
12
14
|
module BibleRefParser
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: bible_ref_parser
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.0
|
|
4
|
+
version: 0.3.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Fabio Papa
|
|
@@ -130,6 +130,7 @@ extra_rdoc_files: []
|
|
|
130
130
|
files:
|
|
131
131
|
- ".rspec"
|
|
132
132
|
- ".standard.yml"
|
|
133
|
+
- CLAUDE.md
|
|
133
134
|
- LICENSE.txt
|
|
134
135
|
- README.md
|
|
135
136
|
- Rakefile
|
|
@@ -141,7 +142,9 @@ files:
|
|
|
141
142
|
- lib/bible_ref_parser/bible_reference_text.rb
|
|
142
143
|
- lib/bible_ref_parser/books.rb
|
|
143
144
|
- lib/bible_ref_parser/parser.rb
|
|
145
|
+
- lib/bible_ref_parser/reference_node.rb
|
|
144
146
|
- lib/bible_ref_parser/segment.rb
|
|
147
|
+
- lib/bible_ref_parser/segment_node.rb
|
|
145
148
|
- lib/bible_ref_parser/segment_parser.rb
|
|
146
149
|
- lib/bible_ref_parser/version.rb
|
|
147
150
|
- sig/bible_ref_parser.rbs
|
|
@@ -167,7 +170,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
167
170
|
- !ruby/object:Gem::Version
|
|
168
171
|
version: '0'
|
|
169
172
|
requirements: []
|
|
170
|
-
rubygems_version:
|
|
173
|
+
rubygems_version: 4.0.8
|
|
171
174
|
specification_version: 4
|
|
172
175
|
summary: A parser for Bible references
|
|
173
176
|
test_files: []
|