bible_ref_parser 0.2.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 925e15f2c8fe0edb168af8bb3037d888fadec26ea67b9cdf5204be3ed0f2b00d
4
- data.tar.gz: a6dc3b012dbc430302a4b75d83afb3c003042c1233b002f41eb921628d61226c
3
+ metadata.gz: 8881964f74bd9d7be1e27232326be473469e457a5fbe8c9a5d234e668198e717
4
+ data.tar.gz: 9681a524b8b4f0936b4956aed6ba2918b00c66528cf5f7cb4393ddd13d7d5bfc
5
5
  SHA512:
6
- metadata.gz: 96aad3474367886425c9eaa962ea0a00885a35481b1e85ea12f790fafef0b40a09270cae2aa02f805fbb4b1565ddcc9c0182c1f78e24b1e05e8f1de58db7527d
7
- data.tar.gz: e03fe4f88db255410405296c1df3632dbc30dc370de075bbb258aa7d994f3fb4e203ac869e88aa7ad89b4094eec5ab678dee276675cbe1966c57011a67107ec6
6
+ metadata.gz: 4a560d88938857aa86f7c2b6a981e42b7c09e23ac101d2a1e058f91fb7713c817a1e0f4a8dcf28f8e271a4115cbad83a60be820ed6d71f8e36da9a4c26af28f7
7
+ data.tar.gz: c96e8ecbd32d8c51529512641283ca8485871e8810cae044589f8bdbe40f6b0bea3016b4247c1a33c9d59530be0c1b1158fe243b36bf6bb30b0f0f7d8ec104e1
data/.standard.yml CHANGED
@@ -1,4 +1,4 @@
1
- ruby_version: 2.7
1
+ ruby_version: 3.1
2
2
  ignore:
3
3
  - "bin/**/*"
4
4
  - "db/**/*"
data/CLAUDE.md ADDED
@@ -0,0 +1,45 @@
1
+ # CLAUDE.md
2
+
3
+ This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
4
+
5
+ ## Project
6
+
7
+ Ruby gem that parses Bible references from text using the Parslet (~> 2.0) PEG parser. Given a string like `"Read John 3:16 and Romans 8:28"`, it segments the text into reference and plain-text parts with character positions.
8
+
9
+ ## Commands
10
+
11
+ ```bash
12
+ rake # Default: runs ci (spec + standard + yard)
13
+ rake spec # Run all RSpec tests
14
+ bundle exec rspec spec/lib/bible_ref_parser/parser_spec.rb # Run a single spec file
15
+ bundle exec rspec spec/lib/bible_ref_parser/parser_spec.rb:42 # Run a single example by line
16
+ rake standard # Lint with StandardRB
17
+ bundle exec standardrb --fix # Auto-fix lint issues
18
+ rake yard # Generate YARD docs
19
+ bin/console # Pry REPL with gem loaded
20
+ ```
21
+
22
+ ## Architecture
23
+
24
+ **Parsing pipeline:** `BibleReferenceText.new(text)` → `SegmentParser.parse(text)` → `Parser` (Parslet grammar) → array of `Segment` objects.
25
+
26
+ - **Parser** (`lib/bible_ref_parser/parser.rb`) — Parslet grammar defining rules for book names, chapter:verse notation, ranges, cross-book ranges, and verse lists. Case-insensitive matching with abbreviation support.
27
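+ - **SegmentParser** (`lib/bible_ref_parser/segment_parser.rb`) — Converts the Parslet parse tree into `Segment` objects, tracking character positions in the original text. Each raw tree entry is wrapped in a `SegmentNode` (`lib/bible_ref_parser/segment_node.rb`), which builds the `Segment` and, for references, delegates position and part extraction to `ReferenceNode` (`lib/bible_ref_parser/reference_node.rb`).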
28
+ - **Segment** (`lib/bible_ref_parser/segment.rb`) — Represents a `:reference` or `:text` segment with original text and position offsets.
29
+ - **BibleReferenceText** (`lib/bible_ref_parser/bible_reference_text.rb`) — Main API class. Includes `Enumerable` and delegates array-like methods to its segments array.
30
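+ - **BibleReference** (`lib/bible_ref_parser/bible_reference.rb`) — `Data` class holding text, start_offset, end_offset, plus optional book_name, chapter, and parts (the sub-references of a compound reference; a non-compound reference returns itself as its only part).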
31
+ - **Books** (`lib/bible_ref_parser/books.rb`) — Dictionary of 66 Bible books (39 OT, 27 NT) with abbreviation variants.
32
+
33
+ On parse failure, the library gracefully degrades to a single text segment rather than raising.
34
+
35
+ ## Testing
36
+
37
+ RSpec with custom matchers in `spec/support/matchers/reference_parser_matchers.rb`. The custom `parse` matcher validates Parslet rule output. Tests cover unit, integration, and edge cases (Unicode, HTML tags, invalid input).
38
+
39
+ ## Linting
40
+
41
+ StandardRB configured in `.standard.yml` targeting Ruby 3.1. Max method length: 15 lines.
42
+
43
+ ## CI
44
+
45
+ GitHub Actions (`.github/workflows/main.yml`): Ruby 3.4.2 on ubuntu-latest, runs `bundle exec rake` on push to main and PRs.
@@ -1,5 +1,34 @@
1
1
  module BibleRefParser
2
- BibleReference = Data.define(:text, :start_offset, :end_offset) do
2
+ BibleReference = Data.define(:text, :start_offset, :end_offset, :book_name, :chapter, :_parts) do
3
+ def initialize(text:, start_offset:, end_offset:, book_name: nil, chapter: nil, parts: [])
4
+ super(text:, start_offset:, end_offset:, book_name:, chapter:, _parts: parts.freeze)
5
+ end
6
+
7
+ def parts
8
+ _parts.empty? ? [self] : _parts
9
+ end
10
+
11
+ def compound?
12
+ parts.size > 1
13
+ end
14
+
15
+ def same_chapter?
16
+ return false unless compound?
17
+
18
+ chapters = parts.map(&:chapter)
19
+ !chapters.first.nil? && chapters.uniq.size == 1
20
+ end
21
+
22
+ def qualified_text
23
+ return text unless book_name && !text.start_with?(book_name)
24
+
25
+ if chapter && !text.match?(/\A\d+:/)
26
+ "#{book_name} #{chapter}:#{text}"
27
+ else
28
+ "#{book_name} #{text}"
29
+ end
30
+ end
31
+
3
32
  def to_s
4
33
  text
5
34
  end
@@ -73,12 +73,20 @@ module BibleRefParser
73
73
  (explicit_verse.as(:start) >> range_sep >> explicit_verse.as(:end)).as(:explicit_verse_range)
74
74
  end
75
75
 
76
- rule(:verse_list) do
77
- (verse >> (list_sep >> verse).repeat(1)).as(:verse_list)
76
+ # A verse number that is not followed by a colon, preventing
77
+ # it from consuming a chapter number in a chapter:verse pair
78
+ rule(:standalone_verse) do
79
+ (reference_int >> colon.absent?).as(:verse)
78
80
  end
79
81
 
80
- rule(:chapter_with_verse_list) do
81
- (chapter >> colon >> verse_list).as(:chapter_with_verse_list)
82
+ rule(:verse_specifier) { verse_range | standalone_verse }
83
+
84
+ rule(:verse_specifier_list) do
85
+ (verse_specifier >> (list_sep >> verse_specifier).repeat(1)).as(:verse_specifier_list)
86
+ end
87
+
88
+ rule(:chapter_with_mixed_verses) do
89
+ (chapter >> colon >> verse_specifier_list).as(:chapter_with_mixed_verses)
82
90
  end
83
91
 
84
92
  rule(:chapter_with_verse_range) do
@@ -86,7 +94,7 @@ module BibleRefParser
86
94
  end
87
95
 
88
96
  rule(:chapter_with_verses) do
89
- chapter_with_verse_list | chapter_with_verse_range
97
+ chapter_with_mixed_verses | chapter_with_verse_range
90
98
  end
91
99
 
92
100
  rule(:indicator) do
@@ -94,8 +102,8 @@ module BibleRefParser
94
102
  end
95
103
 
96
104
  rule(:indicator_list) do
97
- indicator.repeat(1).as(:indicator_list) |
98
- (indicator >> (list_sep >> indicator).repeat(1)).as(:indicator_list)
105
+ (indicator >> (list_sep >> indicator).repeat(1)).as(:indicator_list) |
106
+ indicator.repeat(1).as(:indicator_list)
99
107
  end
100
108
 
101
109
  rule(:cross_book_range) do
@@ -0,0 +1,133 @@
1
+ module BibleRefParser
2
+ # Wraps a raw reference hash from the Parslet parse tree.
3
+ # Encapsulates all slice traversal and position extraction logic,
4
+ # so callers can work with named concepts instead of raw nested hashes.
5
+ class ReferenceNode
6
+ def initialize(ref_hash)
7
+ @ref_hash = ref_hash
8
+ end
9
+
10
+ def book_name
11
+ book&.to_s
12
+ end
13
+
14
+ # Character range covered by this reference in the original text
15
+ def position
16
+ book.offset...end_offset(indicators.last)
17
+ end
18
+
19
+ def compound?
20
+ multiple_indicators? || one_chapter_verse_list?
21
+ end
22
+
23
+ def to_bible_reference(text)
24
+ BibleReference.new(
25
+ text: text[position],
26
+ start_offset: position.begin,
27
+ end_offset: position.end,
28
+ book_name: book_name,
29
+ parts: extract_parts(text)
30
+ )
31
+ end
32
+
33
+ private
34
+
35
+ def book
36
+ @ref_hash[:book]
37
+ end
38
+
39
+ def indicators
40
+ @ref_hash[:indicator_list]
41
+ end
42
+
43
+ def extract_parts(text)
44
+ return [] unless compound?
45
+
46
+ if one_chapter_verse_list?
47
+ extract_verse_list_parts(text)
48
+ else
49
+ extract_parts_per_indicator(text)
50
+ end
51
+ end
52
+
53
+ def multiple_indicators?
54
+ indicators in [_, _, *]
55
+ end
56
+
57
+ def one_chapter_verse_list?
58
+ indicators in [{chapter_with_mixed_verses: {verse_specifier_list: [_, _, *]}}]
59
+ end
60
+
61
+ def extract_parts_per_indicator(text)
62
+ indicators.flat_map.with_index do |indicator, idx|
63
+ if indicator in {chapter_with_mixed_verses: {chapter: chapter_slice, verse_specifier_list: specifiers}}
64
+ first_start = (idx == 0) ? book.offset : chapter_slice.offset
65
+ extract_mixed_verse_parts(text, chapter_slice, specifiers, first_part_start: first_start)
66
+ else
67
+ end_pos = end_offset(indicator)
68
+ start_pos = (idx == 0) ? book.offset : start_offset(indicator)
69
+
70
+ BibleReference.new(
71
+ text: text[start_pos...end_pos],
72
+ start_offset: start_pos,
73
+ end_offset: end_pos,
74
+ book_name: book_name
75
+ )
76
+ end
77
+ end
78
+ end
79
+
80
+ def extract_verse_list_parts(text)
81
+ indicators => [{chapter_with_mixed_verses: {chapter: chapter_slice, verse_specifier_list: specifiers}}]
82
+ extract_mixed_verse_parts(text, chapter_slice, specifiers, first_part_start: book.offset)
83
+ end
84
+
85
+ def extract_mixed_verse_parts(text, chapter_slice, specifiers, first_part_start:)
86
+ specifiers.each_with_index.map do |spec, idx|
87
+ case spec
88
+ in {verse_range: {start: {verse: start_verse}, end: {verse: end_verse}}}
89
+ end_pos = end_verse.offset + end_verse.size
90
+ start_pos = (idx == 0) ? first_part_start : start_verse.offset
91
+ in {verse: verse_slice}
92
+ end_pos = verse_slice.offset + verse_slice.size
93
+ start_pos = (idx == 0) ? first_part_start : verse_slice.offset
94
+ end
95
+
96
+ BibleReference.new(
97
+ text: text[start_pos...end_pos],
98
+ start_offset: start_pos,
99
+ end_offset: end_pos,
100
+ book_name: book_name,
101
+ chapter: chapter_slice.to_s
102
+ )
103
+ end
104
+ end
105
+
106
+ def start_offset(indicator)
107
+ case indicator
108
+ in {chapter: first_slice}
109
+ in {explicit_verse: {chapter: first_slice}}
110
+ in {chapter_range: {start: {chapter: first_slice}}}
111
+ in {chapter_with_verse_range: {chapter: first_slice}}
112
+ in {chapter_with_mixed_verses: {chapter: first_slice}}
113
+ in {explicit_verse_range: {start: {explicit_verse: {chapter: first_slice}}}}
114
+ end
115
+
116
+ first_slice.offset
117
+ end
118
+
119
+ def end_offset(indicator)
120
+ case indicator
121
+ in {chapter: last_slice}
122
+ in {explicit_verse: {verse: last_slice}}
123
+ in {chapter_range: {end: {chapter: last_slice}}}
124
+ in {chapter_with_verse_range: {verse_range: {end: {verse: last_slice}}}}
125
+ in {chapter_with_mixed_verses: {verse_specifier_list: [*, {verse: last_slice}]}}
126
+ in {chapter_with_mixed_verses: {verse_specifier_list: [*, {verse_range: {end: {verse: last_slice}}}]}}
127
+ in {explicit_verse_range: {end: {explicit_verse: {verse: last_slice}}}}
128
+ end
129
+
130
+ last_slice.offset + last_slice.size
131
+ end
132
+ end
133
+ end
@@ -8,10 +8,11 @@ module BibleRefParser
8
8
  # @param type [Symbol] :reference or :text
9
9
  # @param original_text [String] The original text segment
10
10
  # @param position [Range] The character positions in the original string
11
- def initialize(type, original_text, position)
11
+ def initialize(type, original_text, position, bible_reference: nil)
12
12
  @type = type
13
13
  @original_text = original_text
14
14
  @position = position
15
+ @bible_reference = bible_reference
15
16
  validate!
16
17
  end
17
18
 
@@ -37,6 +38,7 @@ module BibleRefParser
37
38
  # @return [BibleReference, nil] a BibleReference for reference segments, nil for text
38
39
  def bible_reference
39
40
  return nil unless reference?
41
+ return @bible_reference if @bible_reference
40
42
 
41
43
  BibleReference.new(
42
44
  text: original_text,
@@ -0,0 +1,45 @@
1
+ module BibleRefParser
2
+ # Wraps a raw segment hash (keyed +:reference+ or +:text+) from the Parslet
3
+ # parse tree and knows how to build a Segment from it.
4
+ class SegmentNode
5
+ def initialize(raw_segment)
6
+ @raw = raw_segment
7
+ end
8
+
9
+ def reference?
10
+ @raw.key?(:reference)
11
+ end
12
+
13
+ def text?
14
+ @raw.key?(:text)
15
+ end
16
+
17
+ # Character range covered by this segment in the original text
18
+ def position
19
+ if text?
20
+ t = @raw[:text]
21
+ t.offset...(t.offset + t.size)
22
+ else
23
+ reference_node.position
24
+ end
25
+ end
26
+
27
+ def to_segment(text)
28
+ pos = position
29
+ seg_text = text[pos]
30
+
31
+ if reference?
32
+ bible_ref = reference_node.to_bible_reference(text)
33
+ Segment.new(:reference, seg_text, pos, bible_reference: bible_ref)
34
+ else
35
+ Segment.new(:text, seg_text, pos)
36
+ end
37
+ end
38
+
39
+ private
40
+
41
+ def reference_node
42
+ @reference_node ||= ReferenceNode.new(@raw[:reference])
43
+ end
44
+ end
45
+ end
@@ -7,8 +7,8 @@ module BibleRefParser
7
7
  def parse(text)
8
8
  text = text.to_s.dup.freeze
9
9
  begin
10
- @tree = @reference_parser.parse(text)[:segments]
11
- segments = build_segments(text)
10
+ tree = @reference_parser.parse(text)[:segments]
11
+ segments = tree.map { |raw| SegmentNode.new(raw).to_segment(text) }
12
12
  validate_segments!(segments, text)
13
13
  segments
14
14
  rescue Parslet::ParseFailed, InvalidInputError
@@ -17,29 +17,8 @@ module BibleRefParser
17
17
  end
18
18
  end
19
19
 
20
- def get_last_element(element)
21
- if element.is_a?(Hash)
22
- return get_last_element(element.values.last)
23
- elsif element.is_a?(Array)
24
- return get_last_element(element.last)
25
- end
26
-
27
- element
28
- end
29
-
30
20
  private
31
21
 
32
- def build_segments(text)
33
- positions = segment_positions
34
- raise InvalidInputError, "Position mismatch" unless positions.size == @tree.size
35
-
36
- @tree.each_with_index.map do |seg, i|
37
- pos = positions[i]
38
- seg_text = text[pos]
39
- Segment.new(seg.keys.first, seg_text, pos)
40
- end
41
- end
42
-
43
22
  def validate_segments!(segments, full_text)
44
23
  # Verify the segments cover the entire input
45
24
  coverage = segments.map(&:position).reduce(0..0) { |a, b| a.begin..b.end }
@@ -47,31 +26,5 @@ module BibleRefParser
47
26
  raise InvalidInputError, "Input contains unparsed content"
48
27
  end
49
28
  end
50
-
51
- def position(first_slice, last_slice = first_slice)
52
- first_slice.offset...(last_slice.offset + last_slice.size)
53
- end
54
-
55
- def segment_positions
56
- @segment_positions ||= @tree.collect do |seg|
57
- if seg.key?(:text)
58
- t = seg[:text]
59
- position(t)
60
- elsif seg.key?(:reference)
61
- ref = seg[:reference]
62
- book = ref[:book]
63
- last = get_last_element(ref)
64
- position(book, last)
65
- end
66
- end
67
- end
68
-
69
- def segment_texts(text)
70
- @segment_texts ||= segment_positions.collect { |pos| text[pos] }
71
- end
72
-
73
- def segment_types
74
- @segment_types ||= @tree.collect { |seg| seg.keys }.flatten
75
- end
76
29
  end
77
30
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module BibleRefParser
4
- VERSION = "0.2.0"
4
+ VERSION = "0.4.0"
5
5
  end
@@ -7,6 +7,8 @@ require_relative "bible_ref_parser/bible_reference_text"
7
7
  require_relative "bible_ref_parser/books"
8
8
  require_relative "bible_ref_parser/parser"
9
9
  require_relative "bible_ref_parser/segment"
10
+ require_relative "bible_ref_parser/reference_node"
11
+ require_relative "bible_ref_parser/segment_node"
10
12
  require_relative "bible_ref_parser/segment_parser"
11
13
 
12
14
  module BibleRefParser
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bible_ref_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Fabio Papa
@@ -130,6 +130,7 @@ extra_rdoc_files: []
130
130
  files:
131
131
  - ".rspec"
132
132
  - ".standard.yml"
133
+ - CLAUDE.md
133
134
  - LICENSE.txt
134
135
  - README.md
135
136
  - Rakefile
@@ -141,7 +142,9 @@ files:
141
142
  - lib/bible_ref_parser/bible_reference_text.rb
142
143
  - lib/bible_ref_parser/books.rb
143
144
  - lib/bible_ref_parser/parser.rb
145
+ - lib/bible_ref_parser/reference_node.rb
144
146
  - lib/bible_ref_parser/segment.rb
147
+ - lib/bible_ref_parser/segment_node.rb
145
148
  - lib/bible_ref_parser/segment_parser.rb
146
149
  - lib/bible_ref_parser/version.rb
147
150
  - sig/bible_ref_parser.rbs
@@ -167,7 +170,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
167
170
  - !ruby/object:Gem::Version
168
171
  version: '0'
169
172
  requirements: []
170
- rubygems_version: 3.6.8
173
+ rubygems_version: 4.0.8
171
174
  specification_version: 4
172
175
  summary: A parser for Bible references
173
176
  test_files: []