bible_ref_parser 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 925e15f2c8fe0edb168af8bb3037d888fadec26ea67b9cdf5204be3ed0f2b00d
4
- data.tar.gz: a6dc3b012dbc430302a4b75d83afb3c003042c1233b002f41eb921628d61226c
3
+ metadata.gz: 785c346d7af39dc9c8f2eee1088d443fb78ff980c33572bd27c28610ba364b04
4
+ data.tar.gz: 87fcbaa84fb663b042352e4f78339a940922b673a62a76dd0bca0af68ca07d36
5
5
  SHA512:
6
- metadata.gz: 96aad3474367886425c9eaa962ea0a00885a35481b1e85ea12f790fafef0b40a09270cae2aa02f805fbb4b1565ddcc9c0182c1f78e24b1e05e8f1de58db7527d
7
- data.tar.gz: e03fe4f88db255410405296c1df3632dbc30dc370de075bbb258aa7d994f3fb4e203ac869e88aa7ad89b4094eec5ab678dee276675cbe1966c57011a67107ec6
6
+ metadata.gz: a7d0e3c59c1af88185012447a6f7db2e2feb13a351fa3b524bf3cdac98c4c5dbdb387bd1a467d6b08140c4bc8ea7a241331e2c0955c8ed0e18bc19a12d8526fc
7
+ data.tar.gz: 79230b5b2b8a4b3bf1f2626089a3bc776552f92ca1f6ea5ec05d2364e2de1b976f5177f8ab37fef85c85212895f4e1d1a3321ad71c9820f157489f2b7f518c9c
data/CLAUDE.md ADDED
@@ -0,0 +1,45 @@
1
+ # CLAUDE.md
2
+
3
+ This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
4
+
5
+ ## Project
6
+
7
+ Ruby gem that parses Bible references from text using the Parslet (~> 2.0) PEG parser. Given a string like `"Read John 3:16 and Romans 8:28"`, it segments the text into reference and plain-text parts with character positions.
8
+
9
+ ## Commands
10
+
11
+ ```bash
12
+ rake # Default: runs ci (spec + standard + yard)
13
+ rake spec # Run all RSpec tests
14
+ bundle exec rspec spec/lib/bible_ref_parser/parser_spec.rb # Run a single spec file
15
+ bundle exec rspec spec/lib/bible_ref_parser/parser_spec.rb:42 # Run a single example by line
16
+ rake standard # Lint with StandardRB
17
+ bundle exec standardrb --fix # Auto-fix lint issues
18
+ rake yard # Generate YARD docs
19
+ bin/console # Pry REPL with gem loaded
20
+ ```
21
+
22
+ ## Architecture
23
+
24
+ **Parsing pipeline:** `BibleReferenceText.new(text)` → `SegmentParser.parse(text)` → `Parser` (Parslet grammar) → array of `Segment` objects.
25
+
26
+ - **Parser** (`lib/bible_ref_parser/parser.rb`) — Parslet grammar defining rules for book names, chapter:verse notation, ranges, cross-book ranges, and verse lists. Case-insensitive matching with abbreviation support.
27
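+ - **SegmentParser** (`lib/bible_ref_parser/segment_parser.rb`) — Converts the Parslet parse tree into `Segment` objects by wrapping each raw segment in a `SegmentNode` (which uses `ReferenceNode` for reference segments), tracking character positions in the original text.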
28
+ - **Segment** (`lib/bible_ref_parser/segment.rb`) — Represents a `:reference` or `:text` segment with original text and position offsets.
29
+ - **BibleReferenceText** (`lib/bible_ref_parser/bible_reference_text.rb`) — Main API class. Includes `Enumerable` and delegates array-like methods to its segments array.
30
+ - **BibleReference** (`lib/bible_ref_parser/bible_reference.rb`) — `Data` class holding text, start_offset, end_offset, book_name, and the sub-references (`parts`) of a compound reference.
31
+ - **Books** (`lib/bible_ref_parser/books.rb`) — Dictionary of 66 Bible books (39 OT, 27 NT) with abbreviation variants.
32
+
33
+ On parse failure, the library gracefully degrades to a single text segment rather than raising.
34
+
35
+ ## Testing
36
+
37
+ RSpec with custom matchers in `spec/support/matchers/reference_parser_matchers.rb`. The custom `parse` matcher validates Parslet rule output. Tests cover unit, integration, and edge cases (Unicode, HTML tags, invalid input).
38
+
39
+ ## Linting
40
+
41
+ StandardRB configured in `.standard.yml` targeting Ruby 2.7. Max method length: 15 lines.
42
+
43
+ ## CI
44
+
45
+ GitHub Actions (`.github/workflows/main.yml`): Ruby 3.4.2 on ubuntu-latest, runs `bundle exec rake` on push to main and PRs.
@@ -1,5 +1,28 @@
1
1
  module BibleRefParser
2
- BibleReference = Data.define(:text, :start_offset, :end_offset) do
2
+ BibleReference = Data.define(:text, :start_offset, :end_offset, :book_name, :_parts) do
3
# Builds an immutable reference value.
#
# The Data member that stores the sub-references is named +_parts+, while
# the public keyword is +parts+. Both spellings are accepted so that code
# which feeds the members back in under their own names keeps working:
# +new(**ref.to_h)+, five-argument positional construction, and
# NOTE(review): +Data#with+ on Ruby versions where it re-runs #initialize —
# confirm against the supported Ruby range.
#
# @param text [String] the reference text
# @param start_offset [Integer] offset where the reference starts
# @param end_offset [Integer] offset where the reference ends
# @param book_name [String, nil] book the reference belongs to, if known
# @param parts [Array, nil] sub-references of a compound reference
# @param _parts [Array, nil] same as +parts+, under the member's own name;
#   only consulted when +parts+ is not given
def initialize(text:, start_offset:, end_offset:, book_name: nil, parts: nil, _parts: nil)
  # Copy before freezing so the caller's array is never frozen as a side
  # effect, and treat an explicit nil like "no parts".
  sub_references = (parts || _parts || []).dup.freeze

  super(
    text: text, start_offset: start_offset, end_offset: end_offset,
    book_name: book_name, _parts: sub_references
  )
end
9
+
10
# The individual references that make up this one.
#
# A reference stored without sub-references counts as its own single part,
# so the result is never empty.
#
# @return [Array] the stored sub-references, or +[self]+ when there are none
def parts
  return [self] if _parts.empty?

  _parts
end
13
+
14
# Whether this reference consists of more than one part.
#
# @return [Boolean] true when #parts has two or more entries
def compound?
  parts.length >= 2
end
17
+
18
# The reference text, prefixed with the book name when the text does not
# already begin with it (e.g. a later part of a compound reference whose
# own text carries only chapter and verse).
#
# @return [String] +text+ itself, or "<book_name> <text>"
def qualified_text
  return text unless book_name
  return text if text.start_with?(book_name)

  "#{book_name} #{text}"
end
25
+
3
26
  def to_s
4
27
  text
5
28
  end
@@ -73,8 +73,14 @@ module BibleRefParser
73
73
  (explicit_verse.as(:start) >> range_sep >> explicit_verse.as(:end)).as(:explicit_verse_range)
74
74
  end
75
75
 
76
# Matches a verse number only when no colon comes right after it, so the
# chapter number of a following chapter:verse pair is not consumed as a
# verse.
rule(:standalone_verse) { (reference_int >> colon.absent?).as(:verse) }
81
+
76
82
  rule(:verse_list) do
77
- (verse >> (list_sep >> verse).repeat(1)).as(:verse_list)
83
+ (verse >> (list_sep >> standalone_verse).repeat(1)).as(:verse_list)
78
84
  end
79
85
 
80
86
  rule(:chapter_with_verse_list) do
@@ -94,8 +100,8 @@ module BibleRefParser
94
100
  end
95
101
 
96
102
  rule(:indicator_list) do
97
- indicator.repeat(1).as(:indicator_list) |
98
- (indicator >> (list_sep >> indicator).repeat(1)).as(:indicator_list)
103
+ (indicator >> (list_sep >> indicator).repeat(1)).as(:indicator_list) |
104
+ indicator.repeat(1).as(:indicator_list)
99
105
  end
100
106
 
101
107
  rule(:cross_book_range) do
@@ -0,0 +1,85 @@
1
module BibleRefParser
  # Wraps a raw reference hash from the Parslet parse tree.
  # Encapsulates all slice traversal and position extraction logic,
  # so callers can work with named concepts instead of raw nested hashes.
  class ReferenceNode
    # @param ref_hash [Hash] raw reference subtree; this class reads its
    #   +:book+ and +:indicator_list+ entries and walks everything else
    #   generically
    def initialize(ref_hash)
      @ref_hash = ref_hash
    end

    # @return [String, nil] the +:book+ entry converted with +to_s+, or nil
    #   when the hash has no +:book+ entry
    def book_name
      @ref_hash[:book]&.to_s
    end

    # The first Parslet::Slice in the entire reference structure
    def first_slice
      leaf_first(@ref_hash)
    end

    # The last Parslet::Slice in the entire reference structure
    def last_slice
      leaf_last(@ref_hash)
    end

    # Character range covered by this reference in the original text.
    #
    # Starts at the book slice when there is one, otherwise at the first
    # leaf slice, and ends just past the last leaf slice.
    #
    # @return [Range] end-exclusive range of offsets
    def position
      closing = last_slice
      start_slice.offset...(closing.offset + closing.size)
    end

    # @return [Boolean] true when +:indicator_list+ is an Array with more
    #   than one entry
    def compound?
      indicators.is_a?(Array) && indicators.size > 1
    end

    # Builds the BibleReference for this node.
    #
    # @param text [String] the full original text the offsets refer to
    # @return [BibleReference]
    def to_bible_reference(text)
      span = position # computed once; each call walks the whole subtree

      BibleReference.new(
        text: text[span],
        start_offset: span.begin,
        end_offset: span.end,
        book_name: book_name,
        parts: extract_parts(text)
      )
    end

    private

    def indicators
      @ref_hash[:indicator_list]
    end

    # Slice the reference starts at. Falls back to the first leaf when the
    # hash carries no +:book+ entry, mirroring the nil-tolerance of
    # #book_name instead of failing on +nil.offset+.
    def start_slice
      @ref_hash[:book] || first_slice
    end

    # One BibleReference per indicator of a compound reference; empty for a
    # non-compound one. The first part is stretched back to the start of
    # the reference so that its text includes the book name.
    def extract_parts(text)
      return [] unless compound?

      indicators.each_with_index.map do |indicator, idx|
        closing = leaf_last(indicator)
        start_pos = idx.zero? ? start_slice.offset : leaf_first(indicator).offset
        end_pos = closing.offset + closing.size

        BibleReference.new(
          text: text[start_pos...end_pos],
          start_offset: start_pos,
          end_offset: end_pos,
          book_name: book_name
        )
      end
    end

    # Descends through first hash values / first array elements until it
    # reaches something that is neither a Hash nor an Array.
    def leaf_first(node)
      case node
      when Hash then leaf_first(node.values.first)
      when Array then leaf_first(node.first)
      else node
      end
    end

    # Descends through last hash values / last array elements until it
    # reaches something that is neither a Hash nor an Array.
    def leaf_last(node)
      case node
      when Hash then leaf_last(node.values.last)
      when Array then leaf_last(node.last)
      else node
      end
    end
  end
end
@@ -8,10 +8,11 @@ module BibleRefParser
8
8
  # @param type [Symbol] :reference or :text
9
9
  # @param original_text [String] The original text segment
10
10
  # @param position [Range] The character positions in the original string
11
- def initialize(type, original_text, position)
11
+ def initialize(type, original_text, position, bible_reference: nil)
12
12
  @type = type
13
13
  @original_text = original_text
14
14
  @position = position
15
+ @bible_reference = bible_reference
15
16
  validate!
16
17
  end
17
18
 
@@ -37,6 +38,7 @@ module BibleRefParser
37
38
  # @return [BibleReference, nil] a BibleReference for reference segments, nil for text
38
39
  def bible_reference
39
40
  return nil unless reference?
41
+ return @bible_reference if @bible_reference
40
42
 
41
43
  BibleReference.new(
42
44
  text: original_text,
@@ -0,0 +1,45 @@
1
module BibleRefParser
  # Wraps a raw segment hash (keyed +:reference+ or +:text+) from the Parslet
  # parse tree and knows how to build a Segment from it.
  class SegmentNode
    # @param raw_segment [Hash] one raw segment entry of the parse tree
    def initialize(raw_segment)
      @raw = raw_segment
    end

    # @return [Boolean] true when the raw hash has a +:reference+ key
    def reference?
      @raw.key?(:reference)
    end

    # @return [Boolean] true when the raw hash has a +:text+ key
    def text?
      @raw.key?(:text)
    end

    # Character range covered by this segment in the original text.
    # Text segments are measured from their own slice; anything else is
    # delegated to the wrapped ReferenceNode.
    #
    # @return [Range] end-exclusive range of offsets
    def position
      return reference_node.position unless text?

      slice = @raw[:text]
      slice.offset...(slice.offset + slice.size)
    end

    # Builds the Segment for this node.
    #
    # @param text [String] the full original text the offsets refer to
    # @return [Segment] a +:reference+ segment carrying its BibleReference,
    #   or a plain +:text+ segment
    def to_segment(text)
      span = position
      covered = text[span]
      return Segment.new(:text, covered, span) unless reference?

      Segment.new(
        :reference, covered, span,
        bible_reference: reference_node.to_bible_reference(text)
      )
    end

    private

    # Lazily built wrapper around the +:reference+ subtree.
    def reference_node
      @reference_node ||= ReferenceNode.new(@raw[:reference])
    end
  end
end
@@ -7,8 +7,8 @@ module BibleRefParser
7
7
  def parse(text)
8
8
  text = text.to_s.dup.freeze
9
9
  begin
10
- @tree = @reference_parser.parse(text)[:segments]
11
- segments = build_segments(text)
10
+ tree = @reference_parser.parse(text)[:segments]
11
+ segments = tree.map { |raw| SegmentNode.new(raw).to_segment(text) }
12
12
  validate_segments!(segments, text)
13
13
  segments
14
14
  rescue Parslet::ParseFailed, InvalidInputError
@@ -17,29 +17,8 @@ module BibleRefParser
17
17
  end
18
18
  end
19
19
 
20
- def get_last_element(element)
21
- if element.is_a?(Hash)
22
- return get_last_element(element.values.last)
23
- elsif element.is_a?(Array)
24
- return get_last_element(element.last)
25
- end
26
-
27
- element
28
- end
29
-
30
20
  private
31
21
 
32
- def build_segments(text)
33
- positions = segment_positions
34
- raise InvalidInputError, "Position mismatch" unless positions.size == @tree.size
35
-
36
- @tree.each_with_index.map do |seg, i|
37
- pos = positions[i]
38
- seg_text = text[pos]
39
- Segment.new(seg.keys.first, seg_text, pos)
40
- end
41
- end
42
-
43
22
  def validate_segments!(segments, full_text)
44
23
  # Verify the segments cover the entire input
45
24
  coverage = segments.map(&:position).reduce(0..0) { |a, b| a.begin..b.end }
@@ -47,31 +26,5 @@ module BibleRefParser
47
26
  raise InvalidInputError, "Input contains unparsed content"
48
27
  end
49
28
  end
50
-
51
- def position(first_slice, last_slice = first_slice)
52
- first_slice.offset...(last_slice.offset + last_slice.size)
53
- end
54
-
55
- def segment_positions
56
- @segment_positions ||= @tree.collect do |seg|
57
- if seg.key?(:text)
58
- t = seg[:text]
59
- position(t)
60
- elsif seg.key?(:reference)
61
- ref = seg[:reference]
62
- book = ref[:book]
63
- last = get_last_element(ref)
64
- position(book, last)
65
- end
66
- end
67
- end
68
-
69
- def segment_texts(text)
70
- @segment_texts ||= segment_positions.collect { |pos| text[pos] }
71
- end
72
-
73
- def segment_types
74
- @segment_types ||= @tree.collect { |seg| seg.keys }.flatten
75
- end
76
29
  end
77
30
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module BibleRefParser
4
- VERSION = "0.2.0"
4
+ VERSION = "0.3.0"
5
5
  end
@@ -7,6 +7,8 @@ require_relative "bible_ref_parser/bible_reference_text"
7
7
  require_relative "bible_ref_parser/books"
8
8
  require_relative "bible_ref_parser/parser"
9
9
  require_relative "bible_ref_parser/segment"
10
+ require_relative "bible_ref_parser/reference_node"
11
+ require_relative "bible_ref_parser/segment_node"
10
12
  require_relative "bible_ref_parser/segment_parser"
11
13
 
12
14
  module BibleRefParser
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bible_ref_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Fabio Papa
@@ -130,6 +130,7 @@ extra_rdoc_files: []
130
130
  files:
131
131
  - ".rspec"
132
132
  - ".standard.yml"
133
+ - CLAUDE.md
133
134
  - LICENSE.txt
134
135
  - README.md
135
136
  - Rakefile
@@ -141,7 +142,9 @@ files:
141
142
  - lib/bible_ref_parser/bible_reference_text.rb
142
143
  - lib/bible_ref_parser/books.rb
143
144
  - lib/bible_ref_parser/parser.rb
145
+ - lib/bible_ref_parser/reference_node.rb
144
146
  - lib/bible_ref_parser/segment.rb
147
+ - lib/bible_ref_parser/segment_node.rb
145
148
  - lib/bible_ref_parser/segment_parser.rb
146
149
  - lib/bible_ref_parser/version.rb
147
150
  - sig/bible_ref_parser.rbs
@@ -167,7 +170,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
167
170
  - !ruby/object:Gem::Version
168
171
  version: '0'
169
172
  requirements: []
170
- rubygems_version: 3.6.8
173
+ rubygems_version: 4.0.8
171
174
  specification_version: 4
172
175
  summary: A parser for Bible references
173
176
  test_files: []