bible_ref_parser 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 444fb0ee3218f9caa74109ee60f834efad718af2f2982453ac9918980d5da614
4
- data.tar.gz: 839da96a5fedb211d67b645cba63335ef12a16db86b89bf1bc8a52a86f4af783
3
+ metadata.gz: 785c346d7af39dc9c8f2eee1088d443fb78ff980c33572bd27c28610ba364b04
4
+ data.tar.gz: 87fcbaa84fb663b042352e4f78339a940922b673a62a76dd0bca0af68ca07d36
5
5
  SHA512:
6
- metadata.gz: 9a40f9a4068dc57f0316e2ac77a15ec27fd9709135eb0530c8dac2859b75fb0f2d25e0b11d5ea7e37cd4c08d145745c49d6a0c6c4e882f28195ff8cbde7e21c1
7
- data.tar.gz: f5234fd33d0c25a4d2ef4ac5d89db6bb771ff2ad4e93e4aaaf4452c30bd462756688ee60cf403d2fe00c503d61112522792821f68e76b912a86ccefe1c60241e
6
+ metadata.gz: a7d0e3c59c1af88185012447a6f7db2e2feb13a351fa3b524bf3cdac98c4c5dbdb387bd1a467d6b08140c4bc8ea7a241331e2c0955c8ed0e18bc19a12d8526fc
7
+ data.tar.gz: 79230b5b2b8a4b3bf1f2626089a3bc776552f92ca1f6ea5ec05d2364e2de1b976f5177f8ab37fef85c85212895f4e1d1a3321ad71c9820f157489f2b7f518c9c
data/CLAUDE.md ADDED
@@ -0,0 +1,45 @@
1
+ # CLAUDE.md
2
+
3
+ This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
4
+
5
+ ## Project
6
+
7
+ Ruby gem that parses Bible references from text using Parslet (~> 2.0) PEG parser. Given a string like `"Read John 3:16 and Romans 8:28"`, it segments the text into reference and plain-text parts with character positions.
8
+
9
+ ## Commands
10
+
11
+ ```bash
12
+ rake # Default: runs ci (spec + standard + yard)
13
+ rake spec # Run all RSpec tests
14
+ bundle exec rspec spec/lib/bible_ref_parser/parser_spec.rb # Run a single spec file
15
+ bundle exec rspec spec/lib/bible_ref_parser/parser_spec.rb:42 # Run a single example by line
16
+ rake standard # Lint with StandardRB
17
+ bundle exec standardrb --fix # Auto-fix lint issues
18
+ rake yard # Generate YARD docs
19
+ bin/console # Pry REPL with gem loaded
20
+ ```
21
+
22
+ ## Architecture
23
+
24
+ **Parsing pipeline:** `BibleReferenceText.new(text)` → `SegmentParser.new.parse(text)` → `Parser` (Parslet grammar) → `SegmentNode#to_segment` (one per raw parse-tree segment) → array of `Segment` objects.
25
+
26
+ - **Parser** (`lib/bible_ref_parser/parser.rb`) — Parslet grammar defining rules for book names, chapter:verse notation, ranges, cross-book ranges, and verse lists. Case-insensitive matching with abbreviation support.
27
+ - **SegmentParser** (`lib/bible_ref_parser/segment_parser.rb`) — Converts the Parslet parse tree into `Segment` objects, tracking character positions in the original text.
28
+ - **Segment** (`lib/bible_ref_parser/segment.rb`) — Represents a `:reference` or `:text` segment with original text and position offsets.
29
+ - **BibleReferenceText** (`lib/bible_ref_parser/bible_reference_text.rb`) — Main API class. Includes `Enumerable` and delegates array-like methods to its segments array.
30
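+ - **BibleReference** (`lib/bible_ref_parser/bible_reference.rb`) — `Data` class holding text, start_offset, end_offset, and an optional book_name; `parts` returns the individual references of a compound reference (or the reference itself when it is not compound).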
31
+ - **Books** (`lib/bible_ref_parser/books.rb`) — Dictionary of 66 Bible books (39 OT, 27 NT) with abbreviation variants.
32
+
33
+ On parse failure, the library gracefully degrades to a single text segment rather than raising.
34
+
35
+ ## Testing
36
+
37
+ RSpec with custom matchers in `spec/support/matchers/reference_parser_matchers.rb`. The custom `parse` matcher validates Parslet rule output. Tests cover unit, integration, and edge cases (Unicode, HTML tags, invalid input).
38
+
39
+ ## Linting
40
+
41
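+ StandardRB configured in `.standard.yml` targeting Ruby 2.7. Max method length: 15 lines. Note: `BibleReference` is built with `Data.define`, which requires Ruby 3.2 or newer, so the lint target and the runtime requirement differ.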
42
+
43
+ ## CI
44
+
45
+ GitHub Actions (`.github/workflows/main.yml`): Ruby 3.4.2 on ubuntu-latest, runs `bundle exec rake` on push to main and PRs.
@@ -0,0 +1,30 @@
1
+ module BibleRefParser
2
+ BibleReference = Data.define(:text, :start_offset, :end_offset, :book_name, :_parts) do
3
+ def initialize(text:, start_offset:, end_offset:, book_name: nil, parts: [])
4
+ super(
5
+ text: text, start_offset: start_offset, end_offset: end_offset,
6
+ book_name: book_name, _parts: parts.freeze
7
+ )
8
+ end
9
+
10
+ def parts
11
+ _parts.empty? ? [self] : _parts
12
+ end
13
+
14
+ def compound?
15
+ parts.size > 1
16
+ end
17
+
18
+ def qualified_text
19
+ if book_name && !text.start_with?(book_name)
20
+ "#{book_name} #{text}"
21
+ else
22
+ text
23
+ end
24
+ end
25
+
26
+ def to_s
27
+ text
28
+ end
29
+ end
30
+ end
@@ -1,35 +1,45 @@
1
+ require "forwardable"
2
+
1
3
  module BibleRefParser
2
4
  class BibleReferenceText
5
+ extend Forwardable
3
6
  include Enumerable
4
7
 
8
+ attr_reader :segments
9
+
10
+ def_delegators :segments, :each, :size, :empty?, :[], :last
11
+ alias_method :length, :size
12
+
5
13
  def initialize(text)
6
14
  raise InvalidInputError, "Expected a string, got #{text.class}" unless text.respond_to?(:to_str)
7
- @original_text = text.to_str
15
+ @text = text.to_str
8
16
  @segments = parse_segments
9
17
  end
10
18
 
11
- def segments
12
- @segments || []
13
- end
14
-
15
19
  def references
16
- segments.select(&:reference?)
20
+ segments.filter_map(&:bible_reference)
17
21
  end
18
22
 
19
- def each(&block)
20
- segments.each(&block)
23
+ def text_with_references
24
+ segments.map(&:to_s)
21
25
  end
22
26
 
23
27
  def to_s
24
28
  segments.map(&:to_s).join
25
29
  end
26
30
 
31
+ def inspect
32
+ displayed = segments.first(10).map(&:inspect)
33
+ displayed << "..." if segments.size > 10
34
+ "#<#{self.class} text=#{@text.inspect} segments=[#{displayed.join(", ")}]>"
35
+ end
36
+
27
37
  private
28
38
 
29
39
  def parse_segments
30
- SegmentParser.new.parse(@original_text)
40
+ SegmentParser.new.parse(@text)
31
41
  rescue Parslet::ParseFailed
32
- nil
42
+ [Segment.new(:text, @text, 0...@text.length)]
33
43
  end
34
44
  end
35
45
  end
@@ -73,8 +73,14 @@ module BibleRefParser
73
73
  (explicit_verse.as(:start) >> range_sep >> explicit_verse.as(:end)).as(:explicit_verse_range)
74
74
  end
75
75
 
76
+ # A verse number that is not followed by a colon, preventing
77
+ # it from consuming a chapter number in a chapter:verse pair
78
+ rule(:standalone_verse) do
79
+ (reference_int >> colon.absent?).as(:verse)
80
+ end
81
+
76
82
  rule(:verse_list) do
77
- (verse >> (list_sep >> verse).repeat(1)).as(:verse_list)
83
+ (verse >> (list_sep >> standalone_verse).repeat(1)).as(:verse_list)
78
84
  end
79
85
 
80
86
  rule(:chapter_with_verse_list) do
@@ -94,8 +100,8 @@ module BibleRefParser
94
100
  end
95
101
 
96
102
  rule(:indicator_list) do
97
- indicator.repeat(1).as(:indicator_list) |
98
- (indicator >> (list_sep >> indicator).repeat(1)).as(:indicator_list)
103
+ (indicator >> (list_sep >> indicator).repeat(1)).as(:indicator_list) |
104
+ indicator.repeat(1).as(:indicator_list)
99
105
  end
100
106
 
101
107
  rule(:cross_book_range) do
@@ -0,0 +1,85 @@
1
+ module BibleRefParser
2
+ # Wraps a raw reference hash from the Parslet parse tree.
3
+ # Encapsulates all slice traversal and position extraction logic,
4
+ # so callers can work with named concepts instead of raw nested hashes.
5
+ class ReferenceNode
6
+ def initialize(ref_hash)
7
+ @ref_hash = ref_hash
8
+ end
9
+
10
+ def book_name
11
+ @ref_hash[:book]&.to_s
12
+ end
13
+
14
+ # The first Parslet::Slice in the entire reference structure
15
+ def first_slice
16
+ leaf_first(@ref_hash)
17
+ end
18
+
19
+ # The last Parslet::Slice in the entire reference structure
20
+ def last_slice
21
+ leaf_last(@ref_hash)
22
+ end
23
+
24
+ # Character range covered by this reference in the original text
25
+ def position
26
+ book = @ref_hash[:book]
27
+ last = last_slice
28
+ book.offset...(last.offset + last.size)
29
+ end
30
+
31
+ def compound?
32
+ indicators.is_a?(Array) && indicators.size > 1
33
+ end
34
+
35
+ def to_bible_reference(text)
36
+ BibleReference.new(
37
+ text: text[position],
38
+ start_offset: position.begin,
39
+ end_offset: position.end,
40
+ book_name: book_name,
41
+ parts: extract_parts(text)
42
+ )
43
+ end
44
+
45
+ private
46
+
47
+ def indicators
48
+ @ref_hash[:indicator_list]
49
+ end
50
+
51
+ def extract_parts(text)
52
+ return [] unless compound?
53
+
54
+ indicators.each_with_index.map do |indicator, idx|
55
+ first = leaf_first(indicator)
56
+ last = leaf_last(indicator)
57
+ end_pos = last.offset + last.size
58
+ start_pos = (idx == 0) ? @ref_hash[:book].offset : first.offset
59
+
60
+ BibleReference.new(
61
+ text: text[start_pos...end_pos],
62
+ start_offset: start_pos,
63
+ end_offset: end_pos,
64
+ book_name: book_name
65
+ )
66
+ end
67
+ end
68
+
69
+ def leaf_first(node)
70
+ case node
71
+ when Hash then leaf_first(node.values.first)
72
+ when Array then leaf_first(node.first)
73
+ else node
74
+ end
75
+ end
76
+
77
+ def leaf_last(node)
78
+ case node
79
+ when Hash then leaf_last(node.values.last)
80
+ when Array then leaf_last(node.last)
81
+ else node
82
+ end
83
+ end
84
+ end
85
+ end
@@ -8,10 +8,11 @@ module BibleRefParser
8
8
  # @param type [Symbol] :reference or :text
9
9
  # @param original_text [String] The original text segment
10
10
  # @param position [Range] The character positions in the original string
11
- def initialize(type, original_text, position)
11
+ def initialize(type, original_text, position, bible_reference: nil)
12
12
  @type = type
13
13
  @original_text = original_text
14
14
  @position = position
15
+ @bible_reference = bible_reference
15
16
  validate!
16
17
  end
17
18
 
@@ -20,6 +21,10 @@ module BibleRefParser
20
21
  original_text
21
22
  end
22
23
 
24
+ def inspect
25
+ "#<#{self.class} type=:#{type} text=\"#{original_text}\">"
26
+ end
27
+
23
28
  # @return [Boolean] true if this is a Bible reference segment
24
29
  def reference?
25
30
  type == :reference
@@ -30,6 +35,18 @@ module BibleRefParser
30
35
  type == :text
31
36
  end
32
37
 
38
+ # @return [BibleReference, nil] a BibleReference for reference segments, nil for text
39
+ def bible_reference
40
+ return nil unless reference?
41
+ return @bible_reference if @bible_reference
42
+
43
+ BibleReference.new(
44
+ text: original_text,
45
+ start_offset: position.begin,
46
+ end_offset: position.end
47
+ )
48
+ end
49
+
33
50
  private
34
51
 
35
52
  def validate!
@@ -0,0 +1,45 @@
1
+ module BibleRefParser
2
+ # Wraps a raw segment hash (keyed +:reference+ or +:text+) from the Parslet
3
+ # parse tree and knows how to build a Segment from it.
4
+ class SegmentNode
5
+ def initialize(raw_segment)
6
+ @raw = raw_segment
7
+ end
8
+
9
+ def reference?
10
+ @raw.key?(:reference)
11
+ end
12
+
13
+ def text?
14
+ @raw.key?(:text)
15
+ end
16
+
17
+ # Character range covered by this segment in the original text
18
+ def position
19
+ if text?
20
+ t = @raw[:text]
21
+ t.offset...(t.offset + t.size)
22
+ else
23
+ reference_node.position
24
+ end
25
+ end
26
+
27
+ def to_segment(text)
28
+ pos = position
29
+ seg_text = text[pos]
30
+
31
+ if reference?
32
+ bible_ref = reference_node.to_bible_reference(text)
33
+ Segment.new(:reference, seg_text, pos, bible_reference: bible_ref)
34
+ else
35
+ Segment.new(:text, seg_text, pos)
36
+ end
37
+ end
38
+
39
+ private
40
+
41
+ def reference_node
42
+ @reference_node ||= ReferenceNode.new(@raw[:reference])
43
+ end
44
+ end
45
+ end
@@ -7,8 +7,8 @@ module BibleRefParser
7
7
  def parse(text)
8
8
  text = text.to_s.dup.freeze
9
9
  begin
10
- @tree = @reference_parser.parse(text)[:segments]
11
- segments = build_segments(text)
10
+ tree = @reference_parser.parse(text)[:segments]
11
+ segments = tree.map { |raw| SegmentNode.new(raw).to_segment(text) }
12
12
  validate_segments!(segments, text)
13
13
  segments
14
14
  rescue Parslet::ParseFailed, InvalidInputError
@@ -17,29 +17,8 @@ module BibleRefParser
17
17
  end
18
18
  end
19
19
 
20
- def get_last_element(element)
21
- if element.is_a?(Hash)
22
- return get_last_element(element.values.last)
23
- elsif element.is_a?(Array)
24
- return get_last_element(element.last)
25
- end
26
-
27
- element
28
- end
29
-
30
20
  private
31
21
 
32
- def build_segments(text)
33
- positions = segment_positions
34
- raise InvalidInputError, "Position mismatch" unless positions.size == @tree.size
35
-
36
- @tree.each_with_index.map do |seg, i|
37
- pos = positions[i]
38
- seg_text = text[pos]
39
- Segment.new(seg.keys.first, seg_text, pos)
40
- end
41
- end
42
-
43
22
  def validate_segments!(segments, full_text)
44
23
  # Verify the segments cover the entire input
45
24
  coverage = segments.map(&:position).reduce(0..0) { |a, b| a.begin..b.end }
@@ -47,31 +26,5 @@ module BibleRefParser
47
26
  raise InvalidInputError, "Input contains unparsed content"
48
27
  end
49
28
  end
50
-
51
- def position(first_slice, last_slice = first_slice)
52
- first_slice.offset...(last_slice.offset + last_slice.size)
53
- end
54
-
55
- def segment_positions
56
- @segment_positions ||= @tree.collect do |seg|
57
- if seg.key?(:text)
58
- t = seg[:text]
59
- position(t)
60
- elsif seg.key?(:reference)
61
- ref = seg[:reference]
62
- book = ref[:book]
63
- last = get_last_element(ref)
64
- position(book, last)
65
- end
66
- end
67
- end
68
-
69
- def segment_texts(text)
70
- @segment_texts ||= segment_positions.collect { |pos| text[pos] }
71
- end
72
-
73
- def segment_types
74
- @segment_types ||= @tree.collect { |seg| seg.keys }.flatten
75
- end
76
29
  end
77
30
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module BibleRefParser
4
- VERSION = "0.1.0"
4
+ VERSION = "0.3.0"
5
5
  end
@@ -2,10 +2,13 @@
2
2
 
3
3
  require_relative "bible_ref_parser/version"
4
4
 
5
+ require_relative "bible_ref_parser/bible_reference"
5
6
  require_relative "bible_ref_parser/bible_reference_text"
6
7
  require_relative "bible_ref_parser/books"
7
8
  require_relative "bible_ref_parser/parser"
8
9
  require_relative "bible_ref_parser/segment"
10
+ require_relative "bible_ref_parser/reference_node"
11
+ require_relative "bible_ref_parser/segment_node"
9
12
  require_relative "bible_ref_parser/segment_parser"
10
13
 
11
14
  module BibleRefParser
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bible_ref_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Fabio Papa
8
8
  bindir: exe
9
9
  cert_chain: []
10
- date: 2025-05-03 00:00:00.000000000 Z
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
11
  dependencies:
12
12
  - !ruby/object:Gem::Dependency
13
13
  name: parslet
@@ -130,6 +130,7 @@ extra_rdoc_files: []
130
130
  files:
131
131
  - ".rspec"
132
132
  - ".standard.yml"
133
+ - CLAUDE.md
133
134
  - LICENSE.txt
134
135
  - README.md
135
136
  - Rakefile
@@ -137,10 +138,13 @@ files:
137
138
  - ai_artifacts/spec.md
138
139
  - ai_artifacts/todo.md
139
140
  - lib/bible_ref_parser.rb
141
+ - lib/bible_ref_parser/bible_reference.rb
140
142
  - lib/bible_ref_parser/bible_reference_text.rb
141
143
  - lib/bible_ref_parser/books.rb
142
144
  - lib/bible_ref_parser/parser.rb
145
+ - lib/bible_ref_parser/reference_node.rb
143
146
  - lib/bible_ref_parser/segment.rb
147
+ - lib/bible_ref_parser/segment_node.rb
144
148
  - lib/bible_ref_parser/segment_parser.rb
145
149
  - lib/bible_ref_parser/version.rb
146
150
  - sig/bible_ref_parser.rbs
@@ -166,7 +170,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
166
170
  - !ruby/object:Gem::Version
167
171
  version: '0'
168
172
  requirements: []
169
- rubygems_version: 3.6.2
173
+ rubygems_version: 4.0.8
170
174
  specification_version: 4
171
175
  summary: A parser for Bible references
172
176
  test_files: []