cskit 1.0.1 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. checksums.yaml +4 -4
  2. data/Gemfile +7 -4
  3. data/History.txt +5 -0
  4. data/cskit.gemspec +5 -8
  5. data/lib/cskit.rb +15 -66
  6. data/lib/cskit/annotated_string.rb +1 -1
  7. data/lib/cskit/annotator.rb +1 -3
  8. data/lib/cskit/formatters.rb +3 -4
  9. data/lib/cskit/formatters/bible.rb +4 -5
  10. data/lib/cskit/formatters/bible/bible_html_formatter.rb +2 -2
  11. data/lib/cskit/formatters/bible/bible_json_formatter.rb +17 -0
  12. data/lib/cskit/formatters/bible/bible_plain_text_formatter.rb +3 -3
  13. data/lib/cskit/formatters/science_health.rb +3 -5
  14. data/lib/cskit/formatters/science_health/science_health_html_formatter.rb +3 -4
  15. data/lib/cskit/formatters/science_health/science_health_plain_text_formatter.rb +5 -4
  16. data/lib/cskit/lesson.rb +3 -5
  17. data/lib/cskit/lesson/lesson.rb +3 -3
  18. data/lib/cskit/lesson/section.rb +1 -1
  19. data/lib/cskit/parsers.rb +6 -3
  20. data/lib/cskit/parsers/bible.rb +10 -0
  21. data/lib/cskit/parsers/bible/bible_parser.rb +192 -0
  22. data/lib/cskit/parsers/bible/bible_tokenizer.rb +32 -0
  23. data/lib/cskit/parsers/parser.rb +68 -0
  24. data/lib/cskit/parsers/science_health.rb +10 -0
  25. data/lib/cskit/parsers/science_health/science_health_parser.rb +201 -0
  26. data/lib/cskit/parsers/science_health/science_health_tokenizer.rb +33 -0
  27. data/lib/cskit/parsers/token.rb +17 -0
  28. data/lib/cskit/parsers/tokenizer.rb +43 -0
  29. data/lib/cskit/readers.rb +4 -4
  30. data/lib/cskit/readers/bible_reader.rb +2 -2
  31. data/lib/cskit/readers/reading.rb +8 -1
  32. data/lib/cskit/readers/science_health_reader.rb +8 -8
  33. data/lib/cskit/registry.rb +65 -0
  34. data/lib/cskit/resources/volumes.rb +3 -3
  35. data/lib/cskit/resources/volumes/bible.rb +11 -9
  36. data/lib/cskit/resources/volumes/science_health.rb +10 -9
  37. data/lib/cskit/version.rb +1 -1
  38. data/lib/cskit/volume.rb +1 -1
  39. data/spec/parsers/bible/bible_parser_spec.rb +205 -0
  40. data/spec/parsers/science_health/science_health_parser_spec.rb +153 -0
  41. data/spec/spec_helper.rb +8 -0
  42. metadata +16 -38
  43. data/lib/cskit/parsers/bible/bible.rb +0 -1005
  44. data/lib/cskit/parsers/bible/bible.treetop +0 -64
  45. data/lib/cskit/parsers/bible/nodes.rb +0 -153
  46. data/lib/cskit/parsers/bible/objects.rb +0 -81
  47. data/lib/cskit/parsers/science_health/nodes.rb +0 -82
  48. data/lib/cskit/parsers/science_health/objects.rb +0 -47
  49. data/lib/cskit/parsers/science_health/science_health.rb +0 -607
  50. data/lib/cskit/parsers/science_health/science_health.treetop +0 -44
@@ -0,0 +1,32 @@
1
+ # encoding: UTF-8
2
+
3
+ module CSKit
4
+ module Parsers
5
+ module Bible
6
+
7
# Tokenizer for Bible citation strings.
#
# Supplies the token patterns; the scanning loop itself lives in the
# CSKit::Parsers::Tokenizer base class.
class BibleTokenizer < CSKit::Parsers::Tokenizer
  # Token type => pattern, each anchored to the start of the remaining text.
  #
  # Order is significant: the base tokenizer walks the hash in insertion
  # order and uses the first pattern that matches, so the specific patterns
  # (:to, :cardinality, :number) must stay ahead of the catch-all :text.
  #
  # NOTE(review): :to matches any run that merely starts with "to", so a
  # lowercase word such as "together" is split into :to + :text. Confirm
  # this is intended before tightening the pattern.
  #
  # Frozen so the shared table cannot be modified by accident.
  PATTERNS = {
    left_paren: /\A\(/,
    right_paren: /\A\)/,
    dash: /\A-/,
    colon: /\A:/,
    semicolon: /\A;/,
    comma: /\A,/,
    to: /\Ato/,
    cardinality: /\A(1st|2nd|3rd|4th)/,
    number: /\A\d+/,
    text: /\A[^\s\(\);:,]+/,
    space: /\A[\s\t]+/
  }.freeze

  private

  # Returns the pattern table consumed by Tokenizer#each_token.
  def patterns
    PATTERNS
  end
end
28
+
29
+ end
30
+ end
31
+ end
32
+
@@ -0,0 +1,68 @@
1
+ # encoding: UTF-8
2
+
3
+ module CSKit
4
+ module Parsers
5
# Raised when a citation does not match the grammar a parser expects.
class ParserError < StandardError; end

# Base class for the citation parsers.
#
# Subclasses implement `entry_point` (the top-level grammar rule) and
# `get_token_stream` (an external enumerator of tokens responding to
# `next` and `peek`). The parser always holds one token of lookahead in
# `current`; once every token has been consumed `current` is the synthetic
# :eos token.
class Parser
  # citation_text - the raw citation string to parse.
  def initialize(citation_text)
    @citation_text = citation_text
    @token_stream = get_token_stream
    @current = first_token
  end

  # Runs the grammar and returns whatever `entry_point` produces.
  #
  # Raises ParserError if any token is left unconsumed afterwards.
  def parse
    result = entry_point

    # `current` only becomes :eos after the last real token was consumed.
    # Checking the stream alone (eos?) would miss a single trailing token
    # that is still sitting in `current`.
    unless current.type == :eos
      raise ParserError, "Expected end of input but more input is available "\
        "at position #{current.position}"
    end

    result
  end

  # Top-level grammar rule; must be overridden.
  def entry_point
    raise NotImplementedError,
      "`#{__method__}' must be defined in derived classes"
  end

  private

  # Builds the token enumerator for citation_text; must be overridden.
  def get_token_stream
    raise NotImplementedError,
      "`#{__method__}' must be defined in derived classes"
  end

  attr_reader :citation_text, :token_stream, :current

  # Fetches the initial lookahead token. Empty input yields the :eos token
  # instead of leaking StopIteration (which an enclosing Kernel#loop would
  # silently swallow).
  def first_token
    token_stream.next
  rescue StopIteration
    eos_token
  end

  # True when the stream has no tokens beyond `current`.
  def eos?
    token_stream.peek
    false
  rescue StopIteration
    true
  end

  # Synthetic end-of-stream token positioned just past the input.
  def eos_token
    @eos_token ||= Token.new(:eos, nil, citation_text.size)
  end

  # Asserts that `current` is one of token_types, then advances.
  #
  # Raises ParserError on a type mismatch, or when asked to advance past
  # the :eos token.
  def next_token(*token_types)
    unless token_types.include?(current.type)
      raise ParserError, "Expected #{token_types.join(', ')} but got "\
        "#{current.type} ('#{current.value}') at position #{current.position}"
    end

    if eos?
      raise(ParserError, 'Unexpected end of input') if current.type == :eos

      @current = eos_token
    else
      @current = token_stream.next
    end
  end
end
67
+ end
68
+ end
@@ -0,0 +1,10 @@
1
+ # encoding: UTF-8
2
+
3
# Namespace for the ScienceHealth citation parser and its tokenizer.
module CSKit
  module Parsers
    module ScienceHealth
      # Both classes are loaded lazily, on first reference to the constant.
      autoload :ScienceHealthParser,
               'cskit/parsers/science_health/science_health_parser'
      autoload :ScienceHealthTokenizer,
               'cskit/parsers/science_health/science_health_tokenizer'
    end
  end
end
@@ -0,0 +1,201 @@
1
+ # encoding: UTF-8
2
+
3
+ module CSKit
4
+ module Parsers
5
+ module ScienceHealth
6
+
7
# Cardinality the parser assumes when a fragment carries no explicit
# ordinal token.
DEFAULT_CARDINALITY = 1

# A parsed citation: one page reference plus the line entries cited on it.
Citation = Struct.new(:page, :lines) do
  # Renders the citation as "<page>:<line>, <line>, ...".
  def to_s
    line_text = lines.map { |entry| entry.to_s }.join(", ")
    "#{page}:#{line_text}"
  end

  # Hash form of the citation; every line entry is converted via #to_hash.
  def to_hash
    line_hashes = lines.map { |entry| entry.to_hash }
    { page: page, lines: line_hashes }
  end
end
21
+
22
# One line reference within a citation.
#
# start      - first line number.
# finish     - last line number (the parser sets it equal to start for a
#              single line); may be nil.
# starter    - optional object describing where on the line the reading
#              begins; must respond to #fragment and #to_hash.
# terminator - optional object describing where the reading ends; must
#              respond to #to_hash.
Line = Struct.new(:start, :finish, :starter, :terminator) do
  # Human-readable form: "3", "3-5", "3 (only)", "3-5 Spirit", ...
  #
  # A range is printed only when finish differs from start, so a
  # single-line reference does not come out as "3-3".
  def to_s
    range = finish && finish != start ? "#{start}-#{finish}" : start.to_s

    parts = [range]
    parts << '(only)' if only?
    parts << start_fragment if start_fragment
    parts.join(' ')
  end

  # True when the line is terminated by the "(only)" marker.
  def only?
    terminator.is_a?(OnlyTerminator)
  end

  # The starter's text fragment, or nil when the line has no starter.
  def start_fragment
    starter && starter.fragment
  end

  # Hash form; starter and terminator are converted via #to_hash when set.
  def to_hash
    {
      start: start,
      finish: finish,
      starter: starter ? starter.to_hash : nil,
      terminator: terminator ? terminator.to_hash : nil
    }
  end
end
44
+
45
# A text fragment together with its ordinal occurrence on the line
# (e.g. the 2nd occurrence of "God").
class Positional
  # Ordinal labels for every cardinality the tokenizer can produce
  # (1st through 4th).
  ORDINALS = { 1 => '1st', 2 => '2nd', 3 => '3rd', 4 => '4th' }.freeze

  attr_reader :cardinality, :fragment

  # cardinality - Integer occurrence number, or nil when unspecified.
  # fragment    - the text fragment itself.
  def initialize(cardinality, fragment)
    @cardinality = cardinality
    @fragment = fragment
  end

  # "<ordinal> <fragment>" when the cardinality has a known label,
  # otherwise just the fragment (never a string with a stray leading space).
  def to_s
    ordinal = ORDINALS[cardinality]
    ordinal ? "#{ordinal} #{fragment}" : fragment.to_s
  end

  # Hash form with :cardinality and :fragment keys.
  def to_hash
    {
      cardinality: cardinality,
      fragment: fragment
    }
  end
end

# Marks where on a line a reading starts.
class Starter < Positional
end

# Marks the fragment at which a reading stops.
class FragmentTerminator < Positional
end
80
+
81
# Terminator for the "(only)" marker. It carries no state, so callers
# share the single memoized object returned by .instance.
class OnlyTerminator
  class << self
    # Returns the shared instance, creating it on first use.
    def instance
      @instance ||= new
    end
  end

  # Hash form of the marker.
  def to_hash
    { only: true }
  end

  private

  def initialize
  end
end
93
+
94
+
95
# Recursive-descent parser for citations shaped like
#
#   <page>:<line>[-<line>] [<starter>] [(<terminator>)][, <line> ...]
#
# Tokens come from ScienceHealthTokenizer; the lookahead and `next_token`
# machinery is inherited from CSKit::Parsers::Parser.
class ScienceHealthParser < CSKit::Parsers::Parser
  # Top-level rule. Returns a Citation.
  def entry_point
    page
  end

  private

  def get_token_stream
    ScienceHealthTokenizer.new(citation_text).each_token.lazy
  end

  # page := page_number ':' line_list
  def page
    page_num = page_number
    next_token(:colon)

    Citation.new(page_num, line_list)
  end

  # Returns the page token's raw value (a String) and consumes the token.
  def page_number
    current.value.tap { next_token(:page_number, :number) }
  end

  # line_list := line (',' line)* [',']
  #
  # The end-of-input test looks at `current`, not at the underlying stream:
  # after the comma is consumed the stream is already exhausted whenever the
  # final line is a single token (as in "5:3, 4"), and stopping there would
  # silently drop that line. Only a genuine trailing comma leaves `current`
  # as the :eos token.
  def line_list
    lines = [line]

    while current.type == :comma
      next_token(:comma)
      break if current.type == :eos

      lines << line
    end

    lines
  end

  # line := number ['-' number] [starter] ['(' terminator ')']
  #
  # finish defaults to start when no range is given.
  def line
    start = current.value.tap { next_token(:number) }.to_i
    finish = start

    if current.type == :dash
      next_token(:dash)
      finish = current.value.tap { next_token(:number) }.to_i
    end

    starter = line_starter
    terminator = line_terminator

    Line.new(start, finish, starter, terminator)
  end

  # starter := [cardinality] fragment
  #
  # Returns a Starter, or nil when the lookahead is neither :text nor
  # :cardinality.
  def line_starter
    case current.type
      when :text, :cardinality
        card = cardinality
        fragment = current.value
        next_token(:text, :colon, :comma)
        Starter.new(card, fragment)
    end
  end

  # terminator := '(' ('to' [cardinality] fragment | 'only') ')'
  #
  # Returns nil when the lookahead is not an opening parenthesis.
  def line_terminator
    return unless current.type == :left_paren

    next_token(:left_paren)

    terminator = if current.type == :to
      fragment_terminator
    else
      only_terminator
    end

    next_token(:right_paren)
    terminator
  end

  def fragment_terminator
    next_token(:to)

    card = cardinality
    fragment = current.value
    next_token(:text, :colon, :comma)

    FragmentTerminator.new(card, fragment)
  end

  def only_terminator
    next_token(:only)
    OnlyTerminator.instance
  end

  # Consumes an optional ordinal token ("2nd" => 2 via String#to_i);
  # falls back to DEFAULT_CARDINALITY when none is present.
  def cardinality
    if current.type == :cardinality
      current.value.tap { next_token(:cardinality) }.to_i
    else
      DEFAULT_CARDINALITY
    end
  end
end
199
+ end
200
+ end
201
+ end
@@ -0,0 +1,33 @@
1
+ # encoding: UTF-8
2
+
3
+ module CSKit
4
+ module Parsers
5
+ module ScienceHealth
6
+
7
# Tokenizer for ScienceHealth citation strings.
#
# Supplies the token patterns; the scanning loop itself lives in the
# CSKit::Parsers::Tokenizer base class.
class ScienceHealthTokenizer < CSKit::Parsers::Tokenizer
  # Token type => pattern, each anchored to the start of the remaining text.
  #
  # Order is significant: the base tokenizer walks the hash in insertion
  # order and uses the first pattern that matches, so the specific patterns
  # must stay ahead of the catch-all :text.
  #
  # Frozen so the shared table cannot be modified by accident.
  PATTERNS = {
    left_paren: /\A\(/,
    right_paren: /\A\)/,
    dash: /\A-/,
    colon: /\A:/,
    comma: /\A,/,
    to: /\Ato/,
    only: /\Aonly(?=\))/,                         # must precede a closing paren
    cardinality: /\A(1st|2nd|3rd|4th)/,
    page_number: /\A(vii|viii|ix|x|xi|xii)(?=:)/, # must precede a colon
    number: /\A\d+/,
    text: /\A[^\s\(\):,]+/,
    space: /\A[\s\t]+/
  }.freeze

  private

  # Returns the pattern table consumed by Tokenizer#each_token.
  def patterns
    PATTERNS
  end
end
29
+
30
+ end
31
+ end
32
+ end
33
+
@@ -0,0 +1,17 @@
1
+ # encoding: UTF-8
2
+
3
+ module CSKit
4
+ module Parsers
5
+
6
# A lexical token: its type, the matched text, and the offset at which the
# match began in the citation.
class Token
  attr_reader :type
  attr_reader :value
  attr_reader :position

  def initialize(type, value, position)
    @type, @value, @position = type, value, position
  end
end
15
+
16
+ end
17
+ end
@@ -0,0 +1,43 @@
1
+ # encoding: UTF-8
2
+
3
+ module CSKit
4
+ module Parsers
5
# Base class for tokenizers.
#
# Subclasses supply `patterns`, a hash of token type => Regexp. Patterns
# are tried in insertion order against the start of the remaining text and
# the first one that matches wins.
class Tokenizer
  attr_reader :citation

  # citation - the String to split into tokens.
  def initialize(citation)
    @citation = citation
  end

  # Yields a Token for each lexeme in the citation; :space matches are
  # skipped but still advance the position. Returns an Enumerator when no
  # block is given.
  #
  # Raises ArgumentError when no pattern matches at the current position,
  # or when a pattern matches the empty string. Either way the scan could
  # never advance and would otherwise loop forever.
  def each_token
    return to_enum(__method__) unless block_given?

    remaining = citation
    pos = 0

    until remaining.empty?
      token_type, match = first_match(remaining)

      if match.nil? || match[0].empty?
        raise ArgumentError,
          "Unable to tokenize #{remaining[0].inspect} at position #{pos}"
      end

      lexeme = match[0]
      yield Token.new(token_type, lexeme, pos) unless token_type == :space

      # Slicing builds a new string, so the caller's citation is never
      # modified.
      remaining = remaining[lexeme.size..-1]
      pos += lexeme.size
    end
  end

  private

  # Returns [token_type, MatchData] for the first pattern that matches
  # text, or nil when none does.
  def first_match(text)
    patterns.each_pair do |token_type, pattern|
      match = pattern.match(text)
      return [token_type, match] if match
    end

    nil
  end

  def patterns
    raise NotImplementedError,
      "`#{__method__}' must be implemented by derived classes"
  end
end
42
+ end
43
+ end
data/lib/cskit/readers.rb CHANGED
@@ -2,8 +2,8 @@
2
2
 
3
3
  module CSKit
4
4
  module Readers
5
- autoload :ScienceHealthReader, "cskit/readers/science_health_reader"
6
- autoload :BibleReader, "cskit/readers/bible_reader"
7
- autoload :Reading, "cskit/readers/reading"
5
+ autoload :ScienceHealthReader, 'cskit/readers/science_health_reader'
6
+ autoload :BibleReader, 'cskit/readers/bible_reader'
7
+ autoload :Reading, 'cskit/readers/reading'
8
8
  end
9
- end
9
+ end
@@ -34,7 +34,7 @@ module CSKit
34
34
  end
35
35
  end
36
36
 
37
- protected
37
+ private
38
38
 
39
39
  def map_verse_texts_for(chapter, book_name)
40
40
  result = []
@@ -50,7 +50,7 @@ module CSKit
50
50
  end
51
51
 
52
52
  def convert_book_name(book_name)
53
- volume.unabbreviate_book_name(book_name).downcase.gsub(" ", "_")
53
+ volume.unabbreviate_book_name(book_name).downcase.gsub(' ', '_')
54
54
  end
55
55
 
56
56
  end