cskit 1.0.1 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (50) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +7 -4
  3. data/History.txt +5 -0
  4. data/cskit.gemspec +5 -8
  5. data/lib/cskit.rb +15 -66
  6. data/lib/cskit/annotated_string.rb +1 -1
  7. data/lib/cskit/annotator.rb +1 -3
  8. data/lib/cskit/formatters.rb +3 -4
  9. data/lib/cskit/formatters/bible.rb +4 -5
  10. data/lib/cskit/formatters/bible/bible_html_formatter.rb +2 -2
  11. data/lib/cskit/formatters/bible/bible_json_formatter.rb +17 -0
  12. data/lib/cskit/formatters/bible/bible_plain_text_formatter.rb +3 -3
  13. data/lib/cskit/formatters/science_health.rb +3 -5
  14. data/lib/cskit/formatters/science_health/science_health_html_formatter.rb +3 -4
  15. data/lib/cskit/formatters/science_health/science_health_plain_text_formatter.rb +5 -4
  16. data/lib/cskit/lesson.rb +3 -5
  17. data/lib/cskit/lesson/lesson.rb +3 -3
  18. data/lib/cskit/lesson/section.rb +1 -1
  19. data/lib/cskit/parsers.rb +6 -3
  20. data/lib/cskit/parsers/bible.rb +10 -0
  21. data/lib/cskit/parsers/bible/bible_parser.rb +192 -0
  22. data/lib/cskit/parsers/bible/bible_tokenizer.rb +32 -0
  23. data/lib/cskit/parsers/parser.rb +68 -0
  24. data/lib/cskit/parsers/science_health.rb +10 -0
  25. data/lib/cskit/parsers/science_health/science_health_parser.rb +201 -0
  26. data/lib/cskit/parsers/science_health/science_health_tokenizer.rb +33 -0
  27. data/lib/cskit/parsers/token.rb +17 -0
  28. data/lib/cskit/parsers/tokenizer.rb +43 -0
  29. data/lib/cskit/readers.rb +4 -4
  30. data/lib/cskit/readers/bible_reader.rb +2 -2
  31. data/lib/cskit/readers/reading.rb +8 -1
  32. data/lib/cskit/readers/science_health_reader.rb +8 -8
  33. data/lib/cskit/registry.rb +65 -0
  34. data/lib/cskit/resources/volumes.rb +3 -3
  35. data/lib/cskit/resources/volumes/bible.rb +11 -9
  36. data/lib/cskit/resources/volumes/science_health.rb +10 -9
  37. data/lib/cskit/version.rb +1 -1
  38. data/lib/cskit/volume.rb +1 -1
  39. data/spec/parsers/bible/bible_parser_spec.rb +205 -0
  40. data/spec/parsers/science_health/science_health_parser_spec.rb +153 -0
  41. data/spec/spec_helper.rb +8 -0
  42. metadata +16 -38
  43. data/lib/cskit/parsers/bible/bible.rb +0 -1005
  44. data/lib/cskit/parsers/bible/bible.treetop +0 -64
  45. data/lib/cskit/parsers/bible/nodes.rb +0 -153
  46. data/lib/cskit/parsers/bible/objects.rb +0 -81
  47. data/lib/cskit/parsers/science_health/nodes.rb +0 -82
  48. data/lib/cskit/parsers/science_health/objects.rb +0 -47
  49. data/lib/cskit/parsers/science_health/science_health.rb +0 -607
  50. data/lib/cskit/parsers/science_health/science_health.treetop +0 -44
@@ -0,0 +1,32 @@
1
+ # encoding: UTF-8
2
+
3
module CSKit
  module Parsers
    module Bible

      # Tokenizer for Bible citation strings. It only supplies the pattern
      # table; the scanning loop lives in CSKit::Parsers::Tokenizer.
      class BibleTokenizer < CSKit::Parsers::Tokenizer
        # Token type => pattern, each anchored to the start of the remaining
        # input. Insertion order is significant: the base tokenizer walks the
        # table in order and takes the first pattern that matches, so the
        # specific patterns must come before the catch-all :text.
        #
        # Frozen so the shared table cannot be altered at runtime.
        #
        # NOTE(review): /\Ato/ also matches the first two letters of longer
        # lowercase words (e.g. "together" => :to + :text "gether") --
        # confirm whether a word boundary is wanted here.
        PATTERNS = {
          left_paren: /\A\(/,
          right_paren: /\A\)/,
          dash: /\A-/,
          colon: /\A:/,
          semicolon: /\A;/,
          comma: /\A,/,
          to: /\Ato/,
          cardinality: /\A(1st|2nd|3rd|4th)/,
          number: /\A\d+/,
          text: /\A[^\s\(\);:,]+/,
          space: /\A[\s\t]+/
        }.freeze

        private

        # @return [Hash{Symbol => Regexp}] the pattern table used for scanning
        def patterns
          PATTERNS
        end
      end

    end
  end
end
32
+
@@ -0,0 +1,68 @@
1
+ # encoding: UTF-8
2
+
3
module CSKit
  module Parsers
    # Raised when the token stream does not match what the parser expects.
    class ParserError < StandardError; end

    # Base class for token-driven citation parsers. Subclasses provide
    # `entry_point` (the top-level grammar rule) and `get_token_stream`
    # (an external enumerator of tokens responding to `next` and `peek`).
    class Parser
      # @param citation_text [String] the raw citation to parse
      def initialize(citation_text)
        @citation_text = citation_text
        @token_stream = get_token_stream
        # A stream with no tokens starts directly on the :eos sentinel
        # instead of letting StopIteration escape from the constructor.
        @current = eos? ? eos_token : token_stream.next
      end

      # Runs the grammar and verifies that every token was consumed.
      #
      # @return [Object] whatever `entry_point` returns
      # @raise [ParserError] if input remains after `entry_point` finishes
      def parse
        result = entry_point

        # `eos?` only reports whether the underlying stream is exhausted; the
        # token held in `current` may still be unconsumed at that point. The
        # parse is complete only once `current` is the :eos sentinel.
        unless current.type == :eos
          raise ParserError, "Expected end of input but more input is available "\
            "at position #{current.position}"
        end

        result
      end

      # Top-level grammar rule; must be overridden.
      #
      # @raise [NotImplementedError] always, in this base class
      def entry_point
        raise NotImplementedError,
          "`#{__method__}' must be defined in derived classes"
      end

      private

      # Builds the token enumerator; must be overridden.
      #
      # @raise [NotImplementedError] always, in this base class
      def get_token_stream
        raise NotImplementedError,
          "`#{__method__}' must be defined in derived classes"
      end

      attr_reader :citation_text, :token_stream, :current

      # @return [Boolean] true when the underlying stream has no more tokens
      #   (says nothing about whether `current` has been consumed)
      def eos?
        token_stream.peek
        false
      rescue StopIteration
        true
      end

      # Sentinel token positioned just past the end of the citation text.
      def eos_token
        @eos_token ||= Token.new(:eos, nil, citation_text.size)
      end

      # Asserts that the current token has one of the given types, then
      # advances to the next token (or to the :eos sentinel).
      #
      # @param token_types [Array<Symbol>] acceptable types for `current`
      # @raise [ParserError] on a type mismatch, or when asked to advance
      #   past the :eos sentinel
      def next_token(*token_types)
        unless token_types.include?(current.type)
          raise ParserError, "Expected #{token_types.join(', ')} but got "\
            "#{current.type} ('#{current.value}') at position #{current.position}"
        end

        if eos?
          raise(ParserError, 'Unexpected end of input') if current.type == :eos

          @current = eos_token
        else
          @current = token_stream.next
        end
      end
    end
  end
end
@@ -0,0 +1,10 @@
1
+ # encoding: UTF-8
2
+
3
module CSKit
  module Parsers
    # Namespace for the ScienceHealth citation parser and tokenizer. Both
    # constants are registered for autoloading, so their files are only
    # required on first reference.
    module ScienceHealth
      {
        ScienceHealthParser: 'cskit/parsers/science_health/science_health_parser',
        ScienceHealthTokenizer: 'cskit/parsers/science_health/science_health_tokenizer'
      }.each do |const_name, feature_path|
        autoload const_name, feature_path
      end
    end
  end
end
@@ -0,0 +1,201 @@
1
+ # encoding: UTF-8
2
+
3
+ module CSKit
4
+ module Parsers
5
+ module ScienceHealth
6
+
7
+ DEFAULT_CARDINALITY = 1
8
+
9
# A parsed citation: a page identifier plus the list of line entries
# cited on that page.
Citation = Struct.new(:page, :lines) do
  # Renders as "<page>:<line>, <line>, ...".
  #
  # @return [String]
  def to_s
    rendered_lines = lines.map { |entry| entry.to_s }
    "#{page}:" + rendered_lines.join(", ")
  end

  # @return [Hash] the page and the hash form of every line entry
  def to_hash
    line_hashes = lines.map { |entry| entry.to_hash }
    { page: page, lines: line_hashes }
  end
end
21
+
22
# One line entry of a citation: a start line, an end line, and optional
# starter / terminator qualifiers.
Line = Struct.new(:start, :finish, :starter, :terminator) do
  # Renders the entry in the same order the parser reads it: the line
  # number or range, then the starter, then the parenthesised terminator,
  # e.g. "3-5 For (to 2nd .)" or "7 (only)".
  #
  # The previous implementation called `only?` and `start_fragment`, which
  # are not members of this Struct, so every call raised NoMethodError.
  #
  # @return [String]
  def to_s
    # The parser stores finish == start for a single line; show a range only
    # when the two actually differ.
    parts = [finish && finish != start ? "#{start}-#{finish}" : start.to_s]
    parts << starter.to_s if starter

    if terminator
      # A terminator exposing `fragment` is a "to <fragment>" terminator;
      # anything else is treated as the "only" marker.
      parts << (terminator.respond_to?(:fragment) ? "(to #{terminator})" : '(only)')
    end

    parts.join(' ')
  end

  # @return [Hash] plain-hash form; starter and terminator are converted
  #   with their own `to_hash`, or nil when absent
  def to_hash
    {
      start: start,
      finish: finish,
      starter: starter ? starter.to_hash : nil,
      terminator: terminator ? terminator.to_hash : nil
    }
  end
end
44
+
45
# A text fragment qualified by which occurrence of it is meant
# (1 => first occurrence, 2 => second, ...).
class Positional
  # Ordinal suffixes that are not the default "th".
  ORDINAL_SUFFIXES = { 1 => 'st', 2 => 'nd', 3 => 'rd' }.freeze

  attr_reader :cardinality, :fragment

  # @param cardinality [Integer, nil] occurrence number, or nil for none
  # @param fragment [String] the text fragment
  def initialize(cardinality, fragment)
    @cardinality = cardinality
    @fragment = fragment
  end

  # Renders as "<ordinal> <fragment>" (e.g. "2nd ."), or just the fragment
  # when no cardinality is set.
  #
  # Previously only 1-3 had an ordinal, so a cardinality of 4 (which the
  # tokenizer accepts as "4th") rendered as " fragment" with a stray
  # leading space.
  #
  # @return [String]
  def to_s
    return fragment unless cardinality

    "#{ordinal} #{fragment}"
  end

  # @return [Hash] the cardinality and fragment
  def to_hash
    {
      cardinality: cardinality,
      fragment: fragment
    }
  end

  private

  # English ordinal for the cardinality. Only small values are expected;
  # everything other than 1-3 simply gets "th".
  def ordinal
    "#{cardinality}#{ORDINAL_SUFFIXES.fetch(cardinality, 'th')}"
  end
end
74
+
75
# Positional fragment marking where within a line the cited text begins;
# the parser builds one from the text that follows a line number.
class Starter < Positional; end
77
+
78
# Positional fragment marking where the cited text ends; the parser builds
# one from a parenthesised "to <fragment>" clause.
class FragmentTerminator < Positional; end
80
+
81
# Terminator produced by a parenthesised "only" clause. It carries no
# state, so one shared instance is handed out via `.instance`.
class OnlyTerminator
  class << self
    # @return [OnlyTerminator] the memoized shared instance
    def instance
      @instance ||= new
    end
  end

  # Takes no arguments (initialize is implicitly private in Ruby).
  def initialize; end

  # @return [Hash] always `{ only: true }`
  def to_hash
    { only: true }
  end
end
93
+
94
+
95
# Parser for citations of the form
#
#   <page>:<line>[-<line>] [<starter>] [(only) | (to <fragment>)], ...
#
# where starters and fragments may be prefixed with a cardinality such as
# "2nd". Produces a Citation holding one Line per comma-separated entry.
class ScienceHealthParser < CSKit::Parsers::Parser
  # Top-level rule.
  #
  # @return [Citation]
  def entry_point
    page
  end

  private

  # Token source: a lazy enumerator over the tokenizer's output.
  def get_token_stream
    ScienceHealthTokenizer.new(citation_text).each_token.lazy
  end

  # page := page_number ':' line_list
  def page
    page_num = page_number
    next_token(:colon)
    llist = line_list

    Citation.new(page_num, llist)
  end

  # Returns the page identifier as the raw token text (not converted to an
  # integer, since :page_number tokens are roman numerals).
  def page_number
    current.value.tap { next_token(:page_number, :number) }
  end

  # line_list := line (',' line)* [',']
  #
  # @return [Array<Line>]
  def line_list
    [].tap do |list|
      loop do
        list << line

        break unless current.type == :comma

        next_token(:comma)

        # Stop only when the comma was the last token (a trailing comma),
        # i.e. `current` is now the :eos sentinel. The previous check used
        # `eos?`, which is already true while `current` still holds the
        # final, not-yet-parsed token, so an entry consisting of a single
        # token after the last comma (e.g. the "3" in "1:2, 3") was dropped.
        break if current.type == :eos
      end
    end
  end

  # line := number ['-' number] [starter] [terminator]
  #
  # @return [Line] with finish equal to start when no range is given
  def line
    start = current.value.tap { next_token(:number) }.to_i
    finish = start

    if current.type == :dash
      next_token(:dash)
      finish = current.value.tap { next_token(:number) }.to_i
    end

    starter = line_starter
    terminator = line_terminator

    Line.new(start, finish, starter, terminator)
  end

  # starter := [cardinality] (text | ':' | ',')
  #
  # @return [Starter, nil] nil when the current token cannot begin a starter
  def line_starter
    case current.type
    when :text, :cardinality
      card = cardinality
      fragment = current.value
      next_token(:text, :colon, :comma)
      Starter.new(card, fragment)
    end
  end

  # terminator := '(' ('to' fragment | 'only') ')'
  #
  # @return [FragmentTerminator, OnlyTerminator, nil] nil when the current
  #   token is not an opening parenthesis
  def line_terminator
    return unless current.type == :left_paren

    next_token(:left_paren)

    terminator = if current.type == :to
      fragment_terminator
    else
      only_terminator
    end

    next_token(:right_paren)
    terminator
  end

  # 'to' [cardinality] (text | ':' | ',')
  def fragment_terminator
    next_token(:to)

    card = cardinality
    fragment = current.value
    next_token(:text, :colon, :comma)

    FragmentTerminator.new(card, fragment)
  end

  # 'only'
  def only_terminator
    next_token(:only)
    OnlyTerminator.instance
  end

  # Consumes an optional cardinality token ("2nd" => 2); falls back to
  # DEFAULT_CARDINALITY when none is present.
  def cardinality
    if current.type == :cardinality
      current.value.tap { next_token(:cardinality) }.to_i
    else
      DEFAULT_CARDINALITY
    end
  end
end
199
+ end
200
+ end
201
+ end
@@ -0,0 +1,33 @@
1
+ # encoding: UTF-8
2
+
3
module CSKit
  module Parsers
    module ScienceHealth

      # Tokenizer for ScienceHealth citation strings. It only supplies the
      # pattern table; the scanning loop lives in CSKit::Parsers::Tokenizer.
      class ScienceHealthTokenizer < CSKit::Parsers::Tokenizer
        # Token type => pattern, each anchored to the start of the remaining
        # input. Insertion order is significant: the base tokenizer walks the
        # table in order and takes the first pattern that matches, so the
        # specific patterns must come before the catch-all :text.
        #
        # Frozen so the shared table cannot be altered at runtime.
        #
        # NOTE(review): /\Ato/ also matches the first two letters of longer
        # lowercase words (e.g. "together" => :to + :text "gether") --
        # confirm whether a word boundary is wanted here.
        PATTERNS = {
          left_paren: /\A\(/,
          right_paren: /\A\)/,
          dash: /\A-/,
          colon: /\A:/,
          comma: /\A,/,
          to: /\Ato/,
          only: /\Aonly(?=\))/,                         # must precede a right paren
          cardinality: /\A(1st|2nd|3rd|4th)/,
          page_number: /\A(vii|viii|ix|x|xi|xii)(?=:)/, # must precede a colon
          number: /\A\d+/,
          text: /\A[^\s\(\):,]+/,
          space: /\A[\s\t]+/
        }.freeze

        private

        # @return [Hash{Symbol => Regexp}] the pattern table used for scanning
        def patterns
          PATTERNS
        end
      end

    end
  end
end
33
+
@@ -0,0 +1,17 @@
1
+ # encoding: UTF-8
2
+
3
module CSKit
  module Parsers

    # Immutable-by-convention record for one lexical token: its type, the
    # matched text, and the offset at which it was found.
    class Token
      attr_reader :type, :value, :position

      # @param type [Symbol] token type
      # @param value [String, nil] matched text
      # @param position [Integer] offset of the token in the input
      def initialize(type, value, position)
        @type, @value, @position = type, value, position
      end
    end

  end
end
@@ -0,0 +1,43 @@
1
+ # encoding: UTF-8
2
+
3
module CSKit
  module Parsers
    # Raised when the input contains text that no token pattern recognises.
    class TokenizerError < StandardError; end

    # Base class for tokenizers. Subclasses implement `patterns`, returning
    # an ordered Hash of token type => Regexp; the first pattern that
    # matches the front of the remaining input wins.
    class Tokenizer
      attr_reader :citation

      # @param citation [String] the text to tokenize
      def initialize(citation)
        @citation = citation
      end

      # Yields each token in order, skipping :space tokens. Returns an
      # Enumerator when called without a block.
      #
      # @yieldparam token [Token] type, matched text and starting offset
      # @raise [TokenizerError] if no pattern matches the remaining input, or
      #   a pattern matches the empty string (either case used to spin
      #   forever because the input never shrank)
      def each_token
        return to_enum(__method__) unless block_given?

        remaining = citation
        pos = 0

        until remaining.empty?
          token_type, match = first_match(remaining)
          length = match ? match[0].size : 0

          if length.zero?
            raise TokenizerError,
              "Unrecognized input '#{remaining[0]}' at position #{pos}"
          end

          yield Token.new(token_type, match[0], pos) unless token_type == :space

          # Drop the consumed prefix without mutating the caller's string.
          remaining = remaining[length..-1]
          pos += length
        end
      end

      private

      # Finds the first pattern (in table order) that matches the text.
      #
      # @return [Array(Symbol, MatchData), nil]
      def first_match(text)
        patterns.each_pair do |token_type, pattern|
          match = pattern.match(text)
          return [token_type, match] if match
        end

        nil
      end

      # @return [Hash{Symbol => Regexp}] ordered pattern table
      # @raise [NotImplementedError] always, in this base class
      def patterns
        raise NotImplementedError,
          "`#{__method__}' must be implemented by derived classes"
      end
    end
  end
end
data/lib/cskit/readers.rb CHANGED
@@ -2,8 +2,8 @@
2
2
 
3
3
  module CSKit
4
4
  module Readers
5
- autoload :ScienceHealthReader, "cskit/readers/science_health_reader"
6
- autoload :BibleReader, "cskit/readers/bible_reader"
7
- autoload :Reading, "cskit/readers/reading"
5
+ autoload :ScienceHealthReader, 'cskit/readers/science_health_reader'
6
+ autoload :BibleReader, 'cskit/readers/bible_reader'
7
+ autoload :Reading, 'cskit/readers/reading'
8
8
  end
9
- end
9
+ end
@@ -34,7 +34,7 @@ module CSKit
34
34
  end
35
35
  end
36
36
 
37
- protected
37
+ private
38
38
 
39
39
  def map_verse_texts_for(chapter, book_name)
40
40
  result = []
@@ -50,7 +50,7 @@ module CSKit
50
50
  end
51
51
 
52
52
  def convert_book_name(book_name)
53
- volume.unabbreviate_book_name(book_name).downcase.gsub(" ", "_")
53
+ volume.unabbreviate_book_name(book_name).downcase.gsub(' ', '_')
54
54
  end
55
55
 
56
56
  end