cskit 1.0.1 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +7 -4
- data/History.txt +5 -0
- data/cskit.gemspec +5 -8
- data/lib/cskit.rb +15 -66
- data/lib/cskit/annotated_string.rb +1 -1
- data/lib/cskit/annotator.rb +1 -3
- data/lib/cskit/formatters.rb +3 -4
- data/lib/cskit/formatters/bible.rb +4 -5
- data/lib/cskit/formatters/bible/bible_html_formatter.rb +2 -2
- data/lib/cskit/formatters/bible/bible_json_formatter.rb +17 -0
- data/lib/cskit/formatters/bible/bible_plain_text_formatter.rb +3 -3
- data/lib/cskit/formatters/science_health.rb +3 -5
- data/lib/cskit/formatters/science_health/science_health_html_formatter.rb +3 -4
- data/lib/cskit/formatters/science_health/science_health_plain_text_formatter.rb +5 -4
- data/lib/cskit/lesson.rb +3 -5
- data/lib/cskit/lesson/lesson.rb +3 -3
- data/lib/cskit/lesson/section.rb +1 -1
- data/lib/cskit/parsers.rb +6 -3
- data/lib/cskit/parsers/bible.rb +10 -0
- data/lib/cskit/parsers/bible/bible_parser.rb +192 -0
- data/lib/cskit/parsers/bible/bible_tokenizer.rb +32 -0
- data/lib/cskit/parsers/parser.rb +68 -0
- data/lib/cskit/parsers/science_health.rb +10 -0
- data/lib/cskit/parsers/science_health/science_health_parser.rb +201 -0
- data/lib/cskit/parsers/science_health/science_health_tokenizer.rb +33 -0
- data/lib/cskit/parsers/token.rb +17 -0
- data/lib/cskit/parsers/tokenizer.rb +43 -0
- data/lib/cskit/readers.rb +4 -4
- data/lib/cskit/readers/bible_reader.rb +2 -2
- data/lib/cskit/readers/reading.rb +8 -1
- data/lib/cskit/readers/science_health_reader.rb +8 -8
- data/lib/cskit/registry.rb +65 -0
- data/lib/cskit/resources/volumes.rb +3 -3
- data/lib/cskit/resources/volumes/bible.rb +11 -9
- data/lib/cskit/resources/volumes/science_health.rb +10 -9
- data/lib/cskit/version.rb +1 -1
- data/lib/cskit/volume.rb +1 -1
- data/spec/parsers/bible/bible_parser_spec.rb +205 -0
- data/spec/parsers/science_health/science_health_parser_spec.rb +153 -0
- data/spec/spec_helper.rb +8 -0
- metadata +16 -38
- data/lib/cskit/parsers/bible/bible.rb +0 -1005
- data/lib/cskit/parsers/bible/bible.treetop +0 -64
- data/lib/cskit/parsers/bible/nodes.rb +0 -153
- data/lib/cskit/parsers/bible/objects.rb +0 -81
- data/lib/cskit/parsers/science_health/nodes.rb +0 -82
- data/lib/cskit/parsers/science_health/objects.rb +0 -47
- data/lib/cskit/parsers/science_health/science_health.rb +0 -607
- data/lib/cskit/parsers/science_health/science_health.treetop +0 -44
@@ -0,0 +1,32 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module CSKit
  module Parsers
    module Bible

      # Tokenizer for Bible citations. It only supplies the pattern table;
      # the scanning loop lives in CSKit::Parsers::Tokenizer#each_token.
      class BibleTokenizer < CSKit::Parsers::Tokenizer
        # Token type => pattern. The base class tries the patterns in this
        # order and takes the first hit, so the specific patterns have to
        # come before the :text catch-all.
        PATTERNS = {
          left_paren:  /\A\(/,
          right_paren: /\A\)/,
          dash:        /\A-/,
          colon:       /\A:/,
          semicolon:   /\A;/,
          comma:       /\A,/,
          # The "to" keyword. The negative lookahead stops words that merely
          # begin with "to" (e.g. "today") from being split into a :to token
          # followed by a :text token.
          to:          /\Ato(?![[:alpha:]])/,
          cardinality: /\A(1st|2nd|3rd|4th)/,
          number:      /\A\d+/,
          text:        /\A[^\s\(\);:,]+/,
          space:       /\A[\s\t]+/
        }.freeze

        private

        # @return [Hash{Symbol => Regexp}] the ordered pattern table.
        def patterns
          PATTERNS
        end
      end

    end
  end
end
|
32
|
+
|
@@ -0,0 +1,68 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module CSKit
  module Parsers
    # Raised when the token stream does not match what the parser expects.
    class ParserError < StandardError; end

    # Base class for citation parsers. It keeps one token of lookahead in
    # `current` and advances through the stream with `next_token`.
    #
    # Derived classes must define:
    # * `entry_point`      - parses the whole citation and returns the result.
    # * `get_token_stream` - returns an object responding to `next` and
    #                        `peek` that raises StopIteration when exhausted
    #                        (this class calls exactly those two methods).
    class Parser
      # @param citation_text [String] the raw citation to parse.
      def initialize(citation_text)
        @citation_text = citation_text
        @token_stream = get_token_stream
        @current = first_token
      end

      # Runs the derived class's entry point and returns its result.
      #
      # @raise [ParserError] if tokens remain in the stream afterwards.
      def parse
        result = entry_point

        # NOTE(review): `eos?` reports whether tokens remain *after*
        # `current`, so a single unconsumed trailing token held in `current`
        # is not detected here. Tightening this to `current.type == :eos`
        # should be checked against every derived parser first.
        unless eos?
          raise ParserError, "Expected end of input but more input is available "\
            "at position #{current.position}"
        end

        result
      end

      # Top-level grammar rule; must be overridden.
      def entry_point
        raise NotImplementedError,
          "`#{__method__}' must be defined in derived classes"
      end

      private

      attr_reader :citation_text, :token_stream, :current

      # Builds the token stream for `citation_text`; must be overridden.
      def get_token_stream
        raise NotImplementedError,
          "`#{__method__}' must be defined in derived classes"
      end

      # Fetches the initial lookahead token. An empty stream yields the eos
      # token instead of letting StopIteration escape from the constructor.
      def first_token
        token_stream.next
      rescue StopIteration
        eos_token
      end

      # @return [Boolean] true when the stream has no token after `current`.
      def eos?
        token_stream.peek
        false
      rescue StopIteration
        true
      end

      # Memoized end-of-stream marker, positioned just past the citation text.
      def eos_token
        @eos_token ||= Token.new(:eos, nil, citation_text.size)
      end

      # Consumes `current`, which must be one of `token_types`, and moves to
      # the following token (or to the eos token once the stream runs dry).
      #
      # @raise [ParserError] if `current` has an unexpected type, or if the
      #   eos token itself is being consumed.
      def next_token(*token_types)
        unless token_types.include?(current.type)
          raise ParserError, "Expected #{token_types.join(', ')} but got "\
            "#{current.type} ('#{current.value}') at position #{current.position}"
        end

        if eos?
          raise ParserError, 'Unexpected end of input' if current.type == :eos

          @current = eos_token
        else
          @current = token_stream.next
        end
      end
    end
  end
end
|
@@ -0,0 +1,10 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module CSKit
  module Parsers
    # Namespace for the ScienceHealth parser and tokenizer; both constants
    # are registered for autoloading so their files load on first reference.
    module ScienceHealth
      {
        ScienceHealthParser:    'cskit/parsers/science_health/science_health_parser',
        ScienceHealthTokenizer: 'cskit/parsers/science_health/science_health_tokenizer'
      }.each do |const_name, feature_path|
        autoload(const_name, feature_path)
      end
    end
  end
end
|
@@ -0,0 +1,201 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module CSKit
|
4
|
+
module Parsers
|
5
|
+
module ScienceHealth
|
6
|
+
|
7
|
+
DEFAULT_CARDINALITY = 1
|
8
|
+
|
9
|
+
# A parsed citation: a page plus the list of line references on that page.
Citation = Struct.new(:page, :lines) do
  # Renders the citation as "<page>:<line>, <line>, ...".
  def to_s
    rendered_lines = lines.map { |entry| entry.to_s }
    "#{page}:#{rendered_lines.join(', ')}"
  end

  # @return [Hash] the page and each line converted with its own #to_hash.
  def to_hash
    line_hashes = lines.map { |entry| entry.to_hash }
    { page: page, lines: line_hashes }
  end
end
|
21
|
+
|
22
|
+
# One line reference inside a citation: a start/finish pair plus an optional
# starter and an optional terminator.
Line = Struct.new(:start, :finish, :starter, :terminator) do
  # Renders the reference in citation syntax, for example
  # "3-5 1st Foo (to 2nd ,)" or "7 (only)".
  #
  # The range is collapsed to a single number when finish is missing or
  # equal to start. The starter is written before the terminator, which is
  # the order the parser reads them in.
  def to_s
    parts = []
    parts << (finish && finish != start ? "#{start}-#{finish}" : start.to_s)
    parts << starter.to_s if starter

    if terminator.is_a?(OnlyTerminator)
      parts << '(only)'
    elsif terminator
      parts << "(to #{terminator})"
    end

    parts.join(' ')
  end

  # @return [Hash] the members, with starter/terminator converted through
  #   their own #to_hash (nil when absent).
  def to_hash
    {
      start: start,
      finish: finish,
      starter: starter ? starter.to_hash : nil,
      terminator: terminator ? terminator.to_hash : nil
    }
  end
end

# A text fragment together with which occurrence of it is meant.
class Positional
  # Spellings for the cardinalities the tokenizer recognizes.
  ORDINALS = { 1 => '1st', 2 => '2nd', 3 => '3rd', 4 => '4th' }.freeze

  attr_reader :cardinality, :fragment

  # @param cardinality [Integer, nil] occurrence number of the fragment.
  # @param fragment [String] the fragment text.
  def initialize(cardinality, fragment)
    @cardinality = cardinality
    @fragment = fragment
  end

  # Renders "<ordinal> <fragment>", or the fragment alone when there is no
  # cardinality. Cardinalities without a known spelling are written as the
  # bare number.
  def to_s
    return fragment unless cardinality

    ordinal = ORDINALS.fetch(cardinality) { cardinality.to_s }
    "#{ordinal} #{fragment}"
  end

  # @return [Hash] the cardinality and fragment.
  def to_hash
    {
      cardinality: cardinality,
      fragment: fragment
    }
  end
end

# Fragment at which a line reference starts.
class Starter < Positional
end

# Fragment at which a line reference stops.
class FragmentTerminator < Positional
end

# Marker terminator that serializes as { only: true }.
class OnlyTerminator
  # @return [OnlyTerminator] a shared, lazily created instance.
  def self.instance
    @instance ||= send(:new)
  end

  # @return [String] the keyword as it appears inside the parentheses.
  def to_s
    'only'
  end

  def to_hash
    { only: true }
  end

  # No state to set up.
  private def initialize
  end
end
|
93
|
+
|
94
|
+
|
95
|
+
# Recursive-descent parser for citations of the form
# "<page>:<line>[, <line>...]", where each line is a number or number range
# optionally followed by a starter fragment and a parenthesized terminator.
class ScienceHealthParser < CSKit::Parsers::Parser
  # Top-level rule.
  #
  # @return [Citation]
  def entry_point
    page
  end

  private

  def get_token_stream
    ScienceHealthTokenizer.new(citation_text).each_token.lazy
  end

  # page := page_number ":" line_list
  def page
    page_num = page_number
    next_token(:colon)
    llist = line_list

    Citation.new(page_num, llist)
  end

  # Returns the raw token text of the page number and consumes the token.
  def page_number
    current.value.tap { next_token(:page_number, :number) }
  end

  # line_list := line ("," line)* [","]
  def line_list
    [].tap do |list|
      loop do
        list << line

        break unless current.type == :comma

        next_token(:comma)

        # A trailing comma is tolerated. This has to test the lookahead
        # token itself: the base class's eos? is already true while
        # `current` still holds the final token, which would drop a last
        # single-token line such as the "5" in "12:3, 5".
        break if current.type == :eos
      end
    end
  end

  # line := number ["-" number] [starter] [terminator]
  def line
    start = current.value.tap { next_token(:number) }.to_i
    finish = start # a single line number is a one-line range

    if current.type == :dash
      next_token(:dash)
      finish = current.value.tap { next_token(:number) }.to_i
    end

    starter = line_starter
    terminator = line_terminator

    Line.new(start, finish, starter, terminator)
  end

  # Optional starter: [cardinality] fragment. Returns nil when the lookahead
  # is neither :text nor :cardinality.
  def line_starter
    case current.type
      when :text, :cardinality
        card = cardinality
        fragment = current.value
        next_token(:text, :colon, :comma)
        Starter.new(card, fragment)
    end
  end

  # Optional terminator: "(" ("to" ... | "only") ")". Returns nil when the
  # lookahead is not a left parenthesis.
  def line_terminator
    if current.type == :left_paren
      next_token(:left_paren)

      terminator = if current.type == :to
        fragment_terminator
      else
        only_terminator
      end

      next_token(:right_paren)
      terminator
    end
  end

  # "to" [cardinality] fragment
  def fragment_terminator
    next_token(:to)

    card = cardinality
    fragment = current.value
    next_token(:text, :colon, :comma)

    FragmentTerminator.new(card, fragment)
  end

  def only_terminator
    next_token(:only)
    OnlyTerminator.instance
  end

  # Consumes an optional cardinality token ("1st".to_i => 1, and so on);
  # falls back to DEFAULT_CARDINALITY when none is present.
  def cardinality
    if current.type == :cardinality
      current.value.tap { next_token(:cardinality) }.to_i
    else
      DEFAULT_CARDINALITY
    end
  end
end
|
199
|
+
end
|
200
|
+
end
|
201
|
+
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module CSKit
  module Parsers
    module ScienceHealth

      # Tokenizer for ScienceHealth citations. It only supplies the pattern
      # table; the scanning loop lives in CSKit::Parsers::Tokenizer#each_token.
      class ScienceHealthTokenizer < CSKit::Parsers::Tokenizer
        # Token type => pattern. The base class tries the patterns in this
        # order and takes the first hit, so the specific patterns have to
        # come before the :text catch-all.
        PATTERNS = {
          left_paren:  /\A\(/,
          right_paren: /\A\)/,
          dash:        /\A-/,
          colon:       /\A:/,
          comma:       /\A,/,
          # The "to" keyword. The negative lookahead stops words that merely
          # begin with "to" (e.g. "today") from being split into a :to token
          # followed by a :text token.
          to:          /\Ato(?![[:alpha:]])/,
          only:        /\Aonly(?=\))/, # must precede a right parenthesis
          cardinality: /\A(1st|2nd|3rd|4th)/,
          page_number: /\A(vii|viii|ix|x|xi|xii)(?=:)/, # must precede a colon
          number:      /\A\d+/,
          text:        /\A[^\s\(\):,]+/,
          space:       /\A[\s\t]+/
        }.freeze

        private

        # @return [Hash{Symbol => Regexp}] the ordered pattern table.
        def patterns
          PATTERNS
        end
      end

    end
  end
end
|
33
|
+
|
@@ -0,0 +1,43 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module CSKit
  module Parsers
    # Base class for tokenizers. Derived classes provide an ordered
    # `patterns` table (token type => Regexp); this class walks the citation
    # text and emits one Token per match.
    class Tokenizer
      attr_reader :citation

      # @param citation [String] the text to split into tokens.
      def initialize(citation)
        @citation = citation
      end

      # Yields a Token (type, matched text, position) for each match, in
      # order. Matches of type :space advance the position but are not
      # yielded. The citation string itself is never modified.
      #
      # @return [Enumerator] when called without a block.
      # @raise [ArgumentError] if no pattern produces a non-empty match for
      #   the remaining text. Without this check the loop could never make
      #   progress and would spin forever.
      def each_token
        return to_enum(__method__) unless block_given?

        remaining = citation
        pos = 0

        until remaining.empty?
          token_type, match = first_match(remaining)

          unless match
            raise ArgumentError,
              "Unable to tokenize #{remaining.inspect} at position #{pos}"
          end

          lexeme = match[0]
          yield Token.new(token_type, lexeme, pos) unless token_type == :space

          # Drop the consumed prefix and keep the running offset in step.
          remaining = remaining[lexeme.size..-1]
          pos += lexeme.size
        end
      end

      private

      # Finds the first pattern, in table order, with a non-empty match.
      #
      # @return [Array(Symbol, MatchData), nil]
      def first_match(text)
        patterns.each_pair do |token_type, pattern|
          match = pattern.match(text)
          return [token_type, match] if match && !match[0].empty?
        end

        nil
      end

      # Ordered table of token type => Regexp; must be overridden.
      def patterns
        raise NotImplementedError,
          "`#{__method__}' must be implemented by derived classes"
      end
    end
  end
end
|
data/lib/cskit/readers.rb
CHANGED
@@ -2,8 +2,8 @@
|
|
2
2
|
|
3
3
|
module CSKit
|
4
4
|
module Readers
|
5
|
-
autoload :ScienceHealthReader,
|
6
|
-
autoload :BibleReader,
|
7
|
-
autoload :Reading,
|
5
|
+
autoload :ScienceHealthReader, 'cskit/readers/science_health_reader'
|
6
|
+
autoload :BibleReader, 'cskit/readers/bible_reader'
|
7
|
+
autoload :Reading, 'cskit/readers/reading'
|
8
8
|
end
|
9
|
-
end
|
9
|
+
end
|
@@ -34,7 +34,7 @@ module CSKit
|
|
34
34
|
end
|
35
35
|
end
|
36
36
|
|
37
|
-
|
37
|
+
private
|
38
38
|
|
39
39
|
def map_verse_texts_for(chapter, book_name)
|
40
40
|
result = []
|
@@ -50,7 +50,7 @@ module CSKit
|
|
50
50
|
end
|
51
51
|
|
52
52
|
def convert_book_name(book_name)
|
53
|
-
volume.unabbreviate_book_name(book_name).downcase.gsub(
|
53
|
+
volume.unabbreviate_book_name(book_name).downcase.gsub(' ', '_')
|
54
54
|
end
|
55
55
|
|
56
56
|
end
|