cskit 1.0.1 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +7 -4
- data/History.txt +5 -0
- data/cskit.gemspec +5 -8
- data/lib/cskit.rb +15 -66
- data/lib/cskit/annotated_string.rb +1 -1
- data/lib/cskit/annotator.rb +1 -3
- data/lib/cskit/formatters.rb +3 -4
- data/lib/cskit/formatters/bible.rb +4 -5
- data/lib/cskit/formatters/bible/bible_html_formatter.rb +2 -2
- data/lib/cskit/formatters/bible/bible_json_formatter.rb +17 -0
- data/lib/cskit/formatters/bible/bible_plain_text_formatter.rb +3 -3
- data/lib/cskit/formatters/science_health.rb +3 -5
- data/lib/cskit/formatters/science_health/science_health_html_formatter.rb +3 -4
- data/lib/cskit/formatters/science_health/science_health_plain_text_formatter.rb +5 -4
- data/lib/cskit/lesson.rb +3 -5
- data/lib/cskit/lesson/lesson.rb +3 -3
- data/lib/cskit/lesson/section.rb +1 -1
- data/lib/cskit/parsers.rb +6 -3
- data/lib/cskit/parsers/bible.rb +10 -0
- data/lib/cskit/parsers/bible/bible_parser.rb +192 -0
- data/lib/cskit/parsers/bible/bible_tokenizer.rb +32 -0
- data/lib/cskit/parsers/parser.rb +68 -0
- data/lib/cskit/parsers/science_health.rb +10 -0
- data/lib/cskit/parsers/science_health/science_health_parser.rb +201 -0
- data/lib/cskit/parsers/science_health/science_health_tokenizer.rb +33 -0
- data/lib/cskit/parsers/token.rb +17 -0
- data/lib/cskit/parsers/tokenizer.rb +43 -0
- data/lib/cskit/readers.rb +4 -4
- data/lib/cskit/readers/bible_reader.rb +2 -2
- data/lib/cskit/readers/reading.rb +8 -1
- data/lib/cskit/readers/science_health_reader.rb +8 -8
- data/lib/cskit/registry.rb +65 -0
- data/lib/cskit/resources/volumes.rb +3 -3
- data/lib/cskit/resources/volumes/bible.rb +11 -9
- data/lib/cskit/resources/volumes/science_health.rb +10 -9
- data/lib/cskit/version.rb +1 -1
- data/lib/cskit/volume.rb +1 -1
- data/spec/parsers/bible/bible_parser_spec.rb +205 -0
- data/spec/parsers/science_health/science_health_parser_spec.rb +153 -0
- data/spec/spec_helper.rb +8 -0
- metadata +16 -38
- data/lib/cskit/parsers/bible/bible.rb +0 -1005
- data/lib/cskit/parsers/bible/bible.treetop +0 -64
- data/lib/cskit/parsers/bible/nodes.rb +0 -153
- data/lib/cskit/parsers/bible/objects.rb +0 -81
- data/lib/cskit/parsers/science_health/nodes.rb +0 -82
- data/lib/cskit/parsers/science_health/objects.rb +0 -47
- data/lib/cskit/parsers/science_health/science_health.rb +0 -607
- data/lib/cskit/parsers/science_health/science_health.treetop +0 -44
@@ -0,0 +1,32 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module CSKit
|
4
|
+
module Parsers
|
5
|
+
module Bible
|
6
|
+
|
7
|
+
# Tokenizer for Bible citation text. It only supplies the pattern table;
# the scanning loop lives in CSKit::Parsers::Tokenizer#each_token.
class BibleTokenizer < CSKit::Parsers::Tokenizer
  # Token patterns, tried in insertion order against the start of the
  # remaining text; the first pattern that matches wins. Order therefore
  # matters: `cardinality` must precede `number` (so "1st" is not read as
  # the number 1), and `to` must precede `text`.
  PATTERNS = {
    left_paren:  /\A\(/,
    right_paren: /\A\)/,
    dash:        /\A-/,
    colon:       /\A:/,
    semicolon:   /\A;/,
    comma:       /\A,/,
    # Only match "to" as a word of its own. Without the lookahead, any
    # fragment that merely starts with "to" (e.g. "today") was split into
    # a :to token followed by a :text token.
    to:          /\Ato(?![[:alpha:]])/,
    cardinality: /\A(1st|2nd|3rd|4th)/,
    number:      /\A\d+/,
    text:        /\A[^\s\(\);:,]+/,
    # \s already includes tabs; :space tokens are discarded by the base class.
    space:       /\A\s+/
  }.freeze

  private

  # @return [Hash{Symbol => Regexp}] the pattern table used by #each_token
  def patterns
    PATTERNS
  end
end
|
28
|
+
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
@@ -0,0 +1,68 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module CSKit
|
4
|
+
module Parsers
|
5
|
+
# Raised when a citation does not match the grammar a parser expects.
class ParserError < StandardError; end

# Base class for the citation parsers. It keeps a single token of
# lookahead in `current`; the token stream itself is always positioned
# one token *beyond* `current`.
#
# Subclasses must implement #entry_point (the top-level grammar rule) and
# the private #get_token_stream (returns an enumerator of tokens that
# supports #next and #peek).
class Parser
  # @param citation_text [String] the raw citation to parse
  def initialize(citation_text)
    @citation_text = citation_text
    @token_stream = get_token_stream
    # A citation that produces no tokens (empty or whitespace-only) starts
    # at the end-of-stream marker instead of leaking StopIteration out of
    # the constructor.
    @current = eos? ? eos_token : token_stream.next
  end

  # Runs the grammar and verifies that all input was consumed.
  #
  # @return the value produced by #entry_point
  # @raise [ParserError] if the input does not match the grammar or if
  #   tokens are left over once the grammar has finished
  def parse
    result = entry_point

    # `eos?` only tells whether tokens remain beyond `current`, so it cannot
    # detect a single unconsumed trailing token. The lookahead token itself
    # must be the end-of-stream marker.
    unless current.type == :eos
      raise ParserError, "Expected end of input but more input is available "\
        "at position #{current.position}"
    end

    result
  end

  # Top-level grammar rule; must be overridden.
  #
  # @raise [NotImplementedError] always, in this base class
  def entry_point
    raise NotImplementedError,
      "`#{__method__}' must be defined in derived classes"
  end

  private

  # Builds the token enumerator for `citation_text`; must be overridden.
  #
  # @raise [NotImplementedError] always, in this base class
  def get_token_stream
    raise NotImplementedError,
      "`#{__method__}' must be defined in derived classes"
  end

  attr_reader :citation_text, :token_stream, :current

  # @return [Boolean] true when the stream holds no token beyond `current`
  def eos?
    token_stream.peek
    false
  rescue StopIteration
    true
  end

  # Synthetic token marking the end of input, positioned just past the
  # last character of the citation.
  def eos_token
    @eos_token ||= Token.new(:eos, nil, citation_text.size)
  end

  # Asserts that `current` has one of the given types, then advances the
  # lookahead by one token (or to the end-of-stream marker).
  #
  # @param token_types [Array<Symbol>] acceptable types for `current`
  # @raise [ParserError] if `current` has a different type, or if the
  #   parser is already at the end of input
  def next_token(*token_types)
    unless token_types.include?(current.type)
      raise ParserError, "Expected #{token_types.join(', ')} but got "\
        "#{current.type} ('#{current.value}') at position #{current.position}"
    end

    if eos?
      raise(ParserError, 'Unexpected end of input') if current.type == :eos

      @current = eos_token
    else
      @current = token_stream.next
    end
  end
end
|
67
|
+
end
|
68
|
+
end
|
@@ -0,0 +1,10 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module CSKit
  module Parsers
    # Namespace for the ScienceHealth citation parser and its tokenizer.
    # Both constants are registered for autoloading, so their files are
    # only required the first time the constant is referenced.
    module ScienceHealth
      autoload(:ScienceHealthParser, 'cskit/parsers/science_health/science_health_parser')
      autoload(:ScienceHealthTokenizer, 'cskit/parsers/science_health/science_health_tokenizer')
    end
  end
end
|
@@ -0,0 +1,201 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module CSKit
|
4
|
+
module Parsers
|
5
|
+
module ScienceHealth
|
6
|
+
|
7
|
+
DEFAULT_CARDINALITY = 1
|
8
|
+
|
9
|
+
# A parsed citation: a page plus the list of line entries cited on it.
Citation = Struct.new(:page, :lines) do
  # Renders the citation as "<page>:<line>, <line>, ...", using each
  # line entry's own #to_s.
  def to_s
    rendered_lines = lines.map { |entry| entry.to_s }.join(", ")
    "#{page}:#{rendered_lines}"
  end

  # @return [Hash] the page and the #to_hash form of every line entry
  def to_hash
    line_hashes = lines.map { |entry| entry.to_hash }
    { page: page, lines: line_hashes }
  end
end
|
21
|
+
|
22
|
+
# One line entry of a citation: a start line, a finish line, and optional
# starter / terminator objects describing where the text begins and ends.
Line = Struct.new(:start, :finish, :starter, :terminator) do
  # @return [Boolean] true when the terminator is the "(only)" marker
  def only?
    terminator.is_a?(OnlyTerminator)
  end

  # Renders the entry in citation syntax, e.g. "3", "3-5", "3 (only)" or
  # "3-5 1st God (to 2nd .)".
  #
  # The previous implementation called `only?` and `start_fragment`, which
  # were not defined on this struct, so it always raised NoMethodError. A
  # finish equal to the start (or a missing finish) is rendered as a single
  # line number.
  def to_s
    range = finish && finish != start ? "#{start}-#{finish}" : start.to_s

    parts = [range]
    parts << starter.to_s if starter

    if only?
      parts << '(only)'
    elsif terminator
      parts << "(to #{terminator})"
    end

    parts.join(' ')
  end

  # @return [Hash] plain-data form; starter and terminator are converted
  #   with their own #to_hash, or nil when absent
  def to_hash
    {
      start: start,
      finish: finish,
      starter: starter ? starter.to_hash : nil,
      terminator: terminator ? terminator.to_hash : nil
    }
  end
end
|
44
|
+
|
45
|
+
# A text fragment together with its cardinality, i.e. which occurrence of
# the fragment is meant (1 = first, 2 = second, ...).
class Positional
  attr_reader :cardinality, :fragment

  # @param cardinality [Integer, nil] occurrence number of the fragment
  # @param fragment [String] the fragment text
  def initialize(cardinality, fragment)
    @cardinality = cardinality
    @fragment = fragment
  end

  # Renders as "<ordinal> <fragment>" (e.g. "2nd ."), or just the fragment
  # when no cardinality is set.
  #
  # Previously only 1, 2 and 3 had an ordinal, so a cardinality of 4 (which
  # the tokenizers accept as "4th") rendered as " <fragment>" with a
  # leading space.
  def to_s
    return fragment.to_s unless cardinality

    "#{ordinal_for(cardinality)} #{fragment}"
  end

  # @return [Hash] plain-data form of the cardinality and fragment
  def to_hash
    {
      cardinality: cardinality,
      fragment: fragment
    }
  end

  private

  # English ordinal for an integer: 1 -> "1st", 2 -> "2nd", 3 -> "3rd",
  # 4 -> "4th", 11 -> "11th", 21 -> "21st".
  def ordinal_for(number)
    return "#{number}th" if (11..13).cover?(number.abs % 100)

    suffix = case number.abs % 10
             when 1 then 'st'
             when 2 then 'nd'
             when 3 then 'rd'
             else 'th'
             end

    "#{number}#{suffix}"
  end
end

# Marks the fragment at which a cited line begins.
class Starter < Positional
end

# Marks the fragment at which a cited line ends.
class FragmentTerminator < Positional
end
|
80
|
+
|
81
|
+
# Terminator meaning "only": it carries no data of its own, so one shared
# instance is handed out through .instance.
class OnlyTerminator
  class << self
    # @return [OnlyTerminator] the memoized shared instance
    def instance
      @instance ||= new
    end
  end

  # @return [Hash] always `{ only: true }`
  def to_hash
    { only: true }
  end

  private

  def initialize
  end
end
|
93
|
+
|
94
|
+
|
95
|
+
# Parser for citations of the form
#
#   <page>:<line>[-<line>] [<starter>] [(<terminator>)][, <line> ...]
#
# where <starter> is an optional cardinality followed by a fragment, and
# <terminator> is either "to [<cardinality>] <fragment>" or "only".
class ScienceHealthParser < CSKit::Parsers::Parser
  # Top-level grammar rule.
  #
  # @return [Citation]
  def entry_point
    page
  end

  private

  def get_token_stream
    ScienceHealthTokenizer.new(citation_text).each_token.lazy
  end

  # page := page_number ":" line_list
  def page
    page_num = page_number
    next_token(:colon)

    Citation.new(page_num, line_list)
  end

  # Consumes a :page_number or :number token and returns its raw text.
  def page_number
    current.value.tap { next_token(:page_number, :number) }
  end

  # line_list := line ("," line)*
  #
  # A trailing comma at the very end of the input is tolerated.
  def line_list
    [].tap do |list|
      loop do
        list << line

        break unless current.type == :comma

        next_token(:comma)

        # `eos?` only reports whether tokens remain *beyond* `current`, so
        # testing it here dropped a final single-token entry (the "3" in
        # "1:2, 3"). Stop only when the comma really was the last token.
        break if current.type == :eos
      end
    end
  end

  # line := number ["-" number] [line_starter] [line_terminator]
  #
  # When no range is given, the finish line equals the start line.
  def line
    start = current.value.tap { next_token(:number) }.to_i
    finish = start

    if current.type == :dash
      next_token(:dash)
      finish = current.value.tap { next_token(:number) }.to_i
    end

    starter = line_starter
    terminator = line_terminator

    Line.new(start, finish, starter, terminator)
  end

  # Parses an optional starter.
  #
  # @return [Starter, nil] nil when `current` is neither text nor a
  #   cardinality
  def line_starter
    case current.type
    when :text, :cardinality
      card = cardinality
      fragment = current.value
      next_token(:text, :colon, :comma)
      Starter.new(card, fragment)
    end
  end

  # Parses an optional parenthesized terminator.
  #
  # @return [FragmentTerminator, OnlyTerminator, nil] nil when `current`
  #   is not an opening parenthesis
  def line_terminator
    return unless current.type == :left_paren

    next_token(:left_paren)
    terminator = current.type == :to ? fragment_terminator : only_terminator
    next_token(:right_paren)

    terminator
  end

  # fragment_terminator := "to" [cardinality] fragment
  def fragment_terminator
    next_token(:to)

    card = cardinality
    fragment = current.value
    next_token(:text, :colon, :comma)

    FragmentTerminator.new(card, fragment)
  end

  # only_terminator := "only"
  def only_terminator
    next_token(:only)
    OnlyTerminator.instance
  end

  # Consumes a cardinality token if one is present ("2nd" becomes 2);
  # otherwise returns DEFAULT_CARDINALITY without consuming anything.
  def cardinality
    if current.type == :cardinality
      current.value.tap { next_token(:cardinality) }.to_i
    else
      DEFAULT_CARDINALITY
    end
  end
end
|
199
|
+
end
|
200
|
+
end
|
201
|
+
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module CSKit
|
4
|
+
module Parsers
|
5
|
+
module ScienceHealth
|
6
|
+
|
7
|
+
# Tokenizer for ScienceHealth citation text. It only supplies the pattern
# table; the scanning loop lives in CSKit::Parsers::Tokenizer#each_token.
class ScienceHealthTokenizer < CSKit::Parsers::Tokenizer
  # Token patterns, tried in insertion order against the start of the
  # remaining text; the first pattern that matches wins. Order therefore
  # matters: the keyword and `cardinality` patterns must precede `number`
  # and `text`.
  PATTERNS = {
    left_paren:  /\A\(/,
    right_paren: /\A\)/,
    dash:        /\A-/,
    colon:       /\A:/,
    comma:       /\A,/,
    # Only match "to" as a word of its own. Without the lookahead, any
    # fragment that merely starts with "to" (e.g. "today") was split into
    # a :to token followed by a :text token.
    to:          /\Ato(?![[:alpha:]])/,
    only:        /\Aonly(?=\))/, # must precede a closing parenthesis
    cardinality: /\A(1st|2nd|3rd|4th)/,
    page_number: /\A(vii|viii|ix|x|xi|xii)(?=:)/, # must precede a colon
    number:      /\A\d+/,
    text:        /\A[^\s\(\):,]+/,
    # \s already includes tabs; :space tokens are discarded by the base class.
    space:       /\A\s+/
  }.freeze

  private

  # @return [Hash{Symbol => Regexp}] the pattern table used by #each_token
  def patterns
    PATTERNS
  end
end
|
29
|
+
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
@@ -0,0 +1,43 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module CSKit
|
4
|
+
module Parsers
|
5
|
+
# Base class for tokenizers. Subclasses provide a private #patterns method
# returning a Hash of token type => Regexp; each Regexp is matched against
# the start of the text that is still to be scanned.
class Tokenizer
  attr_reader :citation

  # @param citation [String] the text to split into tokens
  def initialize(citation)
    @citation = citation
  end

  # Yields a Token (type, matched text, start position) for every lexeme
  # in the citation. Patterns are tried in the hash's insertion order and
  # the first match wins; lexemes of type :space are consumed but not
  # yielded. The citation string itself is not modified.
  #
  # @return [Enumerator] when no block is given
  # @raise [ArgumentError] if no pattern matches at some position.
  #   Previously this case never consumed any text and looped forever.
  def each_token
    return to_enum(__method__) unless block_given?

    remaining = citation.dup
    pos = 0

    until remaining.empty?
      token_type, lexeme = next_lexeme(remaining)

      unless lexeme
        raise ArgumentError,
          "Unable to tokenize #{remaining.inspect} at position #{pos}"
      end

      yield Token.new(token_type, lexeme, pos) unless token_type == :space

      remaining = remaining[lexeme.size..-1]
      pos += lexeme.size
    end
  end

  private

  # Finds the first pattern that matches a non-empty prefix of `text`.
  # Empty matches are skipped because they would not advance the scan.
  #
  # @return [Array(Symbol, String), nil] token type and matched text
  def next_lexeme(text)
    patterns.each_pair do |token_type, pattern|
      match = pattern.match(text)
      return [token_type, match[0]] if match && !match[0].empty?
    end

    nil
  end

  def patterns
    raise NotImplementedError,
      "`#{__method__}' must be implemented by derived classes"
  end
end
|
42
|
+
end
|
43
|
+
end
|
data/lib/cskit/readers.rb
CHANGED
@@ -2,8 +2,8 @@
|
|
2
2
|
|
3
3
|
module CSKit
|
4
4
|
module Readers
|
5
|
-
autoload :ScienceHealthReader,
|
6
|
-
autoload :BibleReader,
|
7
|
-
autoload :Reading,
|
5
|
+
autoload :ScienceHealthReader, 'cskit/readers/science_health_reader'
|
6
|
+
autoload :BibleReader, 'cskit/readers/bible_reader'
|
7
|
+
autoload :Reading, 'cskit/readers/reading'
|
8
8
|
end
|
9
|
-
end
|
9
|
+
end
|
@@ -34,7 +34,7 @@ module CSKit
|
|
34
34
|
end
|
35
35
|
end
|
36
36
|
|
37
|
-
|
37
|
+
private
|
38
38
|
|
39
39
|
def map_verse_texts_for(chapter, book_name)
|
40
40
|
result = []
|
@@ -50,7 +50,7 @@ module CSKit
|
|
50
50
|
end
|
51
51
|
|
52
52
|
def convert_book_name(book_name)
|
53
|
-
volume.unabbreviate_book_name(book_name).downcase.gsub(
|
53
|
+
volume.unabbreviate_book_name(book_name).downcase.gsub(' ', '_')
|
54
54
|
end
|
55
55
|
|
56
56
|
end
|