slaw 0.17.2 → 1.0.0.alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,33 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'slaw/grammars/schedules_nodes'
4
+
5
+ module Slaw
6
+ module Grammars
7
    grammar Schedules
      # Grammar for the schedules that may follow the main body of a document.
      #
      # Several rules referenced here (body, space, content, alphanums,
      # newline, eol) are not defined in this grammar; they have to be
      # supplied by the grammar that includes this one.

      # Entry point: wraps the full run of schedules in one ScheduleContainer
      # node, reachable through the +schedules+ label.
      rule schedules_container
        schedules:schedules <ScheduleContainer>
      end

      # One or more schedules in sequence, exposed as +children+.
      rule schedules
        children:schedule+ <GroupNode>
      end

      # A single schedule: a title line followed by an optional body.
      rule schedule
        schedule_title
        body:body?
        <Schedule>
      end

      rule schedule_title
        # First line: optional leading space, the "schedule" keyword, then an
        # optional number (which may be wrapped in double quotes), optional
        # separator characters (space, tab, colon, full stop, hyphen) and an
        # optional title running to the end of the line.
        space? schedule_title_prefix space? "\""? num:alphanums? "\""? [ \t:.-]* title:(content)?
        # Optional heading on the line directly after the title line.
        heading:(newline space? content)?
        eol
      end

      # Case-insensitive "schedule" or "schedules".
      rule schedule_title_prefix
        'schedule'i 's'i?
      end
    end
32
+ end
33
+ end
@@ -0,0 +1,107 @@
1
+ require 'slaw/grammars/core_nodes'
2
+
3
+ module Slaw
4
+ module Grammars
5
+ module Schedules
6
+ FRBR_URI = '/za/act/1980/01'
7
+ WORK_URI = FRBR_URI
8
+ EXPRESSION_URI = "#{FRBR_URI}/eng@"
9
+ MANIFESTATION_URI = EXPRESSION_URI
10
+
11
+ class ScheduleContainer < Treetop::Runtime::SyntaxNode
12
+ def to_xml(b)
13
+ b.components { |b|
14
+ schedules.children.elements.each_with_index { |e, i|
15
+ e.to_xml(b, "", i+1)
16
+ }
17
+ }
18
+ end
19
+ end
20
+
21
+ class Schedule < Treetop::Runtime::SyntaxNode
22
+ def num
23
+ n = schedule_title.num.text_value
24
+ return (n && !n.empty?) ? n : nil
25
+ end
26
+
27
+ def alias
28
+ if not schedule_title.title.text_value.blank?
29
+ schedule_title.title.text_value
30
+ elsif num
31
+ "Schedule #{num}"
32
+ else
33
+ "Schedule"
34
+ end
35
+ end
36
+
37
+ def heading
38
+ if schedule_title.heading.respond_to? :content
39
+ schedule_title.heading.content.text_value
40
+ else
41
+ nil
42
+ end
43
+ end
44
+
45
+ def to_xml(b, idprefix=nil, i=1)
46
+ if num
47
+ n = num
48
+ component = "schedule#{n}"
49
+ else
50
+ n = i
51
+ # make a component name from the schedule title
52
+ component = self.alias.downcase().strip().gsub(/[^a-z0-9]/i, '').gsub(/ +/, '')
53
+ end
54
+
55
+ id = "#{idprefix}#{component}"
56
+
57
+ b.component(id: "component-#{id}") { |b|
58
+ b.doc_(name: component) { |b|
59
+ b.meta { |b|
60
+ b.identification(source: "#slaw") { |b|
61
+ b.FRBRWork { |b|
62
+ b.FRBRthis(value: "#{WORK_URI}/#{component}")
63
+ b.FRBRuri(value: WORK_URI)
64
+ b.FRBRalias(value: self.alias)
65
+ b.FRBRdate(date: '1980-01-01', name: 'Generation')
66
+ b.FRBRauthor(href: '#council')
67
+ b.FRBRcountry(value: 'za')
68
+ }
69
+ b.FRBRExpression { |b|
70
+ b.FRBRthis(value: "#{EXPRESSION_URI}/#{component}")
71
+ b.FRBRuri(value: EXPRESSION_URI)
72
+ b.FRBRdate(date: '1980-01-01', name: 'Generation')
73
+ b.FRBRauthor(href: '#council')
74
+ b.FRBRlanguage(language: 'eng')
75
+ }
76
+ b.FRBRManifestation { |b|
77
+ b.FRBRthis(value: "#{MANIFESTATION_URI}/#{component}")
78
+ b.FRBRuri(value: MANIFESTATION_URI)
79
+ b.FRBRdate(date: Time.now.strftime('%Y-%m-%d'), name: 'Generation')
80
+ b.FRBRauthor(href: '#slaw')
81
+ }
82
+ }
83
+ }
84
+
85
+ b.mainBody { |b|
86
+ idprefix = "#{id}."
87
+
88
+ # there is no good AKN hierarchy container for schedules, so we
89
+ # just use article because we don't use it anywhere else.
90
+ b.article(id: id) { |b|
91
+ b.heading(heading) if heading
92
+ body.children.elements.each_with_index { |e| e.to_xml(b, idprefix, i) } if body.is_a? Body
93
+ }
94
+ }
95
+ }
96
+ }
97
+ end
98
+ end
99
+
100
+ class ScheduleStatement < Treetop::Runtime::SyntaxNode
101
+ def to_xml(b, idprefix)
102
+ b.p { |b| clauses.to_xml(b, idprefix) } if clauses
103
+ end
104
+ end
105
+ end
106
+ end
107
+ end
@@ -0,0 +1,59 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'slaw/grammars/terminals'
4
+ require 'slaw/grammars/tables_nodes'
5
+
6
+ module Slaw
7
+ module Grammars
8
    grammar Tables
      ##########
      # wikimedia-style tables
      #
      # this grammar doesn't support inline table cells (eg: | col1 || col2 || col3)
      # instead, the builder preprocesses tables to break inline cells onto their own
      # lines, which we do support.
      #
      # The +clauses+ rule used by table_line is not defined in this grammar
      # or in Terminals; it has to be supplied by the including grammar.

      # A whole table: an opening '{|' line, the body, and a closing '|}' line.
      rule table
        space? '{|' eol
        table_body
        '|}' eol
        <Table>
      end

      # Any mix of row markers and cells. Table#to_xml groups the cells into
      # rows, starting a new row at each row marker.
      rule table_body
        (table_row / table_cell)*
      end

      # Row marker: '|-' on a line of its own.
      rule table_row
        '|-' space? eol
      end

      rule table_cell
        # don't match end-of-table
        !'|}'
        # '!' opens a header cell and '|' a normal cell (see TableCell#to_xml);
        # optional attributes may follow the marker
        [!|] attribs:table_attribs? space?
        # first content line, then multiple lines
        # (a continuation line must not start with '!' or '|', since that
        # would start the next cell, a row marker or the end of the table)
        content:(line:table_line (![!|] space? line:table_line)*)
        <TableCell>
      end

      # One line of cell content; the clauses are optional so that an empty
      # line still matches.
      rule table_line
        clauses:clauses? eol
        <TableLine>
      end

      # One or more attributes, terminated by a '|' that separates them from
      # the cell content.
      rule table_attribs
        space? attribs:(table_attrib+) '|'
      end

      # A single name=value attribute. The name is limited to lowercase
      # letters, underscore and hyphen; the value must be wrapped in double
      # or single quotes.
      rule table_attrib
        name:([a-z_-]+) '=' value:(
          ('"' (!'"' .)* '"') /
          ("'" (!"'" .)* "'"))
        space?
      end

      include Terminals
    end
58
+ end
59
+ end
@@ -0,0 +1,74 @@
1
+ module Slaw
2
+ module Grammars
3
+ module Tables
4
+ class Table < Treetop::Runtime::SyntaxNode
5
+ def to_xml(b, idprefix, i=0)
6
+ b.table(id: "#{idprefix}table#{i}") { |b|
7
+ # we'll gather cells into this row list
8
+ rows = []
9
+ cells = []
10
+
11
+ for child in table_body.elements
12
+ if child.is_a? TableCell
13
+ # cell
14
+ cells << child
15
+ else
16
+ # new row marker
17
+ rows << cells unless cells.empty?
18
+ cells = []
19
+ end
20
+ end
21
+ rows << cells unless cells.empty?
22
+
23
+ for row in rows
24
+ b.tr { |tr|
25
+ for cell in row
26
+ cell.to_xml(tr, "")
27
+ end
28
+ }
29
+ end
30
+ }
31
+ end
32
+ end
33
+
34
+ class TableCell < Treetop::Runtime::SyntaxNode
35
+ def to_xml(b, idprefix)
36
+ tag = text_value[0] == '!' ? 'th' : 'td'
37
+
38
+ attrs = {}
39
+ if not attribs.empty?
40
+ for item in attribs.attribs.elements
41
+ # key=value (strip quotes around value)
42
+ attrs[item.name.text_value.strip] = item.value.text_value[1..-2]
43
+ end
44
+ end
45
+
46
+ b.send(tag.to_sym, attrs) { |b|
47
+ b.p { |b|
48
+ # first line, and the rest
49
+ lines = [content.line] + content.elements.last.elements.map(&:line)
50
+
51
+ lines.each_with_index do |line, i|
52
+ line.to_xml(b, i, i == lines.length-1)
53
+ end
54
+ }
55
+ }
56
+ end
57
+ end
58
+
59
+ class TableLine < Treetop::Runtime::SyntaxNode
60
+ # line of table content
61
+ def to_xml(b, i, tail)
62
+ clauses.to_xml(b) unless clauses.empty?
63
+
64
+ # add trailing newlines.
65
+ # for the first line, eat whitespace at the start
66
+ # for the last line, eat whitespace at the end
67
+ if not tail and (i > 0 or not clauses.empty?)
68
+ eol.text_value.count("\n").times { b.eol }
69
+ end
70
+ end
71
+ end
72
+ end
73
+ end
74
+ end
@@ -0,0 +1,84 @@
1
+ # encoding: UTF-8
2
+
3
+ module Slaw
4
+ module Grammars
5
    grammar Terminals
      #########
      ## one line of basic content

      rule content
        # one or more characters other than a newline, which must be followed
        # by an end-of-line (checked with a lookahead, so it is not consumed).
        # As eol is defined in this grammar it requires a newline, so content
        # does not match text that runs up to the end of input without one.
        [^\n]+ &eol
      end

      ##########
      # terminals

      # eg. 2, 2A, 2b
      rule number_letter
        number letter*
      end

      # a letter followed by any number of letters or digits, eg. a, ab, a1
      rule letter_ordinal
        letter (letter / digit)*
      end

      # three or more dot-separated numbers, eg. 8.2.1
      rule dotted_number_3
        number '.' number ('.' number)+
      end

      # exactly two dot-separated numbers, eg. 8.2
      rule dotted_number_2
        number '.' number
      end

      # one or more digits
      rule number
        digit+
      end

      # a single ASCII digit
      rule digit
        [0-9]
      end

      # a single ASCII letter, either case
      rule letter
        [a-zA-Z]
      end

      # one or more ASCII letters or digits
      rule alphanums
        [a-zA-Z0-9]+
      end

      # a straight double quote or a left/right curly double quote
      rule quotes
        ["“”]
      end

      # any single character that is not one of the quotes above
      rule non_quotes
        [^"“”]
      end

      ##########
      # whitespace

      # one or more spaces or tabs (never a newline)
      rule space
        [ \t]+
      end

      # zero or more spaces, tabs or newlines; this rule can match nothing
      rule whitespace
        [ \t\n]*
      end

      # a line holding nothing but optional spaces or tabs
      rule empty_line
        space? newline
      end

      # end of line: the newline itself plus any empty lines that follow it
      rule eol
        newline
        empty_line*
      end

      rule newline
        "\n"
      end
    end
83
+ end
84
+ end
@@ -0,0 +1,222 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'slaw/parse/grammar_helpers'
4
+ require 'slaw/grammars/za/act_nodes'
5
+
6
+ require 'slaw/grammars/terminals'
7
+ require 'slaw/grammars/tables'
8
+ require 'slaw/grammars/schedules'
9
+ require 'slaw/grammars/inlines'
10
+
11
+ module Slaw
12
+ module Grammars
13
+ module ZA
14
      grammar Act
        # Grammar for a complete act: optional preface and preamble, the
        # body, and optional schedules.
        #
        # The clauses and inline_statement rules used below are not defined
        # in this grammar; they have to come from one of the included
        # grammars.
        include Slaw::Parse::GrammarHelpers

        ########
        # major containers

        # Top-level rule. Only the body is mandatory.
        rule act
          empty_line*
          preface:preface?
          preamble:preamble?
          body
          schedules:schedules_container? <Act>
        end

        # Statements that come before the preamble, optionally introduced by
        # a PREFACE marker line.
        # NOTE(review): both 'PREAMBLE' lookaheads here are case-sensitive,
        # whereas the preamble rule matches 'PREAMBLE'i -- confirm that a
        # lower- or mixed-case marker is meant to be treated as preface text.
        rule preface
          !'PREAMBLE'
          ('PREFACE'i space? eol)?
          statements:(!'PREAMBLE' pre_body_statement)* <Preface>
        end

        # A PREAMBLE marker line (any case) followed by statements.
        rule preamble
          'PREAMBLE'i space? eol
          statements:pre_body_statement* <Preamble>
        end

        # The body is one or more containers or loose blocks of paragraphs.
        rule body
          children:(chapter / part / section / subsection / block_paragraphs)+ <Body>
        end

        # A chapter heading and its contents; a chapter cannot contain
        # another chapter.
        rule chapter
          heading:chapter_heading
          children:(part / section / subsection / block_paragraphs)*
          <Chapter>
        end

        # A part heading and its contents; a part cannot contain chapters or
        # other parts.
        rule part
          heading:part_heading
          children:(section / subsection / block_paragraphs)*
          <Part>
        end

        # A section title followed by subsections and blocks of paragraphs.
        rule section
          section_title
          children:(subsection / block_paragraphs)* <Section>
        end

        rule subsection
          space? subsection_prefix space?
          # eg: (2) (a) foo
          first_child:inline_block_element?
          # eg: (2)
          eol?
          children:block_element* <Subsection>
        end

        ##########
        # group elements
        #
        # these are used externally and provide support when parsing just
        # a particular portion of a document

        rule chapters
          children:chapter+ <GroupNode>
        end

        rule parts
          children:part+ <GroupNode>
        end

        rule sections
          children:section+ <GroupNode>
        end

        ##########
        # headings

        # The heading text is optional and may sit on the same line as the
        # prefix or on the line after it.
        rule chapter_heading
          space? chapter_heading_prefix heading:(newline? content)? eol
          <ChapterHeading>
        end

        # Same shape as chapter_heading, using the part prefix.
        rule part_heading
          space? part_heading_prefix heading:(newline? content)? eol
          <PartHeading>
        end

        # Two layouts are supported. The section_number_after_title option
        # decides which one can match: the predicates in the two alternatives
        # test the same option with opposite sense.
        rule section_title
          section_title_1 / section_1_title
        end

        rule section_title_1
          &{ |s| options[:section_number_after_title] }
          # Section title
          # 1. Section content
          content eol
          section_title_prefix whitespace <SectionTitleType1>
        end

        rule section_1_title
          # 1. Section title
          # Section content
          #
          # Additionally, the section title is optional.
          !{ |s| options[:section_number_after_title] }
          space? section_title_prefix section_title:section_title_content? eol?
          <SectionTitleType2>
        end

        rule section_title_content
          # don't match subsections, eg.
          #
          # 10. (1) subsection content...
          space !subsection_prefix content eol
        end

        ##########
        # blocks of content inside containers

        # One or more consecutive block elements, grouped into one node.
        rule block_paragraphs
          block_element+ <BlockParagraph>
        end

        # A block element that starts a line: a table, a list, or a plain
        # statement.
        rule block_element
          (table / blocklist / naked_statement)
        end

        # Block elements that don't have to appear at the start of a line.
        # ie. we don't need to guard against the start of a chapter, section, etc.
        rule inline_block_element
          (table / blocklist / inline_statement)
        end

        # One or more consecutive list items.
        rule blocklist
          blocklist_item+ <Blocklist>
        end

        rule blocklist_item
          # TODO: this whitespace should probably be space, to allow empty blocklist items followed by plain text
          space? blocklist_item_prefix whitespace item_content:(!blocklist_item_prefix clauses:clauses? eol)? eol?
          <BlocklistItem>
        end

        # List numbering: a parenthesised letter ordinal such as (a) or (iv),
        # or a dotted number with three or more parts such as 8.2.1.
        rule blocklist_item_prefix
          ('(' letter_ordinal ')') / dotted_number_3
        end

        ##########
        # statements - single lines of content
        #
        # If a statement starts with a backslash, it's considered to have escaped the subsequent word,
        # and is ignored. This allows escaping of section headings, etc.

        # A plain line of content. The negative lookahead stops it from
        # swallowing a line that starts a chapter, part, section, schedule or
        # subsection.
        rule naked_statement
          space? !(chapter_heading / part_heading / section_title / schedule_title / subsection_prefix) '\\'? clauses eol
          <NakedStatement>
        end

        # As naked_statement, except that a line starting with a subsection
        # prefix is accepted. Used by the preface and preamble rules.
        rule pre_body_statement
          space? !(chapter_heading / part_heading / section_title / schedule_title) '\\'? clauses eol
          <NakedStatement>
        end

        ##########
        # prefixes

        # "part" in any case, a space, the part number, then optional
        # separator characters (space, colon, hyphen).
        rule part_heading_prefix
          'part'i space alphanums [ :-]*
        end

        # "chapter" in any case, a space, the chapter number, then optional
        # separator characters (space, colon, hyphen).
        rule chapter_heading_prefix
          'chapter'i space alphanums [ :-]*
        end

        # A section number such as 1, 2A or 3b, with an optional trailing
        # full stop.
        rule section_title_prefix
          number_letter '.'?
        end

        rule subsection_prefix
          # there are two subsection handling syntaxes:
          #
          # (1) foo
          # (2A) foo
          #
          # and
          #
          # 8.2 for
          # 8.3 bar
          #
          # The second is less common, but this allows us to handle it.
          # Note that it is usually accompanied by a similar list number format:
          #
          # 8.2.1 item 1
          # 8.2.2 item 2
          #
          # which aren't subsections, but lists, so force the space at the end
          # of the number to catch this case.
          num:('(' number_letter ')')
          /
          num:dotted_number_2 '.'? space
        end

        include Slaw::Grammars::Inlines
        include Slaw::Grammars::Tables
        include Slaw::Grammars::Schedules
        include Slaw::Grammars::Terminals
      end
220
+ end
221
+ end
222
+ end