slaw 0.17.2 → 1.0.0.alpha.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,33 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'slaw/grammars/schedules_nodes'
4
+
5
+ module Slaw
6
+ module Grammars
7
grammar Schedules
  # Entry point for the schedules of a document: wraps the whole run of
  # schedules in a single ScheduleContainer node.
  rule schedules_container
    schedules:schedules <ScheduleContainer>
  end

  # One or more schedules, one after the other.
  rule schedules
    children:schedule+ <GroupNode>
  end

  # A single schedule: its title, then an optional body. The body rule is
  # not defined in this grammar; it has to come from the grammar that
  # includes this one.
  rule schedule
    schedule_title
    body:body?
    <Schedule>
  end

  # The title of a schedule. In order:
  #   - optional leading space and the "schedule" keyword
  #   - an optional alphanumeric number, optionally inside double quotes
  #   - any run of spaces, tabs, colons, dots and dashes
  #   - optional title text, up to the end of the line
  #   - an optional heading on the line that follows
  rule schedule_title
    space? schedule_title_prefix space?
    "\""? num:alphanums? "\""?
    [ \t:.-]*
    title:(content)?
    heading:(newline space? content)?
    eol
  end

  # "schedule" with an optional trailing "s", in any letter case.
  rule schedule_title_prefix
    'schedule'i 's'i?
  end
end
32
+ end
33
+ end
@@ -0,0 +1,107 @@
1
+ require 'slaw/grammars/core_nodes'
2
+
3
+ module Slaw
4
+ module Grammars
5
+ module Schedules
6
+ FRBR_URI = '/za/act/1980/01'
7
+ WORK_URI = FRBR_URI
8
+ EXPRESSION_URI = "#{FRBR_URI}/eng@"
9
+ MANIFESTATION_URI = EXPRESSION_URI
10
+
11
class ScheduleContainer < Treetop::Runtime::SyntaxNode
  # Writes a <components> element containing every parsed schedule.
  #
  # Each schedule is rendered with an empty id prefix and its 1-based
  # position in the document.
  #
  # b - the builder to write to
  def to_xml(b)
    b.components do |components|
      schedules.children.elements.each.with_index(1) do |schedule, position|
        schedule.to_xml(components, "", position)
      end
    end
  end
end
20
+
21
class Schedule < Treetop::Runtime::SyntaxNode
  # The schedule number exactly as it was written in the title, or nil
  # when the title carries no number.
  def num
    n = schedule_title.num.text_value
    (n && !n.empty?) ? n : nil
  end

  # Display name of the schedule: the title text when there is one,
  # otherwise "Schedule <num>", otherwise plain "Schedule".
  def alias
    if not schedule_title.title.text_value.blank?
      schedule_title.title.text_value
    elsif num
      "Schedule #{num}"
    else
      "Schedule"
    end
  end

  # Text of the optional heading line that follows the title, or nil when
  # the title has no heading.
  def heading
    if schedule_title.heading.respond_to? :content
      schedule_title.heading.content.text_value
    end
  end

  # Writes this schedule as a <component> holding a <doc> with FRBR
  # metadata and a main body.
  #
  # b        - the builder to write to
  # idprefix - prepended to the component name to form this schedule's id
  # i        - position of this schedule among its siblings; kept for
  #            interface compatibility, it does not affect the output
  def to_xml(b, idprefix=nil, i=1)
    number = num

    if number
      component = "schedule#{number}"
    else
      # make a component name from the schedule title; stripping every
      # non-alphanumeric character also removes all whitespace
      component = self.alias.downcase.strip.gsub(/[^a-z0-9]/i, '')
    end

    id = "#{idprefix}#{component}"

    b.component(id: "component-#{id}") { |b|
      b.doc_(name: component) { |b|
        b.meta { |b|
          b.identification(source: "#slaw") { |b|
            b.FRBRWork { |b|
              b.FRBRthis(value: "#{WORK_URI}/#{component}")
              b.FRBRuri(value: WORK_URI)
              b.FRBRalias(value: self.alias)
              b.FRBRdate(date: '1980-01-01', name: 'Generation')
              b.FRBRauthor(href: '#council')
              b.FRBRcountry(value: 'za')
            }
            b.FRBRExpression { |b|
              b.FRBRthis(value: "#{EXPRESSION_URI}/#{component}")
              b.FRBRuri(value: EXPRESSION_URI)
              b.FRBRdate(date: '1980-01-01', name: 'Generation')
              b.FRBRauthor(href: '#council')
              b.FRBRlanguage(language: 'eng')
            }
            b.FRBRManifestation { |b|
              b.FRBRthis(value: "#{MANIFESTATION_URI}/#{component}")
              b.FRBRuri(value: MANIFESTATION_URI)
              b.FRBRdate(date: Time.now.strftime('%Y-%m-%d'), name: 'Generation')
              b.FRBRauthor(href: '#slaw')
            }
          }
        }

        b.mainBody { |b|
          child_prefix = "#{id}."

          # there is no good AKN hierarchy container for schedules, so we
          # just use article because we don't use it anywhere else.
          b.article(id: id) { |b|
            b.heading(heading) if heading

            if body.is_a? Body
              # Give every child its own position within the body. Passing
              # the schedule's index instead would hand the same number to
              # all of the children.
              body.children.elements.each_with_index { |e, position|
                e.to_xml(b, child_prefix, position)
              }
            end
          }
        }
      }
    }
  end
end
99
+
100
class ScheduleStatement < Treetop::Runtime::SyntaxNode
  # Writes the statement's clauses inside a <p> element. Nothing is
  # written when there are no clauses.
  #
  # b        - the builder to write to
  # idprefix - passed through to the clauses
  def to_xml(b, idprefix)
    return unless clauses

    b.p do |para|
      clauses.to_xml(para, idprefix)
    end
  end
end
105
+ end
106
+ end
107
+ end
@@ -0,0 +1,59 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'slaw/grammars/terminals'
4
+ require 'slaw/grammars/tables_nodes'
5
+
6
+ module Slaw
7
+ module Grammars
8
grammar Tables
  ##########
  # MediaWiki-style tables
  #
  # Cells written inline on a single line (eg: | col1 || col2 || col3) are
  # not supported by this grammar. Instead, the builder preprocesses tables
  # and moves each inline cell onto a line of its own, which is supported.

  # A complete table: an opening '{|' line, the body, and a closing '|}' line.
  rule table
    space? '{|' eol
    table_body
    '|}' eol
    <Table>
  end

  # Any sequence of row markers and cells.
  rule table_body
    (table_row / table_cell)*
  end

  # A '|-' line, which separates rows.
  rule table_row
    '|-' space? eol
  end

  rule table_cell
    # the end-of-table marker is never a cell
    !'|}'
    # a cell opens with '!' or '|', optionally followed by attributes
    [!|] attribs:table_attribs? space?
    # the first line of content, then any following lines that do not
    # themselves start with '!' or '|'
    content:(line:table_line (![!|] space? line:table_line)*)
    <TableCell>
  end

  # One line of cell content; the clauses are optional so a line may be empty.
  rule table_line
    clauses:clauses? eol
    <TableLine>
  end

  # One or more attributes, terminated by a '|'.
  rule table_attribs
    space? attribs:(table_attrib+) '|'
  end

  # A single name=value pair. The value must be quoted, with either
  # double or single quotes.
  rule table_attrib
    name:([a-z_-]+) '=' value:(
      ('"' (!'"' .)* '"') /
      ("'" (!"'" .)* "'"))
    space?
  end

  include Terminals
end
58
+ end
59
+ end
@@ -0,0 +1,74 @@
1
+ module Slaw
2
+ module Grammars
3
+ module Tables
4
class Table < Treetop::Runtime::SyntaxNode
  # Writes the table as a <table> element with one <tr> per row of cells.
  #
  # b        - the builder to write to
  # idprefix - prepended to the table's id
  # i        - number used in the table's id (defaults to 0)
  def to_xml(b, idprefix, i=0)
    b.table(id: "#{idprefix}table#{i}") do |table|
      # Begin with one open row. Every child that is not a cell is a row
      # marker and opens a fresh row; rows that never received a cell are
      # thrown away below.
      rows = [[]]

      table_body.elements.each do |child|
        if child.is_a? TableCell
          rows.last << child
        else
          rows << []
        end
      end

      rows.reject(&:empty?).each do |cells|
        table.tr do |tr|
          cells.each { |cell| cell.to_xml(tr, "") }
        end
      end
    end
  end
end
33
+
34
class TableCell < Treetop::Runtime::SyntaxNode
  # Writes the cell as a <th> (when its text starts with '!') or a <td>,
  # carrying any parsed attributes and wrapping the content lines in a <p>.
  #
  # b        - the builder to write to
  # idprefix - accepted for a uniform to_xml signature; not used here
  def to_xml(b, idprefix)
    tag = text_value[0] == '!' ? :th : :td

    attrs = {}
    unless attribs.empty?
      attribs.attribs.elements.each do |attrib|
        # the value text still includes its quotes: drop the first and
        # last characters
        attrs[attrib.name.text_value.strip] = attrib.value.text_value[1..-2]
      end
    end

    b.send(tag, attrs) do |cell|
      cell.p do |para|
        # the first content line, followed by all remaining ones
        lines = [content.line, *content.elements.last.elements.map(&:line)]
        final = lines.length - 1

        lines.each_with_index do |line, idx|
          line.to_xml(para, idx, idx == final)
        end
      end
    end
  end
end
58
+
59
class TableLine < Treetop::Runtime::SyntaxNode
  # Writes one line of table cell content.
  #
  # b    - the builder to write to
  # i    - index of this line within its cell
  # tail - true when this is the cell's last line
  def to_xml(b, i, tail)
    has_clauses = !clauses.empty?
    clauses.to_xml(b) if has_clauses

    # Line breaks are only written between lines: none after the last
    # line, and none after an empty first line.
    return if tail
    return unless i > 0 || has_clauses

    eol.text_value.count("\n").times { b.eol }
  end
end
72
+ end
73
+ end
74
+ end
@@ -0,0 +1,84 @@
1
+ # encoding: UTF-8
2
+
3
+ module Slaw
4
+ module Grammars
5
grammar Terminals
  #########
  ## a single line of basic content

  rule content
    # one or more characters that are not newlines; an eol (which starts
    # with a newline) must follow, but it is not consumed
    [^\n]+ &eol
  end

  ##########
  # terminals

  # a number with optional trailing letters, eg. 2, 2A, 2b
  rule number_letter
    number letter*
  end

  # a letter followed by any mix of letters and digits
  rule letter_ordinal
    letter (letter / digit)*
  end

  # three or more dot-separated numbers, eg. 8.2.1
  rule dotted_number_3
    number '.' number ('.' number)+
  end

  # exactly two dot-separated numbers, eg. 8.2
  rule dotted_number_2
    number '.' number
  end

  rule number
    digit+
  end

  rule digit
    [0-9]
  end

  rule letter
    [a-zA-Z]
  end

  rule alphanums
    [a-zA-Z0-9]+
  end

  # a straight or curly double quote
  rule quotes
    ["“”]
  end

  # any single character other than a straight or curly double quote
  rule non_quotes
    [^"“”]
  end

  ##########
  # whitespace

  # one or more spaces or tabs
  rule space
    [ \t]+
  end

  # any amount (including none) of spaces, tabs and newlines
  rule whitespace
    [ \t\n]*
  end

  # a line holding at most spaces and tabs
  rule empty_line
    space? newline
  end

  # the end of a line, together with any empty lines that follow it
  rule eol
    newline
    empty_line*
  end

  rule newline
    "\n"
  end
end
83
+ end
84
+ end
@@ -0,0 +1,222 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'slaw/parse/grammar_helpers'
4
+ require 'slaw/grammars/za/act_nodes'
5
+
6
+ require 'slaw/grammars/terminals'
7
+ require 'slaw/grammars/tables'
8
+ require 'slaw/grammars/schedules'
9
+ require 'slaw/grammars/inlines'
10
+
11
+ module Slaw
12
+ module Grammars
13
+ module ZA
14
grammar Act
  include Slaw::Parse::GrammarHelpers

  ########
  # major containers

  # A whole act: optional leading empty lines, an optional preface, an
  # optional preamble, the body, and optional schedules.
  rule act
    empty_line*
    preface:preface?
    preamble:preamble?
    body
    schedules:schedules_container? <Act>
  end

  # Statements ahead of the preamble, optionally introduced by a PREFACE line.
  #
  # NOTE(review): the !'PREAMBLE' lookaheads here are case-sensitive, while
  # the preamble rule matches its heading case-insensitively.
  rule preface
    !'PREAMBLE'
    ('PREFACE'i space? eol)?
    statements:(!'PREAMBLE' pre_body_statement)* <Preface>
  end

  # A PREAMBLE heading line followed by its statements.
  rule preamble
    'PREAMBLE'i space? eol
    statements:pre_body_statement* <Preamble>
  end

  rule body
    children:(chapter / part / section / subsection / block_paragraphs)+ <Body>
  end

  rule chapter
    heading:chapter_heading
    children:(part / section / subsection / block_paragraphs)*
    <Chapter>
  end

  rule part
    heading:part_heading
    children:(section / subsection / block_paragraphs)*
    <Part>
  end

  rule section
    section_title
    children:(subsection / block_paragraphs)* <Section>
  end

  rule subsection
    space? subsection_prefix space?
    # content on the same line as the number, eg: (2) (a) foo
    first_child:inline_block_element?
    # a number on a line of its own, eg: (2)
    eol?
    children:block_element* <Subsection>
  end

  ##########
  # group elements
  #
  # these are used from outside the grammar, to support parsing only a
  # particular portion of a document

  rule chapters
    children:chapter+ <GroupNode>
  end

  rule parts
    children:part+ <GroupNode>
  end

  rule sections
    children:section+ <GroupNode>
  end

  ##########
  # headings

  # The heading text may sit on the same line as the prefix or on the next one.
  rule chapter_heading
    space? chapter_heading_prefix heading:(newline? content)? eol
    <ChapterHeading>
  end

  # The heading text may sit on the same line as the prefix or on the next one.
  rule part_heading
    space? part_heading_prefix heading:(newline? content)? eol
    <PartHeading>
  end

  # Two layouts are supported; the section_number_after_title option
  # decides which of the two alternatives is allowed to match.
  rule section_title
    section_title_1 / section_1_title
  end

  rule section_title_1
    &{ |s| options[:section_number_after_title] }
    # the title comes first, with the number on the following line:
    #
    #   Section title
    #   1. Section content
    content eol
    section_title_prefix whitespace <SectionTitleType1>
  end

  rule section_1_title
    # the number comes first, followed by the title:
    #
    #   1. Section title
    #   Section content
    #
    # In this layout the section title is optional.
    !{ |s| options[:section_number_after_title] }
    space? section_title_prefix section_title:section_title_content? eol?
    <SectionTitleType2>
  end

  rule section_title_content
    # a subsection directly after the section number is not a title, eg.
    #
    #   10. (1) subsection content...
    space !subsection_prefix content eol
  end

  ##########
  # blocks of content inside containers

  rule block_paragraphs
    block_element+ <BlockParagraph>
  end

  rule block_element
    (table / blocklist / naked_statement)
  end

  # Block elements that need not appear at the start of a line, so there
  # is no need to guard against the start of a chapter, section, etc.
  rule inline_block_element
    (table / blocklist / inline_statement)
  end

  rule blocklist
    blocklist_item+ <Blocklist>
  end

  rule blocklist_item
    # TODO: this whitespace should probably be space, to allow empty blocklist items followed by plain text
    space? blocklist_item_prefix whitespace
    item_content:(!blocklist_item_prefix clauses:clauses? eol)?
    eol?
    <BlocklistItem>
  end

  # Either a parenthesised letter ordinal, eg. (a), or a number with three
  # or more dotted parts, eg. 8.2.1
  rule blocklist_item_prefix
    ('(' letter_ordinal ')') / dotted_number_3
  end

  ##########
  # statements - single lines of content
  #
  # A statement that starts with a backslash is taken to have escaped the
  # word that follows, and the backslash is ignored. This makes it possible
  # to escape section headings, etc.

  rule naked_statement
    space?
    !(chapter_heading / part_heading / section_title / schedule_title / subsection_prefix)
    '\\'? clauses eol
    <NakedStatement>
  end

  # Same as naked_statement, except that a subsection prefix does not
  # stop it from matching.
  rule pre_body_statement
    space?
    !(chapter_heading / part_heading / section_title / schedule_title)
    '\\'? clauses eol
    <NakedStatement>
  end

  ##########
  # prefixes

  rule part_heading_prefix
    'part'i space alphanums [ :-]*
  end

  rule chapter_heading_prefix
    'chapter'i space alphanums [ :-]*
  end

  rule section_title_prefix
    number_letter '.'?
  end

  rule subsection_prefix
    # there are two subsection numbering syntaxes:
    #
    #   (1) foo
    #   (2A) foo
    #
    # and
    #
    #   8.2 foo
    #   8.3 bar
    #
    # The second is less common, but this lets us handle it. It usually
    # comes together with a similar format for list numbers:
    #
    #   8.2.1 item 1
    #   8.2.2 item 2
    #
    # Those are lists rather than subsections, so a space is required
    # after the number to tell the two apart.
    num:('(' number_letter ')')
    /
    num:dotted_number_2 '.'? space
  end

  include Slaw::Grammars::Inlines
  include Slaw::Grammars::Tables
  include Slaw::Grammars::Schedules
  include Slaw::Grammars::Terminals
end
220
+ end
221
+ end
222
+ end