slaw 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/Gemfile +5 -0
- data/LICENSE.txt +22 -0
- data/README.md +31 -0
- data/Rakefile +7 -0
- data/lib/slaw/act.rb +243 -0
- data/lib/slaw/bylaw.rb +53 -0
- data/lib/slaw/collection.rb +32 -0
- data/lib/slaw/elasticsearch.rb +107 -0
- data/lib/slaw/lifecycle_event.rb +23 -0
- data/lib/slaw/logging.rb +14 -0
- data/lib/slaw/namespace.rb +7 -0
- data/lib/slaw/parse/blocklists.rb +181 -0
- data/lib/slaw/parse/builder.rb +263 -0
- data/lib/slaw/parse/bylaw.treetop +259 -0
- data/lib/slaw/parse/cleanser.rb +171 -0
- data/lib/slaw/parse/error.rb +26 -0
- data/lib/slaw/parse/grammar_helpers.rb +11 -0
- data/lib/slaw/parse/nodes.rb +371 -0
- data/lib/slaw/render/html.rb +53 -0
- data/lib/slaw/render/xsl/act.xsl +15 -0
- data/lib/slaw/render/xsl/elements.xsl +116 -0
- data/lib/slaw/render/xsl/fragment.xsl +16 -0
- data/lib/slaw/version.rb +3 -0
- data/lib/slaw/xml_support.rb +77 -0
- data/lib/slaw.rb +24 -0
- data/slaw.gemspec +30 -0
- data/spec/parse/builder_spec.rb +543 -0
- data/spec/parse/bylaw_spec.rb +365 -0
- data/spec/parse/cleanser_spec.rb +126 -0
- data/spec/spec_helper.rb +1 -0
- data/spec/xml_helpers.rb +46 -0
- metadata +194 -0
@@ -0,0 +1,171 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module Slaw
|
4
|
+
module Parse
|
5
|
+
# Helper class to run various cleanup routines on plain text.
#
# Some of these routines can safely be run multiple times,
# others are meant to be run only once.
class Cleanser
  # The unicode "object replacement character" (U+FFFC), which often turns
  # up in the incoming text and carries no meaning for us.
  OBJECT_REPLACEMENT_CHAR = [65532].pack('U*').freeze

  # Patterns removed (replaced with the empty string) by #remove_boilerplate,
  # applied in this order.
  BOILERPLATE_PATTERNS = [
    # nuke any line to do with Sabinet and the government printer
    /^.*Sabinet.*Government Printer.*$/i,
    /^.*Provincial Gazette \d+.*$/i,
    /^.*Provinsiale Koerant \d+.*$/i,
    # lines that contain nothing but a number
    /^\s*\d+\s*$/,
    # get rid of date lines
    /^\d+\s+\w+\s+\d+$/,
    # get rid of page number lines
    /^\s*page \d+( of \d+)?\s*\n/i,
  ].freeze

  # Run general cleanup, such as stripping bad chars and
  # removing unnecessary whitespace. This is idempotent
  # and safe to run multiple times.
  #
  # Returns the cleaned string, which always ends with a newline.
  def cleanup(s)
    s = scrub(s)
    s = correct_newlines(s)
    s = fix_quotes(s)
    s = expand_tabs(s)
    s = chomp(s)
    s = enforce_newline(s)
    s = remove_boilerplate(s)

    # Removing boilerplate can swallow the final newline (eg. when the
    # text is nothing but a page number), so enforce it once more.
    enforce_newline(s)
  end

  # Run deeper introspections and reformat the text, such as
  # unwrapping/re-wrapping lines. These may not be safe to run
  # multiple times.
  #
  # Returns the reformatted string, ending with a newline.
  def reformat(s)
    s = unbreak_lines(s)
    s = break_lines(s)
    s = strip_toc(s)
    enforce_newline(s)
  end

  # ------------------------------------------------------------------------

  # Remove each newline that is followed only by whitespace up to the
  # end of a line.
  def remove_empty_lines(s)
    s.gsub(/\n\s*$/, '')
  end

  # Normalise line endings: both "\r\n" and a lone "\r" become "\n".
  def correct_newlines(s)
    s.gsub(/\r\n?/, "\n")
  end

  # strip invalid bytes and ones we don't like
  def scrub(s)
    # we often get this unicode codepoint in the string, nuke it
    #
    # NOTE(review): the second pattern is an empty string as it appears
    # here, which makes that call a no-op. Presumably it once held an
    # invisible character -- confirm against the original file before
    # removing or changing it.
    s.gsub(OBJECT_REPLACEMENT_CHAR, '')
     .gsub("", '')
  end

  # Change weird (doubled-up) quotes to normal double quotes.
  def fix_quotes(s)
    s.gsub(/‘‘|’’|''/, '"')
  end

  # Replace every tab with a single space.
  def expand_tabs(s)
    s.gsub(/\t/, ' ')
  end

  # Remove known boilerplate lines (see BOILERPLATE_PATTERNS).
  def remove_boilerplate(s)
    BOILERPLATE_PATTERNS.reduce(s) { |text, pattern| text.gsub(pattern, '') }
  end

  # Strip trailing spaces from every line, then strip whitespace from
  # both ends of the whole string.
  def chomp(s)
    s.gsub(/ +$/, '').strip
  end

  # ensure string ends with a newline
  def enforce_newline(s)
    s.end_with?("\n") ? s : (s + "\n")
  end

  # make educated guesses about lines that should
  # have been broken but haven't, and break them
  def break_lines(s)
    # often we find a section title munged onto the same line as its first statement
    # eg:
    # foo bar. New section title 62. (1) For the purpose
    s = s.gsub(/\. ([^.]+) (\d+\. \(1\) )/, ".\n\\1\n\\2")

    # New section title 62. (1) For the purpose
    s = s.gsub(/(\w) (\d+\. \(1\) )/, "\\1\n\\2")

    # (1) foo; (2) bar
    # (1) foo. (2) bar
    s = s.gsub(/(\w{3,}[;.]) (\([0-9a-z]+\))/, "\\1\n\\2")

    # (1) foo; and (2) bar
    # (1) foo; or (2) bar
    s = s.gsub(/; (and|or) \(/, "; \\1\n(")

    # The officer-in-Charge may – (a) remove all withered natural... \n(b)
    # We do this last, because by now we should have recognised that (b) should already
    # be on a new line.
    s = s.gsub(/ (\(a\) .+?\n\(b\))/, "\n\\1")

    # "foo" means ...; "bar" means
    s.gsub(/; (["”“][^"”“]+?["”“] means)/, ";\n\\1")
  end

  # finds likely candidates for unnecessarily broken lines
  # and unbreaks them
  #
  # A line is joined onto the previous one (with a single space) when it
  # starts with a lowercase letter and the previous line ends with a
  # lowercase letter or digit.
  def unbreak_lines(s)
    start_re = /^\s*[a-z]/
    end_re = /[a-z0-9]\s*$/

    joined = s.split(/\n/).each_with_object([]) do |line, output|
      if !output.empty? && line =~ start_re && output.last =~ end_re
        output[-1] = output.last + ' ' + line
      else
        output << line
      end
    end

    joined.join("\n")
  end

  # do our best to remove table of contents at the start,
  # it really confuses the grammar
  #
  # Returns +s+ unchanged when no table of contents can be identified.
  def strip_toc(s)
    # first, try to find 'TABLE OF CONTENTS' anywhere within the first 4K of text,
    toc_start = s[0..4096].match(/TABLE OF CONTENTS/i)
    return s unless toc_start

    # grab the first non-blank line after that, it's our end-of-TOC marker
    eol = s.match(/^(.+?)$/, toc_start.end(0))
    return s unless eol

    marker = eol[0]

    # search for the first line that is a prefix of marker (or vv), and delete
    # everything in between
    posn = eol.end(0)
    while (m = s.match(/^(.+?)$/, posn))
      if marker.start_with?(m[0]) || m[0].start_with?(marker)
        return s[0...toc_start.begin(0)] + s[m.begin(0)..-1]
      end

      posn = m.end(0)
    end

    s
  end
end
|
170
|
+
end
|
171
|
+
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module Slaw
|
2
|
+
module Parse
|
3
|
+
# Error describing a parse failure, carrying the line and column
# at which it occurred.
#
# Subclasses StandardError (not Exception) so that an ordinary
# +rescue+ clause catches it.
class ParseError < StandardError
  # Longest message, in characters, emitted by #to_json before truncation.
  MAX_JSON_MESSAGE_LENGTH = 200

  attr_accessor :line, :column

  # message - description of the failure
  # opts    - Hash; +:line+ and +:column+ are read from it (both optional)
  def initialize(message, opts = {})
    super(message)

    self.line = opts[:line]
    self.column = opts[:column]
  end

  # Serialise this error as a JSON object with +message+, +line+ and
  # +column+ keys. Messages longer than MAX_JSON_MESSAGE_LENGTH characters
  # are cut to that length and suffixed with '...'.
  #
  # Relies on Hash#to_json being available (eg. from the json library).
  #
  # TODO: move this elsewhere, it's out of context here
  def to_json(g=nil)
    msg = message
    msg = msg[0, MAX_JSON_MESSAGE_LENGTH] + '...' if msg.length > MAX_JSON_MESSAGE_LENGTH

    {
      message: msg,
      line: line,
      column: column,
    }.to_json(g)
  end
end
|
25
|
+
end
|
26
|
+
end
|
@@ -0,0 +1,371 @@
|
|
1
|
+
module Slaw
|
2
|
+
module Parse
|
3
|
+
module Bylaw
|
4
|
+
# Syntax node for a whole by-law.
class Bylaw < Treetop::Runtime::SyntaxNode
  # Write this by-law to the builder +b+: an +act+ element holding the
  # metadata, the preamble (only when it has text) and the body, followed
  # by whatever +schedules+ renders.
  def to_xml(b)
    b.act(contains: "originalVersion") do |b|
      b.meta do |b|
        b.identification(source: "#openbylaws") do |b|
          # TODO: correct values
          b.FRBRWork do |b|
            b.FRBRthis(value: '/za/by-law/locale/1980/name/main')
            b.FRBRuri(value: '/za/by-law/locale/1980/name')
            b.FRBRalias(value: 'By-Law Short Title')
            b.FRBRdate(date: '1980-01-01', name: 'Generation')
            b.FRBRauthor(href: '#council', as: '#author')
            b.FRBRcountry(value: 'za')
          end

          b.FRBRExpression do |b|
            b.FRBRthis(value: '/za/by-law/locale/1980/name/main/eng@')
            b.FRBRuri(value: '/za/by-law/locale/1980/name/eng@')
            b.FRBRdate(date: '1980-01-01', name: 'Generation')
            b.FRBRauthor(href: '#council', as: '#author')
            b.FRBRlanguage(language: 'eng')
          end

          b.FRBRManifestation do |b|
            b.FRBRthis(value: '/za/by-law/locale/1980/name/main/eng@')
            b.FRBRuri(value: '/za/by-law/locale/1980/name/eng@')
            # the manifestation is stamped with today's date
            b.FRBRdate(date: Time.now.strftime('%Y-%m-%d'), name: 'Generation')
            b.FRBRauthor(href: '#openbylaws', as: '#author')
          end
        end

        b.publication(
          date: '1980-01-01',
          name: 'Province of Western Cape: Provincial Gazette',
          number: 'XXXX',
          showAs: 'Province of Western Cape: Provincial Gazette'
        )

        b.references(source: "#this") do
          b.TLCOrganization(id: 'openbylaws', href: 'http://openbylaws.org.za', showAs: "openbylaws.org.za")
          b.TLCOrganization(id: 'council', href: '/ontology/organization/za/council.cape-town', showAs: "Cape Town City Council")
          b.TLCRole(id: 'author', href: '/ontology/role/author', showAs: 'Author')
        end
      end

      # skip the preamble element entirely when there is no preamble text
      unless preamble.text_value == ""
        b.preamble do |b|
          preamble.to_xml(b)
        end
      end

      b.body do |b|
        chapters.elements.each { |chapter| chapter.to_xml(b) }
      end
    end

    schedules.to_xml(b)
  end
end
|
59
|
+
|
60
|
+
# Syntax node for the preamble.
class Preamble < Treetop::Runtime::SyntaxNode
  # Write one +p+ element per statement to the builder +b+, leaving out
  # any statement whose text starts with "preamble" (case-insensitive).
  def to_xml(b)
    statements.elements.each do |statement|
      text = statement.content.text_value
      next if text =~ /^preamble/i

      b.p(statement.content.text_value)
    end
  end
end
|
69
|
+
|
70
|
+
# Syntax node for a part, which may or may not carry a heading.
class Part < Treetop::Runtime::SyntaxNode
  # The part number taken from the heading, or nil when there is no heading.
  def num
    heading.empty? ? nil : heading.num
  end

  # Write this part to the builder +b+. With a heading, the sections are
  # wrapped in a +part+ element; without one, they are emitted directly.
  def to_xml(b)
    if heading.empty?
      # no parts
      sections.elements.each { |section| section.to_xml(b) }
    else
      id = "part-#{num}"

      # include a chapter number in the id if our parent has one
      if parent && parent.parent.is_a?(Chapter) && parent.parent.num
        id = "chapter-#{parent.parent.num}.#{id}"
      end

      b.part(id: id) do |b|
        heading.to_xml(b)
        sections.elements.each { |section| section.to_xml(b) }
      end
    end
  end
end
|
95
|
+
|
96
|
+
# Syntax node for the heading of a part.
class PartHeading < Treetop::Runtime::SyntaxNode
  # Text of the +alphanums+ node inside the heading prefix.
  def num
    part_heading_prefix.alphanums.text_value
  end

  # Text of the heading's content node.
  def title
    content.text_value
  end

  # Write the +num+ and +heading+ elements to the builder +b+.
  def to_xml(b)
    b.num(num)
    b.heading(title)
  end
end
|
110
|
+
|
111
|
+
# Syntax node for a chapter, which may or may not carry a heading.
class Chapter < Treetop::Runtime::SyntaxNode
  # The chapter number taken from the heading, or nil when there is no heading.
  def num
    heading.empty? ? nil : heading.num
  end

  # Write this chapter to the builder +b+. With a heading, the parts are
  # wrapped in a +chapter+ element; without one, they are emitted directly.
  def to_xml(b)
    if heading.empty?
      # no chapters
      parts.elements.each { |part| part.to_xml(b) }
    else
      id = "chapter-#{num}"

      # include a part number in the id if our parent has one
      if parent && parent.parent.is_a?(Part) && parent.parent.num
        id = "part-#{parent.parent.num}.#{id}"
      end

      b.chapter(id: id) do |b|
        heading.to_xml(b)
        parts.elements.each { |part| part.to_xml(b) }
      end
    end
  end
end
|
136
|
+
|
137
|
+
# Syntax node for the heading of a chapter.
class ChapterHeading < Treetop::Runtime::SyntaxNode
  # Text of the +alphanums+ node inside the heading prefix.
  def num
    chapter_heading_prefix.alphanums.text_value
  end

  # The heading text, read from +heading.content+ when this node responds
  # to +heading+, otherwise from +content+ when it responds to that;
  # nil when it responds to neither.
  def title
    return heading.content.text_value if respond_to?(:heading)

    content.text_value if respond_to?(:content)
  end

  # Write the +num+ element and, when there is a title, the +heading+
  # element to the builder +b+.
  def to_xml(b)
    b.num(num)
    b.heading(title) if title
  end
end
|
155
|
+
|
156
|
+
# Syntax node for a section.
class Section < Treetop::Runtime::SyntaxNode
  # Section number, as reported by the section title node.
  def num
    section_title.num
  end

  # Section title text, as reported by the section title node.
  def title
    section_title.title
  end

  # Write a +section+ element (num, heading, then each subsection) to the
  # builder +b+. Subsections receive their index and an id prefix of the
  # form "section-N.".
  def to_xml(b)
    id = "section-#{num}"
    idprefix = "#{id}."

    b.section(id: id) do |b|
      b.num("#{num}.")
      b.heading(title)

      subsections.elements.each_with_index do |subsection, i|
        subsection.to_xml(b, i, idprefix)
      end
    end
  end
end
|
177
|
+
|
178
|
+
# A section title where the title precedes the numbered text, eg:
#
#   Definitions
#   1. In this by-law...
class SectionTitleType1 < Treetop::Runtime::SyntaxNode
  # Text of the +number_letter+ node inside the title prefix.
  def num
    prefix = section_title_prefix
    prefix.number_letter.text_value
  end

  # Text of the title's content node.
  def title
    content.text_value
  end
end
|
192
|
+
|
193
|
+
# A section title where the number comes first, eg:
#
#   1. Definitions
#   In this by-law...
#
# In this format, the title is optional and the section content may
# start where we think the title is.
class SectionTitleType2 < Treetop::Runtime::SyntaxNode
  # Text of the +number_letter+ node inside the title prefix.
  def num
    prefix = section_title_prefix
    prefix.number_letter.text_value
  end

  # The title text, or an empty string when no title was matched.
  def title
    return "" if section_title.empty?

    section_title.content.text_value
  end
end
|
210
|
+
|
211
|
+
# Syntax node for a subsection: a statement with an optional block list.
class Subsection < Treetop::Runtime::SyntaxNode
  # Write a +subsection+ element to the builder +b+.
  #
  # b        - the builder
  # i        - index of this subsection within its section; used in the id
  #            of unnumbered subsections and passed on to the block list
  # idprefix - string prepended to this subsection's id
  def to_xml(b, i, idprefix)
    numbered = statement.is_a?(NumberedStatement)

    # numbered statements are identified by their number with the
    # parentheses removed, everything else by position
    suffix = numbered ? statement.num.gsub(/[()]/, '') : "subsection-#{i}"
    attribs = {id: idprefix + suffix}

    idprefix = attribs[:id] + "."

    b.subsection(attribs) do |b|
      b.num(statement.num) if numbered

      b.content do |b|
        if blocklist && blocklist.is_a?(Blocklist)
          if statement.content
            # the statement text becomes the list introduction
            # NOTE(review): << is given the text as-is; confirm whether the
            # builder escapes it or whether it needs escaping first
            blocklist.to_xml(b, i, idprefix) { |b| b << statement.content.text_value }
          else
            blocklist.to_xml(b, i, idprefix)
          end
        elsif statement.content
          # raw content
          b.p(statement.content.text_value)
        end
      end
    end
  end
end
|
239
|
+
|
240
|
+
# Syntax node for a statement that starts with a number.
class NumberedStatement < Treetop::Runtime::SyntaxNode
  # Text of the +num+ node inside the statement prefix.
  def num
    numbered_statement_prefix.num.text_value
  end

  # True unless the prefix responds to +dotted_number_2+.
  def parentheses?
    !numbered_statement_prefix.respond_to?(:dotted_number_2)
  end

  # The content node found on the fourth child element, or nil when
  # that element matched no text.
  def content
    body = elements[3]
    body.text_value == "" ? nil : body.content
  end
end
|
257
|
+
|
258
|
+
# Syntax node for a statement that is not a NumberedStatement; it adds
# no behaviour of its own.
class NakedStatement < Treetop::Runtime::SyntaxNode; end
|
260
|
+
|
261
|
+
# Syntax node for a block list.
class Blocklist < Treetop::Runtime::SyntaxNode
  # Render a block list to xml. If a block is given,
  # yield to it a builder to insert a listIntroduction node.
  #
  # b        - the builder
  # i        - index used to build this list's id ("list<i>")
  # idprefix - string prepended to the list id; items receive the list id
  #            plus "." as their own prefix
  def to_xml(b, i, idprefix, &block)
    id = "#{idprefix}list#{i}"
    item_prefix = "#{id}."

    b.blockList(id: id) do |b|
      b.listIntroduction { |b| yield b } if block_given?

      elements.each { |item| item.to_xml(b, item_prefix) }
    end
  end
end
|
275
|
+
|
276
|
+
# Syntax node for a single item in a block list.
class BlocklistItem < Treetop::Runtime::SyntaxNode
  # Text of the item's prefix node.
  def num
    blocklist_item_prefix.text_value
  end

  # The item's text, or nil when this node has no +item_content+ or that
  # node has no +content+.
  def content
    # TODO this really seems a bit odd
    return unless respond_to?(:item_content) && item_content.respond_to?(:content)

    item_content.content.text_value
  end

  # Write an +item+ element to the builder +b+; its id is +idprefix+
  # followed by the item number with parentheses removed.
  def to_xml(b, idprefix)
    item_id = idprefix + num.gsub(/[()]/, '')

    b.item(id: item_id) do |b|
      b.num(num)
      b.p(content) if content
    end
  end
end
|
293
|
+
|
294
|
+
# Syntax node holding all the schedules of a by-law.
class ScheduleContainer < Treetop::Runtime::SyntaxNode
  # Write a +components+ element wrapping every schedule to the builder +b+.
  # Writes nothing (and returns nil) when there are no schedules.
  def to_xml(b)
    return if schedules.elements.empty?

    b.components { |b|
      b.component(id: 'component-0') { |b|
        b.doc(name: 'schedules') { |b|
          b.meta { |b|
            b.identification(source: "#openbylaws") { |b|
              b.FRBRWork { |b|
                b.FRBRthis(value: '/za/by-law/locale/1980/name/main/schedules')
                b.FRBRuri(value: '/za/by-law/locale/1980/name/schedules')
                b.FRBRdate(date: '1980-01-01', name: 'Generation')
                b.FRBRauthor(href: '#council', as: '#author')
                b.FRBRcountry(value: 'za')
              }
              b.FRBRExpression { |b|
                # single slash before "schedules", in line with the Work and
                # Manifestation values (this previously read "main//schedules")
                b.FRBRthis(value: '/za/by-law/locale/1980/name/main/schedules/eng@')
                b.FRBRuri(value: '/za/by-law/locale/1980/name/schedules/eng@')
                b.FRBRdate(date: '1980-01-01', name: 'Generation')
                b.FRBRauthor(href: '#council', as: '#author')
                b.FRBRlanguage(language: 'eng')
              }
              b.FRBRManifestation { |b|
                b.FRBRthis(value: '/za/by-law/locale/1980/name/main/schedules/eng@')
                b.FRBRuri(value: '/za/by-law/locale/1980/name/schedules/eng@')
                # the manifestation is stamped with today's date
                b.FRBRdate(date: Time.now.strftime('%Y-%m-%d'), name: 'Generation')
                b.FRBRauthor(href: '#openbylaws', as: '#author')
              }
            }
          }

          b.mainBody { |b|
            # each schedule is handed its index within the container
            schedules.elements.each_with_index { |e, i| e.to_xml(b, i) }
          }
        }
      }
    }
  end
end
|
334
|
+
|
335
|
+
# Syntax node for a single schedule.
class Schedule < Treetop::Runtime::SyntaxNode
  # The schedule number from the heading, or nil when it is missing or empty.
  def num
    n = schedule_heading.num.text_value
    n unless n.nil? || n.empty?
  end

  # The schedule title text, or nil when the title node has no +content+.
  def heading
    title = schedule_heading.schedule_title
    title.respond_to?(:content) ? title.content.text_value : nil
  end

  # Write this schedule to the builder +b+ as a +chapter+ element holding a
  # single section, with one +p+ element per statement.
  #
  # NOTE(review): +i+ is not used, so every schedule without a number is
  # given the same id ("schedules") -- confirm whether that is intended.
  def to_xml(b, i)
    n = num
    id = n ? "schedule-#{n}" : "schedules"

    b.chapter(id: id) do |b|
      b.num(num) if num
      b.heading(heading) if heading

      b.section(id: id + ".section-0") do |b|
        b.content do |b|
          statements.elements.each { |statement| b.p(statement.content.text_value) }
        end
      end
    end
  end
end
|
369
|
+
end
|
370
|
+
end
|
371
|
+
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
module Slaw
|
2
|
+
module Render
|
3
|
+
|
4
|
+
# Support for transforming XML AN documents into HTML.
class HTMLRenderer
  # Load the stylesheets that live in the +xsl+ directory next to this file.
  def initialize
    here = File.dirname(__FILE__)

    @xslt = {
      act: load_xslt(File.join(here, 'xsl', 'act.xsl')),
      fragment: load_xslt(File.join(here, 'xsl', 'fragment.xsl')),
    }
  end

  # Transform an entire XML document +doc+ (a Nokogiri::XML::Document object) into HTML.
  # Specify +base_url+ to manage the base for relative URLs generated by
  # the transform.
  #
  # Returns the transform result as a String.
  def render(doc, base_url='')
    params = transform_params({'base_url' => base_url})
    run_xslt(:act, doc, params)
  end

  # Transform just a single node and its children into HTML.
  #
  # If +node+ has an id, we use xpath to tell the XSLT which
  # element to transform. Otherwise we copy the node into a new
  # tree and apply the XSLT to that.
  #
  # Returns the transform result as a String.
  def render_node(node, base_url='')
    params = transform_params({'base_url' => base_url})

    if node.id
      # root_elem is appended after quoting, so it is passed to the
      # stylesheet unquoted
      params += ['root_elem', "//*[@id='#{node.id}']"]
      doc = node.document
    else
      # create a new document with just this element at the root
      doc = Nokogiri::XML::Document.new
      doc.root = node
      params += ['root_elem', '*']
    end

    run_xslt(:fragment, doc, params)
  end

  # Apply the stylesheet registered under the key +xslt+ (+:act+ or
  # +:fragment+) to +doc+ with +params+, returning the result as a String.
  def run_xslt(xslt, doc, params)
    @xslt[xslt].transform(doc, params).to_s
  end

  # Quote a Hash of parameters for use with an XSLT transform.
  def transform_params(params)
    Nokogiri::XSLT.quote_params(params)
  end

  private

  # Parse the stylesheet at +path+. The file is opened in block form so the
  # handle is closed as soon as parsing is done, and a File (rather than its
  # contents) is still what gets handed to Nokogiri, as before.
  def load_xslt(path)
    File.open(path) { |file| Nokogiri::XSLT(file) }
  end
end
|
52
|
+
end
|
53
|
+
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
<?xml version="1.0"?>
<!-- Stylesheet that renders the act of an Akoma Ntoso document as HTML. -->
<xsl:stylesheet version="1.0"
                xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:a="http://www.akomantoso.org/2.0"
                exclude-result-prefixes="a">

  <!-- templates are pulled in from elements.xsl -->
  <xsl:import href="elements.xsl"/>

  <xsl:output method="html"/>

  <!-- start at the document root and apply templates to the act element -->
  <xsl:template match="/">
    <xsl:apply-templates select="a:akomaNtoso/a:act"/>
  </xsl:template>

</xsl:stylesheet>
|
15
|
+
|