slaw 0.1.3 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/slaw/act.rb +8 -1
- data/lib/slaw/parse/builder.rb +38 -22
- data/lib/slaw/version.rb +1 -1
- data/lib/slaw/{parse → za}/bylaw.treetop +1 -1
- data/lib/slaw/za/bylaw_generator.rb +41 -0
- data/lib/slaw/{parse/nodes.rb → za/bylaw_nodes.rb} +1 -1
- data/lib/slaw.rb +0 -2
- data/spec/parse/builder_spec.rb +3 -0
- data/spec/{parse → za}/bylaw_spec.rb +13 -11
- metadata +7 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 405a0b941536c74c13588e1bfb4350c566337626
|
4
|
+
data.tar.gz: 809e1fd9fd4ada655d3531b4eb31702398490a42
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a636be697e3589db697232bc01876a864ee8c02eb4548232b9db8addc2c3d9fb0a5004ffeb94f12494e88889b2954c3edd61100a865ce9329b5eddad7381fbe8
|
7
|
+
data.tar.gz: 57e78d5489aa950436b2e7dc3ebe7d19a639b86c3f4fa3b95add5edfa9adbb0185b6f191a012f74ba4fc35100432096ac1ea09b928327afa214aaedd1a2c070c
|
data/lib/slaw/act.rb
CHANGED
@@ -63,7 +63,14 @@ module Slaw
|
|
63
63
|
# Parse the XML contained in the file-like object `io`
|
64
64
|
# @param io [file-like] io object with XML
|
65
65
|
def parse(io)
|
66
|
-
|
66
|
+
self.doc = Nokogiri::XML(io)
|
67
|
+
end
|
68
|
+
|
69
|
+
# Set the XML document backing this bylaw.
|
70
|
+
#
|
71
|
+
# @param doc [Nokogiri::XML::Document] document
|
72
|
+
def doc=(doc)
|
73
|
+
@doc = doc
|
67
74
|
@meta = @doc.at_xpath('/a:akomaNtoso/a:act/a:meta', a: NS)
|
68
75
|
@body = @doc.at_xpath('/a:akomaNtoso/a:act/a:body', a: NS)
|
69
76
|
|
data/lib/slaw/parse/builder.rb
CHANGED
@@ -12,61 +12,77 @@ module Slaw
|
|
12
12
|
# XML document.
|
13
13
|
#
|
14
14
|
# @example Parse some text into a well-formed document
|
15
|
-
# builder = Slaw::Builder.new
|
15
|
+
# builder = Slaw::Builder.new(parser: parser)
|
16
16
|
# xml = builder.parse_text(text)
|
17
17
|
# doc = builder.parse_xml(xml)
|
18
18
|
# builder.postprocess(doc)
|
19
19
|
#
|
20
20
|
# @example A quicker way to build a well-formed document
|
21
|
-
# builder = Slaw::Builder.new
|
22
21
|
# doc = builder.parse_and_process_text(text)
|
23
22
|
#
|
24
23
|
class Builder
|
25
24
|
include Slaw::Namespace
|
26
25
|
include Slaw::Logging
|
27
26
|
|
28
|
-
|
27
|
+
@@parsers = {}
|
29
28
|
|
30
|
-
#
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
29
|
+
# Create a new builder.
|
30
|
+
#
|
31
|
+
# Specify either `:parser` or `:grammar_file` and `:grammar_class`.
|
32
|
+
#
|
33
|
+
# @option opts [Treetop::Runtime::CompiledParser] :parser parser to use
|
34
|
+
# @option opts [String] :grammar_file grammar filename to load a parser from
|
35
|
+
# @option opts [String] :grammar_class name of the class that the grammar will generate
|
36
|
+
def initialize(opts={})
|
37
|
+
if opts[:parser]
|
38
|
+
@parser = opts[:parser]
|
39
|
+
elsif opts[:grammar_file] and opts[:grammar_class]
|
40
|
+
if @@parsers[opts[:grammar_class]]
|
41
|
+
# already compiled the grammar, just use it
|
42
|
+
@parser = @@parsers[opts[:grammar_class]]
|
43
|
+
else
|
44
|
+
# load the grammar
|
45
|
+
Treetop.load(opts[:grammar_file])
|
46
|
+
cls = eval(opts[:grammar_class])
|
47
|
+
@parser = cls.new
|
48
|
+
end
|
49
|
+
else
|
50
|
+
raise ArgumentError.new("Specify either :parser or :grammar_file and :grammar_class")
|
51
|
+
end
|
35
52
|
end
|
36
53
|
|
37
54
|
# Do all the work necessary to parse text into a well-formed XML document.
|
38
55
|
#
|
39
56
|
# @param text [String] the text to parse
|
40
|
-
# @param
|
57
|
+
# @param parse_options [Hash] options to pass to the parser
|
41
58
|
#
|
42
59
|
# @return [Nokogiri::XML::Document] a well formed document
|
43
|
-
def parse_and_process_text(text,
|
44
|
-
postprocess(parse_xml(parse_text(text,
|
60
|
+
def parse_and_process_text(text, parse_options={})
|
61
|
+
postprocess(parse_xml(parse_text(text, parse_options)))
|
45
62
|
end
|
46
63
|
|
47
64
|
# Parse text into XML. You should still run {#postprocess} on the
|
48
65
|
# resulting XML to normalise it.
|
49
66
|
#
|
50
67
|
# @param text [String] the text to parse
|
51
|
-
# @param
|
68
|
+
# @param parse_options [Hash] options to pass to the parser
|
52
69
|
#
|
53
70
|
# @return [String] an XML string
|
54
|
-
def parse_text(text,
|
55
|
-
tree = text_to_syntax_tree(text,
|
71
|
+
def parse_text(text, parse_options={})
|
72
|
+
tree = text_to_syntax_tree(text, parse_options)
|
56
73
|
xml_from_syntax_tree(tree)
|
57
74
|
end
|
58
75
|
|
59
76
|
# Parse plain text into a syntax tree.
|
60
77
|
#
|
61
78
|
# @param text [String] the text to parse
|
62
|
-
# @param
|
79
|
+
# @param parse_options [Hash] options to pass to the parser
|
63
80
|
#
|
64
|
-
# @return [Object] the root of the resulting parse tree, usually a Treetop::
|
65
|
-
def text_to_syntax_tree(text,
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
tree = parser.parse(text, {root: root})
|
81
|
+
# @return [Object] the root of the resulting parse tree, usually a Treetop::Runtime::SyntaxNode object
|
82
|
+
def text_to_syntax_tree(text, parse_options={})
|
83
|
+
logger.info("Parsing...")
|
84
|
+
tree = @parser.parse(text, parse_options)
|
85
|
+
logger.info("Parsed!")
|
70
86
|
|
71
87
|
if tree.nil?
|
72
88
|
raise Slaw::Parse::ParseError.new(parser.failure_reason || "Couldn't match to grammar",
|
@@ -80,7 +96,7 @@ module Slaw
|
|
80
96
|
# Generate an XML document from the given syntax tree. You should still
|
81
97
|
# run {#postprocess} on the resulting XML to normalise it.
|
82
98
|
#
|
83
|
-
# @param tree [Object] a Treetop::
|
99
|
+
# @param tree [Object] a Treetop::Runtime::SyntaxNode object
|
84
100
|
#
|
85
101
|
# @return [String] an XML string
|
86
102
|
def xml_from_syntax_tree(tree)
|
data/lib/slaw/version.rb
CHANGED
data/lib/slaw/za/bylaw_generator.rb
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
require 'slaw/za/bylaw_nodes'
|
2
|
+
|
3
|
+
module Slaw
|
4
|
+
# Support specifically for South Africa
|
5
|
+
module ZA
|
6
|
+
|
7
|
+
# Support class for generating South African bylaws
|
8
|
+
class BylawGenerator
|
9
|
+
Treetop.load(File.dirname(__FILE__) + "/bylaw.treetop")
|
10
|
+
|
11
|
+
# [Treetop::Runtime::CompiledParser] compiled bylaw parser
|
12
|
+
attr_accessor :parser
|
13
|
+
|
14
|
+
# [Slaw::Parse::Builder] builder used by the generator
|
15
|
+
attr_accessor :builder
|
16
|
+
|
17
|
+
def initialize
|
18
|
+
@parser = Slaw::ZA::BylawParser.new
|
19
|
+
@builder = Slaw::Parse::Builder.new(parser: @parser)
|
20
|
+
@cleanser = Slaw::Parse::Cleanser.new
|
21
|
+
end
|
22
|
+
|
23
|
+
# Generate a Slaw::ByLaw instance from plain text.
|
24
|
+
#
|
25
|
+
# @param text [String] plain text
|
26
|
+
#
|
27
|
+
# @return [Slaw::ByLaw] the resulting bylaw
|
28
|
+
def generate_from_text(text)
|
29
|
+
bylaw = Slaw::ByLaw.new
|
30
|
+
bylaw.doc = @builder.parse_and_process_text(cleanup(text))
|
31
|
+
bylaw
|
32
|
+
end
|
33
|
+
|
34
|
+
def cleanup(text)
|
35
|
+
text = @cleanser.cleanup(text)
|
36
|
+
text = @cleanser.reformat(text)
|
37
|
+
text
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
data/lib/slaw.rb
CHANGED
data/spec/parse/builder_spec.rb
CHANGED
@@ -4,6 +4,9 @@ require 'spec_helper'
|
|
4
4
|
require 'slaw'
|
5
5
|
|
6
6
|
describe Slaw::Parse::Builder do
|
7
|
+
let(:parser) { double("parser") }
|
8
|
+
subject { Slaw::Parse::Builder.new(parser: parser) }
|
9
|
+
|
7
10
|
describe '#nest_blocklists' do
|
8
11
|
it 'should nest simple blocks' do
|
9
12
|
doc = xml2doc(subsection(<<XML
|
data/spec/{parse → za}/bylaw_spec.rb
CHANGED
@@ -1,16 +1,18 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
|
3
|
-
require 'slaw'
|
4
3
|
require 'builder'
|
5
4
|
|
6
|
-
|
5
|
+
require 'slaw'
|
6
|
+
require 'slaw/za/bylaw_generator'
|
7
|
+
|
8
|
+
describe Slaw::ZA::BylawGenerator do
|
7
9
|
def parse(rule, s)
|
8
|
-
subject.text_to_syntax_tree(s, rule)
|
10
|
+
subject.builder.text_to_syntax_tree(s, {root: rule})
|
9
11
|
end
|
10
12
|
|
11
13
|
def should_parse(rule, s)
|
12
14
|
s << "\n" unless s.end_with?("\n")
|
13
|
-
tree = subject.text_to_syntax_tree(s, rule)
|
15
|
+
tree = subject.builder.text_to_syntax_tree(s, {root: rule})
|
14
16
|
|
15
17
|
if not tree
|
16
18
|
raise Exception.new(subject.failure_reason || "Couldn't match to grammar") if tree.nil?
|
@@ -61,7 +63,7 @@ EOS
|
|
61
63
|
end
|
62
64
|
|
63
65
|
it 'should handle parts and odd section numbers' do
|
64
|
-
subject.
|
66
|
+
subject.parser.options = {section_number_after_title: false}
|
65
67
|
node = parse :bylaw, <<EOS
|
66
68
|
PART 1
|
67
69
|
PREVENTION AND SUPPRESSION OF HEALTH NUISANCES
|
@@ -232,7 +234,7 @@ EOS
|
|
232
234
|
|
233
235
|
context 'sections' do
|
234
236
|
it 'should handle section numbers after title' do
|
235
|
-
subject.
|
237
|
+
subject.parser.options = {section_number_after_title: true}
|
236
238
|
node = parse :bylaw, <<EOS
|
237
239
|
Section
|
238
240
|
1. (1) hello
|
@@ -244,7 +246,7 @@ EOS
|
|
244
246
|
end
|
245
247
|
|
246
248
|
it 'should handle section numbers before title' do
|
247
|
-
subject.
|
249
|
+
subject.parser.options = {section_number_after_title: false}
|
248
250
|
node = parse :bylaw, <<EOS
|
249
251
|
1. Section
|
250
252
|
(1) hello
|
@@ -256,7 +258,7 @@ EOS
|
|
256
258
|
end
|
257
259
|
|
258
260
|
it 'should handle section numbers without a dot' do
|
259
|
-
subject.
|
261
|
+
subject.parser.options = {section_number_after_title: false}
|
260
262
|
node = parse :bylaw, <<EOS
|
261
263
|
1 A section
|
262
264
|
(1) hello
|
@@ -274,7 +276,7 @@ EOS
|
|
274
276
|
end
|
275
277
|
|
276
278
|
it 'should handle sections without titles' do
|
277
|
-
subject.
|
279
|
+
subject.parser.options = {section_number_after_title: false}
|
278
280
|
node = parse :bylaw, <<EOS
|
279
281
|
1. No owner or occupier of any shop or business premises or vacant land, blah blah
|
280
282
|
2. Notwithstanding the provision of any other By-law or legislation no person shall—
|
@@ -291,7 +293,7 @@ EOS
|
|
291
293
|
end
|
292
294
|
|
293
295
|
it 'should handle sections without titles and with subsections' do
|
294
|
-
subject.
|
296
|
+
subject.parser.options = {section_number_after_title: false}
|
295
297
|
node = parse :bylaw, <<EOS
|
296
298
|
10. (1) Transporters must remove medical waste.
|
297
299
|
(2) Without limiting generality, stuff.
|
@@ -305,7 +307,7 @@ EOS
|
|
305
307
|
end
|
306
308
|
|
307
309
|
it 'should realise complex section titles are actually section content' do
|
308
|
-
subject.
|
310
|
+
subject.parser.options = {section_number_after_title: false}
|
309
311
|
node = parse :bylaw, <<EOS
|
310
312
|
10. The owner of any premises which is let or sublet to more than one tenant, shall maintain at all times in a clean and sanitary condition every part of such premises as may be used in common by more than one tenant.
|
311
313
|
11. No person shall keep, cause or suffer to be kept any factory or trade premises so as to cause or give rise to smells or effluvia that constitute a health nuisance.
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: slaw
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Greg Kempe
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-09-
|
11
|
+
date: 2014-09-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -132,24 +132,25 @@ files:
|
|
132
132
|
- lib/slaw/namespace.rb
|
133
133
|
- lib/slaw/parse/blocklists.rb
|
134
134
|
- lib/slaw/parse/builder.rb
|
135
|
-
- lib/slaw/parse/bylaw.treetop
|
136
135
|
- lib/slaw/parse/cleanser.rb
|
137
136
|
- lib/slaw/parse/error.rb
|
138
137
|
- lib/slaw/parse/grammar_helpers.rb
|
139
|
-
- lib/slaw/parse/nodes.rb
|
140
138
|
- lib/slaw/render/html.rb
|
141
139
|
- lib/slaw/render/xsl/act.xsl
|
142
140
|
- lib/slaw/render/xsl/elements.xsl
|
143
141
|
- lib/slaw/render/xsl/fragment.xsl
|
144
142
|
- lib/slaw/version.rb
|
145
143
|
- lib/slaw/xml_support.rb
|
144
|
+
- lib/slaw/za/bylaw.treetop
|
145
|
+
- lib/slaw/za/bylaw_generator.rb
|
146
|
+
- lib/slaw/za/bylaw_nodes.rb
|
146
147
|
- slaw.gemspec
|
147
148
|
- spec/extract/extractor_spec.rb
|
148
149
|
- spec/parse/builder_spec.rb
|
149
|
-
- spec/parse/bylaw_spec.rb
|
150
150
|
- spec/parse/cleanser_spec.rb
|
151
151
|
- spec/spec_helper.rb
|
152
152
|
- spec/xml_helpers.rb
|
153
|
+
- spec/za/bylaw_spec.rb
|
153
154
|
homepage: ''
|
154
155
|
licenses:
|
155
156
|
- MIT
|
@@ -177,7 +178,7 @@ summary: A lightweight library for using Akoma Ntoso acts in Ruby.
|
|
177
178
|
test_files:
|
178
179
|
- spec/extract/extractor_spec.rb
|
179
180
|
- spec/parse/builder_spec.rb
|
180
|
-
- spec/parse/bylaw_spec.rb
|
181
181
|
- spec/parse/cleanser_spec.rb
|
182
182
|
- spec/spec_helper.rb
|
183
183
|
- spec/xml_helpers.rb
|
184
|
+
- spec/za/bylaw_spec.rb
|