slaw 0.1.3 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/slaw/act.rb +8 -1
- data/lib/slaw/parse/builder.rb +38 -22
- data/lib/slaw/version.rb +1 -1
- data/lib/slaw/{parse → za}/bylaw.treetop +1 -1
- data/lib/slaw/za/bylaw_generator.rb +41 -0
- data/lib/slaw/{parse/nodes.rb → za/bylaw_nodes.rb} +1 -1
- data/lib/slaw.rb +0 -2
- data/spec/parse/builder_spec.rb +3 -0
- data/spec/{parse → za}/bylaw_spec.rb +13 -11
- metadata +7 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 405a0b941536c74c13588e1bfb4350c566337626
|
4
|
+
data.tar.gz: 809e1fd9fd4ada655d3531b4eb31702398490a42
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a636be697e3589db697232bc01876a864ee8c02eb4548232b9db8addc2c3d9fb0a5004ffeb94f12494e88889b2954c3edd61100a865ce9329b5eddad7381fbe8
|
7
|
+
data.tar.gz: 57e78d5489aa950436b2e7dc3ebe7d19a639b86c3f4fa3b95add5edfa9adbb0185b6f191a012f74ba4fc35100432096ac1ea09b928327afa214aaedd1a2c070c
|
data/lib/slaw/act.rb
CHANGED
@@ -63,7 +63,14 @@ module Slaw
|
|
63
63
|
# Parse the XML contained in the file-like object `io`
|
64
64
|
# @param io [file-like] io object with XML
|
65
65
|
def parse(io)
|
66
|
-
|
66
|
+
self.doc = Nokogiri::XML(io)
|
67
|
+
end
|
68
|
+
|
69
|
+
# Set the XML document backing this bylaw.
|
70
|
+
#
|
71
|
+
# @param doc [Nokogiri::XML::Document] document
|
72
|
+
def doc=(doc)
|
73
|
+
@doc = doc
|
67
74
|
@meta = @doc.at_xpath('/a:akomaNtoso/a:act/a:meta', a: NS)
|
68
75
|
@body = @doc.at_xpath('/a:akomaNtoso/a:act/a:body', a: NS)
|
69
76
|
|
data/lib/slaw/parse/builder.rb
CHANGED
@@ -12,61 +12,77 @@ module Slaw
|
|
12
12
|
# XML document.
|
13
13
|
#
|
14
14
|
# @example Parse some text into a well-formed document
|
15
|
-
# builder = Slaw::Builder.new
|
15
|
+
# builder = Slaw::Builder.new(parser: parser)
|
16
16
|
# xml = builder.parse_text(text)
|
17
17
|
# doc = builder.parse_xml(xml)
|
18
18
|
# builder.postprocess(doc)
|
19
19
|
#
|
20
20
|
# @example A quicker way to build a well-formed document
|
21
|
-
# builder = Slaw::Builder.new
|
22
21
|
# doc = builder.parse_and_process_text(text)
|
23
22
|
#
|
24
23
|
class Builder
|
25
24
|
include Slaw::Namespace
|
26
25
|
include Slaw::Logging
|
27
26
|
|
28
|
-
|
27
|
+
@@parsers = {}
|
29
28
|
|
30
|
-
#
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
29
|
+
# Create a new builder.
|
30
|
+
#
|
31
|
+
# Specify either `:parser`, or both `:grammar_file` and `:grammar_class`.
|
32
|
+
#
|
33
|
+
# @option opts [Treetop::Runtime::CompiledParser] :parser parser to use
|
34
|
+
# @option opts [String] :grammar_file grammar filename to load a parser from
|
35
|
+
# @option opts [String] :grammar_class name of the class that the grammar will generate
|
36
|
+
def initialize(opts={})
|
37
|
+
if opts[:parser]
|
38
|
+
@parser = opts[:parser]
|
39
|
+
elsif opts[:grammar_file] and opts[:grammar_class]
|
40
|
+
if @@parsers[opts[:grammar_class]]
|
41
|
+
# already compiled the grammar, just use it
|
42
|
+
@parser = @@parsers[opts[:grammar_class]]
|
43
|
+
else
|
44
|
+
# load the grammar
|
45
|
+
Treetop.load(opts[:grammar_file])
|
46
|
+
cls = eval(opts[:grammar_class])
|
47
|
+
@parser = cls.new
|
48
|
+
end
|
49
|
+
else
|
50
|
+
raise ArgumentError.new("Specify either :parser or :grammar_file and :grammar_class")
|
51
|
+
end
|
35
52
|
end
|
36
53
|
|
37
54
|
# Do all the work necessary to parse text into a well-formed XML document.
|
38
55
|
#
|
39
56
|
# @param text [String] the text to parse
|
40
|
-
# @param
|
57
|
+
# @param parse_options [Hash] options to pass to the parser
|
41
58
|
#
|
42
59
|
# @return [Nokogiri::XML::Document] a well formed document
|
43
|
-
def parse_and_process_text(text,
|
44
|
-
postprocess(parse_xml(parse_text(text,
|
60
|
+
def parse_and_process_text(text, parse_options={})
|
61
|
+
postprocess(parse_xml(parse_text(text, parse_options)))
|
45
62
|
end
|
46
63
|
|
47
64
|
# Parse text into XML. You should still run {#postprocess} on the
|
48
65
|
# resulting XML to normalise it.
|
49
66
|
#
|
50
67
|
# @param text [String] the text to parse
|
51
|
-
# @param
|
68
|
+
# @param parse_options [Hash] options to pass to the parser
|
52
69
|
#
|
53
70
|
# @return [String] an XML string
|
54
|
-
def parse_text(text,
|
55
|
-
tree = text_to_syntax_tree(text,
|
71
|
+
def parse_text(text, parse_options={})
|
72
|
+
tree = text_to_syntax_tree(text, parse_options)
|
56
73
|
xml_from_syntax_tree(tree)
|
57
74
|
end
|
58
75
|
|
59
76
|
# Parse plain text into a syntax tree.
|
60
77
|
#
|
61
78
|
# @param text [String] the text to parse
|
62
|
-
# @param
|
79
|
+
# @param parse_options [Hash] options to pass to the parser
|
63
80
|
#
|
64
|
-
# @return [Object] the root of the resulting parse tree, usually a Treetop::
|
65
|
-
def text_to_syntax_tree(text,
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
tree = parser.parse(text, {root: root})
|
81
|
+
# @return [Object] the root of the resulting parse tree, usually a Treetop::Runtime::SyntaxNode object
|
82
|
+
def text_to_syntax_tree(text, parse_options={})
|
83
|
+
logger.info("Parsing...")
|
84
|
+
tree = @parser.parse(text, parse_options)
|
85
|
+
logger.info("Parsed!")
|
70
86
|
|
71
87
|
if tree.nil?
|
72
88
|
raise Slaw::Parse::ParseError.new(parser.failure_reason || "Couldn't match to grammar",
|
@@ -80,7 +96,7 @@ module Slaw
|
|
80
96
|
# Generate an XML document from the given syntax tree. You should still
|
81
97
|
# run {#postprocess} on the resulting XML to normalise it.
|
82
98
|
#
|
83
|
-
# @param tree [Object] a Treetop::
|
99
|
+
# @param tree [Object] a Treetop::Runtime::SyntaxNode object
|
84
100
|
#
|
85
101
|
# @return [String] an XML string
|
86
102
|
def xml_from_syntax_tree(tree)
|
data/lib/slaw/version.rb
CHANGED
@@ -0,0 +1,41 @@
|
|
1
|
+
require 'slaw/za/bylaw_nodes'
|
2
|
+
|
3
|
+
module Slaw
|
4
|
+
# Support specifically for South Africa
|
5
|
+
module ZA
|
6
|
+
|
7
|
+
# Support class for generating South African bylaws
|
8
|
+
class BylawGenerator
|
9
|
+
Treetop.load(File.dirname(__FILE__) + "/bylaw.treetop")
|
10
|
+
|
11
|
+
# [Treetop::Runtime::CompiledParser] compiled bylaw parser
|
12
|
+
attr_accessor :parser
|
13
|
+
|
14
|
+
# [Slaw::Parse::Builder] builder used by the generator
|
15
|
+
attr_accessor :builder
|
16
|
+
|
17
|
+
def initialize
|
18
|
+
@parser = Slaw::ZA::BylawParser.new
|
19
|
+
@builder = Slaw::Parse::Builder.new(parser: @parser)
|
20
|
+
@cleanser = Slaw::Parse::Cleanser.new
|
21
|
+
end
|
22
|
+
|
23
|
+
# Generate a Slaw::Bylaw instance from plain text.
|
24
|
+
#
|
25
|
+
# @param text [String] plain text
|
26
|
+
#
|
27
|
+
# @return [Slaw::ByLaw] the resulting bylaw
|
28
|
+
def generate_from_text(text)
|
29
|
+
bylaw = Slaw::ByLaw.new
|
30
|
+
bylaw.doc = @builder.parse_and_process_text(cleanup(text))
|
31
|
+
bylaw
|
32
|
+
end
|
33
|
+
|
34
|
+
def cleanup(text)
|
35
|
+
text = @cleanser.cleanup(text)
|
36
|
+
text = @cleanser.reformat(text)
|
37
|
+
text
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
data/lib/slaw.rb
CHANGED
data/spec/parse/builder_spec.rb
CHANGED
@@ -4,6 +4,9 @@ require 'spec_helper'
|
|
4
4
|
require 'slaw'
|
5
5
|
|
6
6
|
describe Slaw::Parse::Builder do
|
7
|
+
let(:parser) { double("parser") }
|
8
|
+
subject { Slaw::Parse::Builder.new(parser: parser) }
|
9
|
+
|
7
10
|
describe '#nest_blocklists' do
|
8
11
|
it 'should nest simple blocks' do
|
9
12
|
doc = xml2doc(subsection(<<XML
|
@@ -1,16 +1,18 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
|
3
|
-
require 'slaw'
|
4
3
|
require 'builder'
|
5
4
|
|
6
|
-
|
5
|
+
require 'slaw'
|
6
|
+
require 'slaw/za/bylaw_generator'
|
7
|
+
|
8
|
+
describe Slaw::ZA::BylawGenerator do
|
7
9
|
def parse(rule, s)
|
8
|
-
subject.text_to_syntax_tree(s, rule)
|
10
|
+
subject.builder.text_to_syntax_tree(s, {root: rule})
|
9
11
|
end
|
10
12
|
|
11
13
|
def should_parse(rule, s)
|
12
14
|
s << "\n" unless s.end_with?("\n")
|
13
|
-
tree = subject.text_to_syntax_tree(s, rule)
|
15
|
+
tree = subject.builder.text_to_syntax_tree(s, {root: rule})
|
14
16
|
|
15
17
|
if not tree
|
16
18
|
raise Exception.new(subject.failure_reason || "Couldn't match to grammar") if tree.nil?
|
@@ -61,7 +63,7 @@ EOS
|
|
61
63
|
end
|
62
64
|
|
63
65
|
it 'should handle parts and odd section numbers' do
|
64
|
-
subject.
|
66
|
+
subject.parser.options = {section_number_after_title: false}
|
65
67
|
node = parse :bylaw, <<EOS
|
66
68
|
PART 1
|
67
69
|
PREVENTION AND SUPPRESSION OF HEALTH NUISANCES
|
@@ -232,7 +234,7 @@ EOS
|
|
232
234
|
|
233
235
|
context 'sections' do
|
234
236
|
it 'should handle section numbers after title' do
|
235
|
-
subject.
|
237
|
+
subject.parser.options = {section_number_after_title: true}
|
236
238
|
node = parse :bylaw, <<EOS
|
237
239
|
Section
|
238
240
|
1. (1) hello
|
@@ -244,7 +246,7 @@ EOS
|
|
244
246
|
end
|
245
247
|
|
246
248
|
it 'should handle section numbers before title' do
|
247
|
-
subject.
|
249
|
+
subject.parser.options = {section_number_after_title: false}
|
248
250
|
node = parse :bylaw, <<EOS
|
249
251
|
1. Section
|
250
252
|
(1) hello
|
@@ -256,7 +258,7 @@ EOS
|
|
256
258
|
end
|
257
259
|
|
258
260
|
it 'should handle section numbers without a dot' do
|
259
|
-
subject.
|
261
|
+
subject.parser.options = {section_number_after_title: false}
|
260
262
|
node = parse :bylaw, <<EOS
|
261
263
|
1 A section
|
262
264
|
(1) hello
|
@@ -274,7 +276,7 @@ EOS
|
|
274
276
|
end
|
275
277
|
|
276
278
|
it 'should handle sections without titles' do
|
277
|
-
subject.
|
279
|
+
subject.parser.options = {section_number_after_title: false}
|
278
280
|
node = parse :bylaw, <<EOS
|
279
281
|
1. No owner or occupier of any shop or business premises or vacant land, blah blah
|
280
282
|
2. Notwithstanding the provision of any other By-law or legislation no person shall—
|
@@ -291,7 +293,7 @@ EOS
|
|
291
293
|
end
|
292
294
|
|
293
295
|
it 'should handle sections without titles and with subsections' do
|
294
|
-
subject.
|
296
|
+
subject.parser.options = {section_number_after_title: false}
|
295
297
|
node = parse :bylaw, <<EOS
|
296
298
|
10. (1) Transporters must remove medical waste.
|
297
299
|
(2) Without limiting generality, stuff.
|
@@ -305,7 +307,7 @@ EOS
|
|
305
307
|
end
|
306
308
|
|
307
309
|
it 'should realise complex section titles are actually section content' do
|
308
|
-
subject.
|
310
|
+
subject.parser.options = {section_number_after_title: false}
|
309
311
|
node = parse :bylaw, <<EOS
|
310
312
|
10. The owner of any premises which is let or sublet to more than one tenant, shall maintain at all times in a clean and sanitary condition every part of such premises as may be used in common by more than one tenant.
|
311
313
|
11. No person shall keep, cause or suffer to be kept any factory or trade premises so as to cause or give rise to smells or effluvia that constitute a health nuisance.
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: slaw
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Greg Kempe
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-09-
|
11
|
+
date: 2014-09-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -132,24 +132,25 @@ files:
|
|
132
132
|
- lib/slaw/namespace.rb
|
133
133
|
- lib/slaw/parse/blocklists.rb
|
134
134
|
- lib/slaw/parse/builder.rb
|
135
|
-
- lib/slaw/parse/bylaw.treetop
|
136
135
|
- lib/slaw/parse/cleanser.rb
|
137
136
|
- lib/slaw/parse/error.rb
|
138
137
|
- lib/slaw/parse/grammar_helpers.rb
|
139
|
-
- lib/slaw/parse/nodes.rb
|
140
138
|
- lib/slaw/render/html.rb
|
141
139
|
- lib/slaw/render/xsl/act.xsl
|
142
140
|
- lib/slaw/render/xsl/elements.xsl
|
143
141
|
- lib/slaw/render/xsl/fragment.xsl
|
144
142
|
- lib/slaw/version.rb
|
145
143
|
- lib/slaw/xml_support.rb
|
144
|
+
- lib/slaw/za/bylaw.treetop
|
145
|
+
- lib/slaw/za/bylaw_generator.rb
|
146
|
+
- lib/slaw/za/bylaw_nodes.rb
|
146
147
|
- slaw.gemspec
|
147
148
|
- spec/extract/extractor_spec.rb
|
148
149
|
- spec/parse/builder_spec.rb
|
149
|
-
- spec/parse/bylaw_spec.rb
|
150
150
|
- spec/parse/cleanser_spec.rb
|
151
151
|
- spec/spec_helper.rb
|
152
152
|
- spec/xml_helpers.rb
|
153
|
+
- spec/za/bylaw_spec.rb
|
153
154
|
homepage: ''
|
154
155
|
licenses:
|
155
156
|
- MIT
|
@@ -177,7 +178,7 @@ summary: A lightweight library for using Akoma Ntoso acts in Ruby.
|
|
177
178
|
test_files:
|
178
179
|
- spec/extract/extractor_spec.rb
|
179
180
|
- spec/parse/builder_spec.rb
|
180
|
-
- spec/parse/bylaw_spec.rb
|
181
181
|
- spec/parse/cleanser_spec.rb
|
182
182
|
- spec/spec_helper.rb
|
183
183
|
- spec/xml_helpers.rb
|
184
|
+
- spec/za/bylaw_spec.rb
|